Diffstat (limited to 'clang')
-rw-r--r--clang/cmake/caches/Fuchsia-stage2-instrumented.cmake3
-rw-r--r--clang/cmake/caches/Fuchsia.cmake3
-rw-r--r--clang/docs/ClangFormatStyleOptions.rst12
-rw-r--r--clang/docs/ClangTools.rst43
-rw-r--r--clang/docs/HIPSupport.rst68
-rw-r--r--clang/docs/LanguageExtensions.rst26
-rw-r--r--clang/docs/OpenMPSupport.rst5
-rw-r--r--clang/docs/ReleaseNotes.rst1084
-rw-r--r--clang/docs/StandardCPlusPlusModules.rst16
-rw-r--r--clang/docs/UsersManual.rst16
-rw-r--r--clang/docs/analyzer/checkers.rst11
-rw-r--r--clang/include/clang-c/Index.h2
-rw-r--r--clang/include/clang/Analysis/Analyses/LifetimeSafety.h101
-rw-r--r--clang/include/clang/Basic/Builtins.td12
-rw-r--r--clang/include/clang/Basic/BuiltinsAMDGPU.def11
-rw-r--r--clang/include/clang/Basic/CodeGenOptions.def3
-rw-r--r--clang/include/clang/Basic/Diagnostic.h5
-rw-r--r--clang/include/clang/Basic/DiagnosticDriverKinds.td2
-rw-r--r--clang/include/clang/Basic/DiagnosticIDs.h5
-rw-r--r--clang/include/clang/Basic/DiagnosticLexKinds.td3
-rw-r--r--clang/include/clang/Basic/DiagnosticParseKinds.td3
-rw-r--r--clang/include/clang/Basic/DiagnosticSemaKinds.td10
-rw-r--r--clang/include/clang/Basic/LangOptions.def2
-rw-r--r--clang/include/clang/Basic/Specifiers.h2
-rw-r--r--clang/include/clang/Basic/TargetInfo.h4
-rw-r--r--clang/include/clang/Basic/arm_neon.td4
-rw-r--r--clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h6
-rw-r--r--clang/include/clang/CIR/Dialect/IR/CIROps.td77
-rw-r--r--clang/include/clang/CIR/Dialect/IR/CIRTypeConstraints.td8
-rw-r--r--clang/include/clang/CIR/Dialect/Passes.h1
-rw-r--r--clang/include/clang/CIR/MissingFeatures.h4
-rw-r--r--clang/include/clang/Driver/Driver.h19
-rw-r--r--clang/include/clang/Driver/Options.td28
-rw-r--r--clang/include/clang/Driver/ToolChain.h7
-rw-r--r--clang/include/clang/Format/Format.h14
-rw-r--r--clang/include/clang/Lex/DependencyDirectivesScanner.h9
-rw-r--r--clang/include/clang/Parse/ParseHLSLRootSignature.h6
-rw-r--r--clang/include/clang/Sema/Sema.h14
-rw-r--r--clang/include/clang/Sema/SemaARM.h5
-rw-r--r--clang/include/clang/Sema/SemaRISCV.h5
-rw-r--r--clang/include/clang/Sema/SemaX86.h4
-rw-r--r--clang/include/clang/StaticAnalyzer/Checkers/Checkers.td4
-rw-r--r--clang/include/clang/StaticAnalyzer/Frontend/CheckerRegistry.h63
-rw-r--r--clang/include/clang/Tooling/Inclusions/IncludeStyle.h4
-rw-r--r--clang/lib/AST/ByteCode/ByteCodeEmitter.cpp2
-rw-r--r--clang/lib/AST/ByteCode/Compiler.cpp201
-rw-r--r--clang/lib/AST/ByteCode/Compiler.h14
-rw-r--r--clang/lib/AST/ByteCode/Context.cpp43
-rw-r--r--clang/lib/AST/ByteCode/Context.h8
-rw-r--r--clang/lib/AST/ByteCode/Descriptor.h2
-rw-r--r--clang/lib/AST/ByteCode/EvalEmitter.cpp2
-rw-r--r--clang/lib/AST/ByteCode/EvaluationResult.cpp2
-rw-r--r--clang/lib/AST/ByteCode/Interp.h76
-rw-r--r--clang/lib/AST/ByteCode/InterpBuiltin.cpp27
-rw-r--r--clang/lib/AST/ByteCode/Opcodes.td1
-rw-r--r--clang/lib/AST/ByteCode/Pointer.cpp25
-rw-r--r--clang/lib/AST/ByteCode/Pointer.h19
-rw-r--r--clang/lib/AST/ByteCode/PrimType.h33
-rw-r--r--clang/lib/AST/ByteCode/Program.cpp24
-rw-r--r--clang/lib/AST/ExprConstant.cpp10
-rw-r--r--clang/lib/AST/TextNodeDumper.cpp5
-rw-r--r--clang/lib/Analysis/LifetimeSafety.cpp311
-rw-r--r--clang/lib/Analysis/plugins/CheckerDependencyHandling/CheckerDependencyHandling.cpp13
-rw-r--r--clang/lib/Analysis/plugins/CheckerOptionHandling/CheckerOptionHandling.cpp13
-rw-r--r--clang/lib/Analysis/plugins/SampleAnalyzer/MainCallChecker.cpp18
-rw-r--r--clang/lib/Basic/TargetInfo.cpp2
-rw-r--r--clang/lib/Basic/Targets.cpp3
-rw-r--r--clang/lib/Basic/Targets/AArch64.cpp3
-rw-r--r--clang/lib/Basic/Targets/AArch64.h2
-rw-r--r--clang/lib/Basic/Targets/ARM.cpp4
-rw-r--r--clang/lib/Basic/Targets/Mips.h6
-rw-r--r--clang/lib/Basic/Targets/OSTargets.h1
-rw-r--r--clang/lib/Basic/Targets/RISCV.cpp9
-rw-r--r--clang/lib/Basic/Targets/RISCV.h2
-rw-r--r--clang/lib/Basic/Targets/X86.cpp8
-rw-r--r--clang/lib/Basic/Targets/X86.h2
-rw-r--r--clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp7
-rw-r--r--clang/lib/CIR/CodeGen/CIRGenCXXABI.h5
-rw-r--r--clang/lib/CIR/CodeGen/CIRGenCXXExpr.cpp23
-rw-r--r--clang/lib/CIR/CodeGen/CIRGenClass.cpp132
-rw-r--r--clang/lib/CIR/CodeGen/CIRGenCleanup.cpp69
-rw-r--r--clang/lib/CIR/CodeGen/CIRGenDecl.cpp97
-rw-r--r--clang/lib/CIR/CodeGen/CIRGenExpr.cpp59
-rw-r--r--clang/lib/CIR/CodeGen/CIRGenExprComplex.cpp156
-rw-r--r--clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp40
-rw-r--r--clang/lib/CIR/CodeGen/CIRGenFunction.cpp176
-rw-r--r--clang/lib/CIR/CodeGen/CIRGenFunction.h120
-rw-r--r--clang/lib/CIR/CodeGen/CIRGenItaniumCXXABI.cpp26
-rw-r--r--clang/lib/CIR/CodeGen/CIRGenStmt.cpp3
-rw-r--r--clang/lib/CIR/CodeGen/CMakeLists.txt1
-rw-r--r--clang/lib/CIR/CodeGen/EHScopeStack.h99
-rw-r--r--clang/lib/CIR/Dialect/IR/CIRDialect.cpp98
-rw-r--r--clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp186
-rw-r--r--clang/lib/CIR/Lowering/CIRPasses.cpp2
-rw-r--r--clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp12
-rw-r--r--clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.h10
-rw-r--r--clang/lib/CodeGen/ABIInfo.cpp4
-rw-r--r--clang/lib/CodeGen/CGBuiltin.cpp16
-rw-r--r--clang/lib/CodeGen/CGCall.cpp36
-rw-r--r--clang/lib/CodeGen/CGDebugInfo.cpp5
-rw-r--r--clang/lib/CodeGen/CGExprCXX.cpp27
-rw-r--r--clang/lib/CodeGen/CGOpenMPRuntime.cpp157
-rw-r--r--clang/lib/CodeGen/CGStmt.cpp11
-rw-r--r--clang/lib/CodeGen/CodeGenModule.cpp7
-rw-r--r--clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp42
-rw-r--r--clang/lib/CodeGen/TargetBuiltins/ARM.cpp4
-rw-r--r--clang/lib/Driver/Driver.cpp772
-rw-r--r--clang/lib/Driver/ToolChain.cpp68
-rw-r--r--clang/lib/Driver/ToolChains/AMDGPU.cpp149
-rw-r--r--clang/lib/Driver/ToolChains/AMDGPU.h2
-rw-r--r--clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp10
-rw-r--r--clang/lib/Driver/ToolChains/AMDGPUOpenMP.h3
-rw-r--r--clang/lib/Driver/ToolChains/Arch/Sparc.cpp31
-rw-r--r--clang/lib/Driver/ToolChains/Arch/Sparc.h3
-rw-r--r--clang/lib/Driver/ToolChains/BareMetal.cpp3
-rw-r--r--clang/lib/Driver/ToolChains/Clang.cpp82
-rw-r--r--clang/lib/Driver/ToolChains/CommonArgs.cpp2
-rw-r--r--clang/lib/Driver/ToolChains/Cuda.cpp2
-rw-r--r--clang/lib/Driver/ToolChains/Flang.cpp1
-rw-r--r--clang/lib/Driver/ToolChains/HIPAMD.cpp8
-rw-r--r--clang/lib/Driver/ToolChains/HIPAMD.h3
-rw-r--r--clang/lib/Driver/ToolChains/HIPSPV.cpp7
-rw-r--r--clang/lib/Driver/ToolChains/HIPSPV.h3
-rw-r--r--clang/lib/Driver/ToolChains/MSVC.cpp4
-rw-r--r--clang/lib/Driver/ToolChains/MinGW.cpp2
-rw-r--r--clang/lib/Driver/ToolChains/OpenBSD.cpp2
-rw-r--r--clang/lib/Driver/ToolChains/ROCm.h30
-rw-r--r--clang/lib/Driver/ToolChains/Solaris.cpp6
-rw-r--r--clang/lib/Driver/ToolChains/UEFI.cpp4
-rw-r--r--clang/lib/Format/BreakableToken.cpp4
-rw-r--r--clang/lib/Format/ContinuationIndenter.cpp4
-rw-r--r--clang/lib/Format/Format.cpp6
-rw-r--r--clang/lib/Format/FormatTokenLexer.cpp35
-rw-r--r--clang/lib/Format/IntegerLiteralSeparatorFixer.cpp8
-rw-r--r--clang/lib/Format/IntegerLiteralSeparatorFixer.h4
-rw-r--r--clang/lib/Format/ObjCPropertyAttributeOrderFixer.cpp2
-rw-r--r--clang/lib/Format/TokenAnnotator.cpp3
-rw-r--r--clang/lib/Frontend/CompilerInvocation.cpp6
-rw-r--r--clang/lib/Interpreter/Interpreter.cpp12
-rw-r--r--clang/lib/Interpreter/InterpreterValuePrinter.cpp2
-rw-r--r--clang/lib/Lex/LiteralSupport.cpp2
-rw-r--r--clang/lib/Lex/Pragma.cpp12
-rw-r--r--clang/lib/Parse/ParseDecl.cpp3
-rw-r--r--clang/lib/Parse/ParseDeclCXX.cpp7
-rw-r--r--clang/lib/Parse/ParseHLSLRootSignature.cpp7
-rw-r--r--clang/lib/Sema/AnalysisBasedWarnings.cpp7
-rw-r--r--clang/lib/Sema/SemaARM.cpp91
-rw-r--r--clang/lib/Sema/SemaAvailability.cpp6
-rw-r--r--clang/lib/Sema/SemaChecking.cpp2
-rw-r--r--clang/lib/Sema/SemaConcept.cpp3
-rw-r--r--clang/lib/Sema/SemaDecl.cpp15
-rw-r--r--clang/lib/Sema/SemaDeclAttr.cpp304
-rw-r--r--clang/lib/Sema/SemaOpenACCAtomic.cpp16
-rw-r--r--clang/lib/Sema/SemaOpenMP.cpp37
-rw-r--r--clang/lib/Sema/SemaRISCV.cpp110
-rw-r--r--clang/lib/Sema/SemaTemplateDeduction.cpp21
-rw-r--r--clang/lib/Sema/SemaX86.cpp62
-rw-r--r--clang/lib/StaticAnalyzer/Checkers/MallocChecker.cpp14
-rw-r--r--clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp83
-rw-r--r--clang/lib/Tooling/Inclusions/HeaderIncludes.cpp3
-rw-r--r--clang/lib/Tooling/Inclusions/Stdlib/StandardLibrary.cpp4
-rw-r--r--clang/test/APINotes/Inputs/Headers/SwiftImportAs.apinotes4
-rw-r--r--clang/test/APINotes/Inputs/Headers/SwiftImportAs.h6
-rw-r--r--clang/test/APINotes/swift-import-as.cpp15
-rw-r--r--clang/test/AST/ByteCode/builtin-bit-cast.cpp4
-rw-r--r--clang/test/AST/ByteCode/unions.cpp72
-rw-r--r--clang/test/AST/ast-dump-APValue-lvalue.cpp8
-rw-r--r--clang/test/Analysis/malloc.c18
-rw-r--r--clang/test/C/C2y/n3353.c11
-rw-r--r--clang/test/CIR/CodeGen/array-ctor.cpp106
-rw-r--r--clang/test/CIR/CodeGen/bitfields.c48
-rw-r--r--clang/test/CIR/CodeGen/builtin_call.cpp16
-rw-r--r--clang/test/CIR/CodeGen/cleanup.cpp83
-rw-r--r--clang/test/CIR/CodeGen/complex-cast.cpp358
-rw-r--r--clang/test/CIR/CodeGen/destructors.cpp4
-rw-r--r--clang/test/CIR/CodeGen/dtor-alias.cpp75
-rw-r--r--clang/test/CIR/IR/array-ctor.cir29
-rw-r--r--clang/test/ClangScanDeps/modules-full-named-modules.cppm66
-rw-r--r--clang/test/CodeGen/AArch64/neon-intrinsics.c12
-rw-r--r--clang/test/CodeGen/PowerPC/ppc-dmf-future-builtin-err.c15
-rw-r--r--clang/test/CodeGen/PowerPC/ppc-dmf-mma-builtin-err.c2
-rw-r--r--clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vcompress.c74
-rw-r--r--clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vrgather.c272
-rw-r--r--clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vcompress.c74
-rw-r--r--clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vrgather.c272
-rw-r--r--clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/non-overloaded/vcompress.c68
-rw-r--r--clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/non-overloaded/vrgather.c488
-rw-r--r--clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/overloaded/vcompress.c76
-rw-r--r--clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/overloaded/vrgather.c576
-rw-r--r--clang/test/CodeGen/X86/prefetchi-error.c7
-rw-r--r--clang/test/CodeGen/builtin-maximumnum-minimumnum.c171
-rw-r--r--clang/test/CodeGenCXX/builtin-amdgcn-fence.cpp4
-rw-r--r--clang/test/CodeGenCXX/delete.cpp32
-rw-r--r--clang/test/CodeGenCXX/microsoft-abi-eh-async.cpp209
-rw-r--r--clang/test/CodeGenCXX/microsoft-abi-eh-disabled.cpp140
-rw-r--r--clang/test/CodeGenCXX/microsoft-abi-eh-ip2state.cpp242
-rw-r--r--clang/test/CodeGenHLSL/BasicFeatures/ArrayOutputArguments.hlsl12
-rw-r--r--clang/test/CodeGenOpenCL/amdgpu-features.cl2
-rw-r--r--clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250-load-monitor.cl66
-rw-r--r--clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250-wmma-w32.cl12
-rw-r--r--clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl19
-rw-r--r--clang/test/DebugInfo/KeyInstructions/asm.c59
-rw-r--r--clang/test/DebugInfo/KeyInstructions/goto.c30
-rw-r--r--clang/test/Driver/amdgpu-hip-system-arch.c6
-rw-r--r--clang/test/Driver/baremetal.cpp16
-rw-r--r--clang/test/Driver/cuda-phases.cu10
-rw-r--r--clang/test/Driver/hip-inputs.hip4
-rw-r--r--clang/test/Driver/hip-invalid-target-id.hip8
-rw-r--r--clang/test/Driver/hip-options.hip5
-rw-r--r--clang/test/Driver/hip-phases.hip22
-rw-r--r--clang/test/Driver/invalid-offload-options.cpp22
-rw-r--r--clang/test/Driver/module-fgen-reduced-bmi.cppm3
-rw-r--r--clang/test/Driver/module-output.cppm13
-rw-r--r--clang/test/Driver/modules.cpp2
-rw-r--r--clang/test/Driver/nvptx-cuda-system-arch.c6
-rw-r--r--clang/test/Driver/offload-target.c22
-rw-r--r--clang/test/Driver/openbsd.c5
-rw-r--r--clang/test/Driver/openmp-offload.c11
-rw-r--r--clang/test/Driver/openmp-system-arch.c8
-rw-r--r--clang/test/Driver/print-multi-selection-flags.c17
-rw-r--r--clang/test/Driver/sparc-target-features.c9
-rw-r--r--clang/test/Interpreter/fail.cpp7
-rw-r--r--clang/test/Interpreter/pretty-print.c2
-rw-r--r--clang/test/Interpreter/pretty-print.cpp3
-rw-r--r--clang/test/Layout/ms-no-unique-address.cpp1
-rw-r--r--clang/test/Misc/time-passes.c6
-rw-r--r--clang/test/Modules/mingw-exceptions.cppm3
-rw-r--r--clang/test/OpenMP/copy-gaps-1.cpp52
-rw-r--r--clang/test/OpenMP/copy-gaps-2.cpp52
-rw-r--r--clang/test/OpenMP/copy-gaps-3.cpp46
-rw-r--r--clang/test/OpenMP/copy-gaps-4.cpp48
-rw-r--r--clang/test/OpenMP/copy-gaps-5.cpp50
-rw-r--r--clang/test/OpenMP/copy-gaps-6.cpp87
-rw-r--r--clang/test/OpenMP/declare_variant_clauses_ast_print.cpp26
-rw-r--r--clang/test/OpenMP/declare_variant_clauses_messages.cpp12
-rw-r--r--clang/test/OpenMP/target_map_array_section_no_length_codegen.cpp361
-rw-r--r--clang/test/OpenMP/target_map_codegen_35.cpp29
-rw-r--r--clang/test/OpenMP/target_map_messages.cpp24
-rw-r--r--clang/test/Preprocessor/arm-acle-6.4.c31
-rw-r--r--clang/test/Preprocessor/init-mips.c64
-rw-r--r--clang/test/Preprocessor/pragma-pushpop-macro-diag.c4
-rw-r--r--clang/test/Preprocessor/pragma-pushpop-macro.c3
-rw-r--r--clang/test/Preprocessor/stdint.c10
-rw-r--r--clang/test/Sema/builtins-elementwise-math.c90
-rw-r--r--clang/test/Sema/implicit-special-member-deprecated.cpp24
-rw-r--r--clang/test/Sema/unsupported-arm-streaming.cpp3
-rw-r--r--clang/test/Sema/warn-fortify-source.c5
-rw-r--r--clang/test/Sema/warn-lifetime-safety-dataflow.cpp87
-rw-r--r--clang/test/SemaCXX/attr-target-clones-riscv.cpp3
-rw-r--r--clang/test/SemaCXX/constant-expression-p2280r4.cpp14
-rw-r--r--clang/test/SemaCXX/cxx2a-ms-no-unique-address.cpp1
-rw-r--r--clang/test/SemaCXX/wreturn-always-throws.cpp21
-rw-r--r--clang/test/SemaOpenACC/atomic-construct.cpp52
-rw-r--r--clang/test/SemaOpenCL/builtins-amdgcn-error-gfx1250-param.cl19
-rw-r--r--clang/test/SemaOpenCL/builtins-amdgcn-error-gfx1250-wmma-w32-param.cl7
-rw-r--r--clang/test/SemaTemplate/concepts.cpp8
-rw-r--r--clang/test/SemaTemplate/deduction-guide.cpp16
-rw-r--r--clang/tools/driver/cc1_main.cpp2
-rw-r--r--clang/unittests/Analysis/CMakeLists.txt1
-rw-r--r--clang/unittests/Analysis/LifetimeSafetyTest.cpp710
-rw-r--r--clang/unittests/Format/BracesInserterTest.cpp6
-rw-r--r--clang/unittests/Format/ConfigParseTest.cpp1
-rw-r--r--clang/unittests/Format/FormatTest.cpp109
-rw-r--r--clang/unittests/Format/FormatTestComments.cpp10
-rw-r--r--clang/unittests/Format/FormatTestJava.cpp12
-rw-r--r--clang/unittests/Format/FormatTestSelective.cpp23
-rw-r--r--clang/unittests/Format/IntegerLiteralSeparatorTest.cpp26
-rw-r--r--clang/unittests/Format/SortIncludesTest.cpp4
-rw-r--r--clang/unittests/Format/TokenAnnotatorTest.cpp4
-rw-r--r--clang/unittests/Interpreter/InterpreterTest.cpp7
-rw-r--r--clang/unittests/Parse/ParseHLSLRootSignatureTest.cpp199
-rw-r--r--clang/unittests/StaticAnalyzer/BlockEntranceCallbackTest.cpp6
-rw-r--r--clang/unittests/StaticAnalyzer/BugReportInterestingnessTest.cpp2
-rw-r--r--clang/unittests/StaticAnalyzer/CallDescriptionTest.cpp4
-rw-r--r--clang/unittests/StaticAnalyzer/CallEventTest.cpp2
-rw-r--r--clang/unittests/StaticAnalyzer/ConflictingEvalCallsTest.cpp6
-rw-r--r--clang/unittests/StaticAnalyzer/ExprEngineVisitTest.cpp8
-rw-r--r--clang/unittests/StaticAnalyzer/FalsePositiveRefutationBRVisitorTest.cpp4
-rw-r--r--clang/unittests/StaticAnalyzer/MemRegionDescriptiveNameTest.cpp2
-rw-r--r--clang/unittests/StaticAnalyzer/NoStateChangeFuncVisitorTest.cpp4
-rw-r--r--clang/unittests/StaticAnalyzer/ObjcBug-124477.cpp2
-rw-r--r--clang/unittests/StaticAnalyzer/RegisterCustomCheckersTest.cpp24
-rw-r--r--clang/unittests/StaticAnalyzer/SValSimplifyerTest.cpp3
-rw-r--r--clang/unittests/StaticAnalyzer/SValTest.cpp6
-rw-r--r--clang/unittests/StaticAnalyzer/TestReturnValueUnderConstruction.cpp6
285 files changed, 9483 insertions, 3374 deletions
diff --git a/clang/cmake/caches/Fuchsia-stage2-instrumented.cmake b/clang/cmake/caches/Fuchsia-stage2-instrumented.cmake
index ecd478a..b328f3d 100644
--- a/clang/cmake/caches/Fuchsia-stage2-instrumented.cmake
+++ b/clang/cmake/caches/Fuchsia-stage2-instrumented.cmake
@@ -43,3 +43,6 @@ set(CLANG_BOOTSTRAP_CMAKE_ARGS
${EXTRA_ARGS}
-C ${CMAKE_CURRENT_LIST_DIR}/Fuchsia-stage2.cmake
CACHE STRING "")
+
+# Do not use LLVM build for generating PGO data.
+set(CLANG_PGO_TRAINING_USE_LLVM_BUILD OFF CACHE BOOL "")
diff --git a/clang/cmake/caches/Fuchsia.cmake b/clang/cmake/caches/Fuchsia.cmake
index ee1d681..a3f86f6 100644
--- a/clang/cmake/caches/Fuchsia.cmake
+++ b/clang/cmake/caches/Fuchsia.cmake
@@ -76,6 +76,9 @@ foreach(variable ${_FUCHSIA_BOOTSTRAP_PASSTHROUGH})
get_property(value CACHE ${variable} PROPERTY VALUE)
get_property(type CACHE ${variable} PROPERTY TYPE)
set(BOOTSTRAP_${variable} "${value}" CACHE ${type} "")
+ if(FUCHSIA_ENABLE_PGO)
+ set(BOOTSTRAP_BOOTSTRAP_${variable} "${value}" CACHE ${type} "")
+ endif()
endif()
endforeach()
diff --git a/clang/docs/ClangFormatStyleOptions.rst b/clang/docs/ClangFormatStyleOptions.rst
index bfc8094..7f970f6 100644
--- a/clang/docs/ClangFormatStyleOptions.rst
+++ b/clang/docs/ClangFormatStyleOptions.rst
@@ -4201,8 +4201,8 @@ the configuration (without a prefix: ``Auto``).
* ``""`` means "arbitrary suffix"
* ``"$"`` means "no suffix"
- For example, if configured to ``"(_test)?$"``, then a header a.h would be seen
- as the "main" include in both a.cc and a_test.cc.
+ For example, if configured to ``"(_test)?$"``, then a header a.h would be
+ seen as the "main" include in both a.cc and a_test.cc.
.. _IncludeIsMainSourceRegex:
@@ -6400,6 +6400,14 @@ the configuration (without a prefix: ``Auto``).
IF (...) vs. IF(...)
<conditional-body> <conditional-body>
+ * ``bool AfterNot`` If ``true``, put a space between alternative operator ``not`` and the
+ opening parenthesis.
+
+ .. code-block:: c++
+
+ true: false:
+ return not (a || b); vs. return not(a || b);
+
* ``bool AfterOverloadedOperator`` If ``true``, put a space between operator overloading and opening
parentheses.
diff --git a/clang/docs/ClangTools.rst b/clang/docs/ClangTools.rst
index 60e2159..3216328 100644
--- a/clang/docs/ClangTools.rst
+++ b/clang/docs/ClangTools.rst
@@ -89,13 +89,50 @@ they'll be tracked here. The focus of this documentation is on the scope
and features of the tools for other tool developers; each tool should
provide its own user-focused documentation.
-``clang-tidy``
+``Clang-Doc``
+-------------
+
+`Clang-Doc <https://clang.llvm.org/extra/clang-doc.html>`_ is a tool for
+generating C and C++ documentation from source code and comments.
+
+``Clang-Include-Fixer``
+-----------------------
+
+`Clang-Include-Fixer <https://clang.llvm.org/extra/clang-include-fixer.html>`_
+is a tool to automate the addition of missing ``#include`` directives in a C++
+file. It adds missing namespace qualifiers to unidentified symbols when
+necessary and also removes unused headers.
+
+``Clang-Tidy``
--------------
-`clang-tidy <https://clang.llvm.org/extra/clang-tidy/>`_ is a clang-based C++
+`Clang-Tidy <https://clang.llvm.org/extra/clang-tidy/>`_ is a Clang-based C++
linter tool. It provides an extensible framework for building compiler-based
static analyses detecting and fixing bug-prone patterns, performance,
-portability and maintainability issues.
+portability and maintainability issues. It also has checks for modernizing code
+to newer language standards.
+
+``Clangd``
+----------
+
+`Clangd <https://clangd.llvm.org/>`_ is a language server that can work with
+many editors via a plugin. It understands your C++ code and adds smart
+features to your editor: code completion, compile errors, go-to-definition and
+more.
+
+``Modularize``
+--------------
+
+`Modularize <https://clang.llvm.org/extra/modularize.html>`_ is a standalone
+tool that checks whether a set of headers provides the consistent definitions
+required to use modules.
+
+``pp-trace``
+------------
+
+`pp-trace <https://clang.llvm.org/extra/pp-trace.html>`_ is a standalone tool
+that traces preprocessor activity. It’s also used as a test of Clang’s
+``PPCallbacks`` interface.
Ideas for new Tools
diff --git a/clang/docs/HIPSupport.rst b/clang/docs/HIPSupport.rst
index 406e1c8..b4a671e 100644
--- a/clang/docs/HIPSupport.rst
+++ b/clang/docs/HIPSupport.rst
@@ -545,37 +545,22 @@ The following restrictions imposed on user code apply to both modes:
1. Pointers to function, and all associated features, such as e.g. dynamic
polymorphism, cannot be used (directly or transitively) by the user provided
callable passed to an algorithm invocation;
-2. Global / namespace scope / ``static`` / ``thread`` storage duration variables
- cannot be used (directly or transitively) in name by the user provided
- callable;
-
- - When executing in **HMM Mode** they can be used in address e.g.:
-
- .. code-block:: C++
-
- namespace { int foo = 42; }
-
- bool never(const std::vector<int>& v) {
- return std::any_of(std::execution::par_unseq, std::cbegin(v), std::cend(v), [](auto&& x) {
- return x == foo;
- });
- }
-
- bool only_in_hmm_mode(const std::vector<int>& v) {
- return std::any_of(std::execution::par_unseq, std::cbegin(v), std::cend(v),
- [p = &foo](auto&& x) { return x == *p; });
- }
-
-3. Only algorithms that are invoked with the ``parallel_unsequenced_policy`` are
+2. ``static`` (except for program-wide unique ones) / ``thread`` storage
+ duration variables cannot be used (directly or transitively) in name by the
+ user provided callable;
+3. User code must be compiled in ``-fgpu-rdc`` mode in order for global /
+ namespace scope variables / program-wide unique ``static`` storage duration
+ variables to be usable in name by the user provided callable;
+4. Only algorithms that are invoked with the ``parallel_unsequenced_policy`` are
candidates for offload;
-4. Only algorithms that are invoked with iterator arguments that model
+5. Only algorithms that are invoked with iterator arguments that model
`random_access_iterator <https://en.cppreference.com/w/cpp/iterator/random_access_iterator>`_
are candidates for offload;
-5. `Exceptions <https://en.cppreference.com/w/cpp/language/exceptions>`_ cannot
+6. `Exceptions <https://en.cppreference.com/w/cpp/language/exceptions>`_ cannot
be used by the user provided callable;
-6. Dynamic memory allocation (e.g. ``operator new``) cannot be used by the user
+7. Dynamic memory allocation (e.g. ``operator new``) cannot be used by the user
provided callable;
-7. Selective offload is not possible i.e. it is not possible to indicate that
+8. Selective offload is not possible i.e. it is not possible to indicate that
only some algorithms invoked with the ``parallel_unsequenced_policy`` are to
be executed on the accelerator.
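Taken together, the numbered restrictions above delimit which invocations are offload candidates. The following is an illustrative sketch only (not taken from the documentation being patched), assuming the translation unit is compiled with ``--hipstdpar``:

.. code-block:: c++

   #include <algorithm>
   #include <execution>
   #include <vector>

   // Offload candidate: parallel_unsequenced_policy with random-access
   // iterators; the lambda uses no function pointers, exceptions, or
   // dynamic allocation.
   bool contains(const std::vector<int>& v, int needle) {
     return std::any_of(std::execution::par_unseq, v.cbegin(), v.cend(),
                        [needle](int x) { return x == needle; });
   }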
@@ -585,15 +570,6 @@ additional restrictions:
1. All code that is expected to interoperate has to be recompiled with the
``--hipstdpar-interpose-alloc`` flag i.e. it is not safe to compose libraries
that have been independently compiled;
-2. automatic storage duration (i.e. stack allocated) variables cannot be used
- (directly or transitively) by the user provided callable e.g.
-
- .. code-block:: c++
-
- bool never(const std::vector<int>& v, int n) {
- return std::any_of(std::execution::par_unseq, std::cbegin(v), std::cend(v),
- [p = &n](auto&& x) { return x == *p; });
- }
Current Support
===============
@@ -626,17 +602,12 @@ Linux operating system. Support is synthesised in the following table:
The minimum Linux kernel version for running in HMM mode is 6.4.
-The forwarding header can be obtained from
-`its GitHub repository <https://github.com/ROCm/roc-stdpar>`_.
-It will be packaged with a future `ROCm <https://rocm.docs.amd.com/en/latest/>`_
-release. Because accelerated algorithms are provided via
-`rocThrust <https://rocm.docs.amd.com/projects/rocThrust/en/latest/>`_, a
-transitive dependency on
-`rocPrim <https://rocm.docs.amd.com/projects/rocPRIM/en/latest/>`_ exists. Both
-can be obtained either by installing their associated components of the
-`ROCm <https://rocm.docs.amd.com/en/latest/>`_ stack, or from their respective
-repositories. The list algorithms that can be offloaded is available
-`here <https://github.com/ROCm/roc-stdpar#algorithm-support-status>`_.
+The forwarding header is packaged by
+`ROCm <https://rocm.docs.amd.com/en/latest/>`_, and is obtainable by installing
+the `hipstdpar` package. The list of algorithms that can be offloaded is
+available `here <https://github.com/ROCm/roc-stdpar#algorithm-support-status>`_.
+More details are available in the dedicated blog post
+`<https://rocm.blogs.amd.com/software-tools-optimization/hipstdpar/README.html>`_.
HIP Specific Elements
---------------------
@@ -690,9 +661,8 @@ HIP Specific Elements
Open Questions / Future Developments
====================================
-1. The restriction on the use of global / namespace scope / ``static`` /
- ``thread`` storage duration variables in offloaded algorithms will be lifted
- in the future, when running in **HMM Mode**;
+1. The restriction on the use of ``static`` / ``thread`` storage duration
+ variables in offloaded algorithms might be lifted;
2. The restriction on the use of dynamic memory allocation in offloaded
algorithms will be lifted in the future.
3. The restriction on the use of pointers to function, and associated features
diff --git a/clang/docs/LanguageExtensions.rst b/clang/docs/LanguageExtensions.rst
index f448a9a..34e1bf1 100644
--- a/clang/docs/LanguageExtensions.rst
+++ b/clang/docs/LanguageExtensions.rst
@@ -138,7 +138,7 @@ for support for non-standardized features, i.e. features not prefixed ``c_``,
``cxx_`` or ``objc_``.
Another use of ``__has_feature`` is to check for compiler features not related
-to the language standard, such as e.g. :doc:`AddressSanitizer
+to the language standard, such as :doc:`AddressSanitizer
<AddressSanitizer>`.
If the ``-pedantic-errors`` option is given, ``__has_extension`` is equivalent
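For example, a typical guard on a compiler feature such as AddressSanitizer looks like the sketch below (illustrative only; ``address_sanitizer`` is the long-standing feature name used by Clang):

.. code-block:: c++

   // Compilers without __has_feature treat the check as false.
   #ifndef __has_feature
   #define __has_feature(x) 0
   #endif

   #if __has_feature(address_sanitizer)
   // Built with -fsanitize=address: e.g. annotate custom allocators here.
   #endif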
@@ -377,8 +377,8 @@ Builtin Macros
``__FILE_NAME__``
Clang-specific extension that functions similar to ``__FILE__`` but only
- renders the last path component (the filename) instead of an invocation
- dependent full path to that file.
+ renders the last path component (the filename) instead of an
+ invocation-dependent full path to that file.
``__COUNTER__``
Defined to an integer value that starts at zero and is incremented each time
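A small sketch of the difference (illustrative only; the exact ``__FILE__`` spelling depends on how the compiler was invoked):

.. code-block:: c++

   #include <cstdio>

   // __FILE__ may expand to an invocation-dependent path such as
   // "../src/log.cpp"; __FILE_NAME__ expands to just "log.cpp".
   #define LOG(msg) \
     std::fprintf(stderr, "%s:%d: %s\n", __FILE_NAME__, __LINE__, msg)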
@@ -716,7 +716,7 @@ See also :ref:`langext-__builtin_shufflevector`, :ref:`langext-__builtin_convert
a NEON vector or an SVE vector, it's only available in C++ and uses normal bool
conversions (that is, != 0).
If it's an extension (OpenCL) vector, it's only available in C and OpenCL C.
- And it selects base on signedness of the condition operands (OpenCL v1.1 s6.3.9).
+ And it selects based on signedness of the condition operands (OpenCL v1.1 s6.3.9).
.. [#] sizeof can only be used on vector length specific SVE types.
.. [#] Clang does not allow the address of an element to be taken while GCC
allows this. This is intentional for vectors with a boolean element type and
@@ -848,6 +848,14 @@ of different sizes and signs is forbidden in binary and ternary builtins.
semantics, see `LangRef
<http://llvm.org/docs/LangRef.html#llvm-min-intrinsics-comparation>`_
for the comparison.
+ T __builtin_elementwise_maximumnum(T x, T y) return x or y, whichever is larger. Follows IEEE 754-2019 floating point types
+ semantics, see `LangRef
+ <http://llvm.org/docs/LangRef.html#llvm-min-intrinsics-comparation>`_
+ for the comparison.
+ T __builtin_elementwise_minimumnum(T x, T y) return x or y, whichever is smaller. Follows IEEE 754-2019 floating point types
+ semantics, see `LangRef
+ <http://llvm.org/docs/LangRef.html#llvm-min-intrinsics-comparation>`_
+ for the comparison.
============================================== ====================================================================== =========================================
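A sketch of how the two new rows compose, assuming a Clang ``ext_vector_type`` and the lane-wise ``T(T, T)`` shape shown in the table:

.. code-block:: c++

   typedef float float4 __attribute__((ext_vector_type(4)));

   // Lane-wise clamp; with maximumNumber/minimumNumber semantics a quiet NaN
   // operand yields the numeric operand rather than NaN.
   float4 clamp_lanes(float4 v, float4 lo, float4 hi) {
     return __builtin_elementwise_minimumnum(
         __builtin_elementwise_maximumnum(v, lo), hi);
   }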
@@ -857,7 +865,7 @@ Each builtin returns a scalar equivalent to applying the specified
operation(x, y) as recursive even-odd pairwise reduction to all vector
elements. ``operation(x, y)`` is repeatedly applied to each non-overlapping
even-odd element pair with indices ``i * 2`` and ``i * 2 + 1`` with
-``i in [0, Number of elements / 2)``. If the numbers of elements is not a
+``i in [0, Number of elements / 2)``. If the number of elements is not a
power of 2, the vector is widened with neutral elements for the reduction
at the end to the next power of 2.
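A concrete instance of the pairwise scheme described above, using one of the existing reduction builtins (illustrative only):

.. code-block:: c++

   typedef int int4 __attribute__((ext_vector_type(4)));

   int sum(int4 v) {
     // Even-odd pairwise reduction: (v[0] + v[1]) + (v[2] + v[3]).
     return __builtin_reduce_add(v);
   }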
@@ -1491,7 +1499,7 @@ C++14 digit separators
Use ``__cpp_digit_separators`` to determine if support for digit separators
using single quotes (for instance, ``10'000``) is enabled. At this time, there
-is no corresponding ``__has_feature`` name
+is no corresponding ``__has_feature`` name.
C++14 generalized lambda capture
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
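A minimal sketch of the check (illustrative only):

.. code-block:: c++

   #ifdef __cpp_digit_separators
   constexpr long long population = 8'000'000'000LL;  // separators available
   #else
   constexpr long long population = 8000000000LL;
   #endif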
@@ -1545,7 +1553,7 @@ C++ type aware allocators
^^^^^^^^^^^^^^^^^^^^^^^^^
Use ``__has_extension(cxx_type_aware_allocators)`` to determine the existence of
-support for the future C++2d type aware allocator feature. For full details see
+support for the future C++2d type aware allocator feature. For full details, see
:doc:`C++ Type Aware Allocators <CXXTypeAwareAllocators>` for additional details.
C11
@@ -1643,7 +1651,7 @@ Modules
Use ``__has_feature(modules)`` to determine if Modules have been enabled.
For example, compiling code with ``-fmodules`` enables the use of Modules.
-More information could be found `here <https://clang.llvm.org/docs/Modules.html>`_.
+More information can be found `here <https://clang.llvm.org/docs/Modules.html>`_.
Language Extensions Back-ported to Previous Standards
=====================================================
@@ -1878,7 +1886,7 @@ The following type trait primitives are supported by Clang. Those traits marked
C++26 relocatable types, and types which
were made trivially relocatable via the ``clang::trivial_abi`` attribute.
This trait is deprecated and should be replaced by
- ``__builtin_is_cpp_trivially_relocatable``. Note however that it is generally
+ ``__builtin_is_cpp_trivially_relocatable``. Note, however, that it is generally
unsafe to relocate a C++-relocatable type with ``memcpy`` or ``memmove``;
use ``__builtin_trivially_relocate``.
* ``__builtin_is_cpp_trivially_relocatable`` (C++): Returns true if an object
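A minimal sketch of querying the non-deprecated builtin; treating it as a type-trait-style builtin taking a type is an assumption consistent with the surrounding documentation:

.. code-block:: c++

   struct Point { int x, y; };  // trivially copyable, hence trivially relocatable

   static_assert(__builtin_is_cpp_trivially_relocatable(int));
   static_assert(__builtin_is_cpp_trivially_relocatable(Point));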
diff --git a/clang/docs/OpenMPSupport.rst b/clang/docs/OpenMPSupport.rst
index 986aaab..fc44f4c 100644
--- a/clang/docs/OpenMPSupport.rst
+++ b/clang/docs/OpenMPSupport.rst
@@ -443,7 +443,7 @@ implementation.
+-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+
| Traits for default device envirable | :none:`unclaimed` | :none:`unclaimed` | |
+-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+
-| Optionally omit array length expression | :none:`unclaimed` | :none:`unclaimed` | |
+| Optionally omit array length expression | :good:`done` | :none:`unclaimed` | https://github.com/llvm/llvm-project/pull/148048 |
+-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+
| Canonical loop sequences | :none:`unclaimed` | :none:`unclaimed` | |
+-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+
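An illustrative sketch of an array section whose length expression is omitted; whether a particular omission falls under the newly implemented wording is determined by the pull request referenced in the row above:

.. code-block:: c++

   double a[128];

   void scale_tail() {
     // "a[2:]" omits the length; the section covers a[2] .. a[127].
     #pragma omp target map(tofrom: a[2:])
     for (int i = 2; i < 128; ++i)
       a[i] *= 2.0;
   }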
@@ -467,6 +467,9 @@ implementation.
+-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+
| Non-const do_not_sync for nowait/nogroup | :none:`unclaimed` | :none:`unclaimed` | |
+-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+
+| need_device_addr modifier for adjust_args clause | :part:`partial` | :none:`unclaimed` | Parsing/Sema: https://github.com/llvm/llvm-project/pull/143442 |
+| | | | https://github.com/llvm/llvm-project/pull/149586 |
++-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+
OpenMP Extensions
=================
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 46a7767..ec51ffd 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -34,72 +34,26 @@ latest release, please see the `Clang Web Site <https://clang.llvm.org>`_ or the
Potentially Breaking Changes
============================
-- The Objective-C ARC migrator (ARCMigrate) has been removed.
-- Fix missing diagnostics for uses of declarations when performing typename access,
- such as when performing member access on a ``[[deprecated]]`` type alias.
- (#GH58547)
-- For ARM targets when compiling assembly files, the features included in the selected CPU
- or Architecture's FPU are included. If you wish not to use a specific feature,
- the relevant ``+no`` option will need to be amended to the command line option.
-- When compiling with branch target enforcement, ``asm goto``
- statements will no longer guarantee to place a ``bti`` or
- ``endbr64`` instruction at the labels named as possible branch
- destinations, so it is not safe to use a register-controlled branch
- instruction to branch to one. (In line with gcc.)
-- Added a sugar type `PredefinedSugarType` to improve diagnostic messages. (#GH143653)
-
C/C++ Language Potentially Breaking Changes
-------------------------------------------
-- New LLVM optimizations have been implemented that optimize pointer arithmetic on
- null pointers more aggressively. As part of this, clang has implemented a special
- case for old-style offsetof idioms like ``((int)(&(((struct S *)0)->field)))``, to
- ensure they are not caught by these optimizations. It is also possible to use
- ``-fwrapv-pointer`` or ``-fno-delete-null-pointer-checks`` to make pointer arithmetic
- on null pointers well-defined. (#GH130734, #GH130742, #GH130952)
-
C++ Specific Potentially Breaking Changes
-----------------------------------------
-
-- The type trait builtin ``__is_referenceable`` has been removed, since it has
- very few users and all the type traits that could benefit from it in the
- standard library already have their own bespoke builtins.
-- A workaround for libstdc++4.7 has been removed. Note that 4.8.3 remains the oldest
- supported libstdc++ version.
-- Added ``!nonnull/!align`` metadata to load of references for better codegen.
-- Checking for integer to enum conversions in constant expressions is more
- strict; in particular, ``const E x = (E)-1;`` is not treated as a constant
- if it's out of range. The Boost numeric_conversion library is impacted by
- this; it was fixed in Boost 1.81. (#GH143034)
-
-- Fully implemented `CWG400 Using-declarations and the `
- `"struct hack" <https://wg21.link/CWG400>`_. Invalid member using-declaration
- whose nested-name-specifier doesn't refer to a base class such as
- ``using CurrentClass::Foo;`` is now rejected in C++98 mode.
+- For C++20 modules, the Reduced BMI mode will be the default option. This may introduce
+  regressions if your build system supports the two-phase compilation model but does not
+  yet support Reduced BMI; a regression may also indicate a compiler bug or a bug in user code.
ABI Changes in This Version
---------------------------
-- Return larger CXX records in memory instead of using AVX registers. Code compiled with older clang will be incompatible with newer version of the clang unless -fclang-abi-compat=20 is provided. (#GH120670)
-
AST Dumping Potentially Breaking Changes
----------------------------------------
-- Added support for dumping template arguments of structural value kinds.
-
Clang Frontend Potentially Breaking Changes
-------------------------------------------
-- The ``-Wglobal-constructors`` flag now applies to ``[[gnu::constructor]]`` and
- ``[[gnu::destructor]]`` attributes.
-
Clang Python Bindings Potentially Breaking Changes
--------------------------------------------------
-- ``Cursor.from_location`` now returns ``None`` instead of a null cursor.
- This eliminates the last known source of null cursors.
-- Almost all ``Cursor`` methods now assert that they are called on non-null cursors.
- Most of the time null cursors were mapped to ``None``,
- so no widespread breakages are expected.
What's New in Clang |release|?
==============================
@@ -107,45 +61,14 @@ What's New in Clang |release|?
C++ Language Changes
--------------------
-- Added a :ref:`__builtin_structured_binding_size <builtin_structured_binding_size-doc>` (T)
- builtin that returns the number of structured bindings that would be produced by destructuring ``T``.
-
-- Similarly to GCC, Clang now supports constant expressions in
- the strings of a GNU ``asm`` statement.
-
- .. code-block:: c++
-
- int foo() {
- asm((std::string_view("nop")) ::: (std::string_view("memory")));
- }
-
-- Clang now implements the changes to overload resolution proposed by section 1 and 2 of
- `P3606 <https://wg21.link/P3606R0>`_. If a non-template candidate exists in an overload set that is
- a perfect match (all conversion sequences are identity conversions) template candidates are not instantiated.
- Diagnostics that would have resulted from the instantiation of these template candidates are no longer
- produced. This aligns Clang closer to the behavior of GCC, and fixes (#GH62096), (#GH74581), and (#GH74581).
-
C++2c Feature Support
^^^^^^^^^^^^^^^^^^^^^
-- Implemented `P1061R10 Structured Bindings can introduce a Pack <https://wg21.link/P1061R10>`_.
-- Implemented `P2786R13 Trivial Relocatability <https://wg21.link/P2786R13>`_.
-
-
-- Implemented `P0963R3 Structured binding declaration as a condition <https://wg21.link/P0963R3>`_.
-
-- Implemented `P3618R0 Allow attaching main to the global module <https://wg21.link/P3618>`_.
-
C++23 Feature Support
^^^^^^^^^^^^^^^^^^^^^
C++20 Feature Support
^^^^^^^^^^^^^^^^^^^^^
-- Fixed a crash with a defaulted spaceship (``<=>``) operator when the class
- contains a member declaration of vector type. Vector types cannot yet be
- compared directly, so this causes the operator to be deleted. (#GH137452)
-- Implement constant evaluation of lambdas that capture structured bindings.
- (#GH145956)
C++17 Feature Support
^^^^^^^^^^^^^^^^^^^^^
@@ -153,564 +76,37 @@ C++17 Feature Support
Resolutions to C++ Defect Reports
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-- The flag `-frelaxed-template-template-args`
- and its negation have been removed, having been deprecated since the previous
- two releases. The improvements to template template parameter matching implemented
- in the previous release, as described in P3310 and P3579, made this flag unnecessary.
-
-- Implemented `CWG2918 Consideration of constraints for address of overloaded `
- `function <https://cplusplus.github.io/CWG/issues/2918.html>`_
-
-- Bumped the ``__cpp_constexpr`` feature-test macro to ``202002L`` in C++20 mode as indicated in
- `P2493R0 <https://wg21.link/P2493R0>`_.
-
-- Implemented `CWG2517 Useless restriction on use of parameter in `
- `constraint-expression <https://cplusplus.github.io/CWG/issues/2517.html>`_.
-- Implemented `CWG3005 Function parameters should never be name-independent <https://wg21.link/CWG3005>`_.
-
-- Implemented `CWG2496 ref-qualifiers and virtual overriding <https://wg21.link/CWG2496>`_.
-
C Language Changes
------------------
-- Clang now allows an ``inline`` specifier on a typedef declaration of a
- function type in Microsoft compatibility mode. #GH124869
-- Clang now allows ``restrict`` qualifier for array types with pointer elements (#GH92847).
-- Clang now diagnoses ``const``-qualified object definitions without an
- initializer. If the object is a variable or field which is zero-initialized,
- it will be diagnosed under the new warning ``-Wdefault-const-init-var`` or
- ``-Wdefault-const-init-field``, respectively. Similarly, if the variable or
- field is not zero-initialized, it will be diagnosed under the new diagnostic
- ``-Wdefault-const-init-var-unsafe`` or ``-Wdefault-const-init-field-unsafe``,
- respectively. The unsafe diagnostic variants are grouped under a new
- diagnostic ``-Wdefault-const-init-unsafe``, which itself is grouped under the
- new diagnostic ``-Wdefault-const-init``. Finally, ``-Wdefault-const-init`` is
- grouped under ``-Wc++-compat`` because these constructs are not compatible
- with C++. #GH19297
-- Added ``-Wimplicit-void-ptr-cast``, grouped under ``-Wc++-compat``, which
- diagnoses implicit conversion from ``void *`` to another pointer type as
- being incompatible with C++. (#GH17792)
-- Added ``-Wc++-keyword``, grouped under ``-Wc++-compat``, which diagnoses when
- a C++ keyword is used as an identifier in C. (#GH21898)
-- Added ``-Wc++-hidden-decl``, grouped under ``-Wc++-compat``, which diagnoses
- use of tag types which are visible in C but not visible in C++ due to scoping
- rules. e.g.,
-
- .. code-block:: c
-
- struct S {
- struct T {
- int x;
- } t;
- };
- struct T t; // Invalid C++, valid C, now diagnosed
-- Added ``-Wimplicit-int-enum-cast``, grouped under ``-Wc++-compat``, which
- diagnoses implicit conversion from integer types to an enumeration type in C,
- which is not compatible with C++. #GH37027
-- Split "implicit conversion from enum type to different enum type" diagnostic
- from ``-Wenum-conversion`` into its own diagnostic group,
- ``-Wimplicit-enum-enum-cast``, which is grouped under both
- ``-Wenum-conversion`` and ``-Wimplicit-int-enum-cast``. This conversion is an
- int-to-enum conversion because the enumeration on the right-hand side is
- promoted to ``int`` before the assignment.
-- Added ``-Wtentative-definition-compat``, grouped under ``-Wc++-compat``,
- which diagnoses tentative definitions in C with multiple declarations as
- being incompatible with C++. e.g.,
-
- .. code-block:: c
-
- // File scope
- int i;
- int i; // Vaild C, invalid C++, now diagnosed
-- Added ``-Wunterminated-string-initialization``, grouped under ``-Wextra``,
- which diagnoses an initialization from a string literal where only the null
- terminator cannot be stored. e.g.,
-
- .. code-block:: c
-
-
- char buf1[3] = "foo"; // -Wunterminated-string-initialization
- char buf2[3] = "flarp"; // -Wexcess-initializers
- char buf3[3] = "fo\0"; // This is fine, no warning.
-
- This diagnostic can be suppressed by adding the new ``nonstring`` attribute
- to the field or variable being initialized. #GH137705
-- Added ``-Wc++-unterminated-string-initialization``, grouped under
- ``-Wc++-compat``, which also diagnoses the same cases as
- ``-Wunterminated-string-initialization``. However, this diagnostic is not
- silenced by the ``nonstring`` attribute as these initializations are always
- incompatible with C++.
-- Added ``-Wjump-misses-init``, which is off by default and grouped under
- ``-Wc++-compat``. It diagnoses when a jump (``goto`` to its label, ``switch``
- to its ``case``) will bypass the initialization of a local variable, which is
- invalid in C++.
-- Added the existing ``-Wduplicate-decl-specifier`` diagnostic, which is on by
- default, to ``-Wc++-compat`` because duplicated declaration specifiers are
- not valid in C++.
-- The ``[[clang::assume()]]`` attribute is now correctly recognized in C. The
- ``__attribute__((assume()))`` form has always been supported, so the fix is
- specific to the attribute syntax used.
-- The ``clang-cl`` driver now recognizes ``/std:clatest`` and sets the language
- mode to C23. (#GH147233)
-
C2y Feature Support
^^^^^^^^^^^^^^^^^^^
-- Implement `WG14 N3409 <https://www.open-std.org/jtc1/sc22/wg14/www/docs/n3409.pdf>`_
- which removes UB around use of ``void`` expressions. In practice, this means
- that ``_Generic`` selection associations may now have ``void`` type, but it
- also removes UB with code like ``(void)(void)1;``.
-- Implemented `WG14 N3411 <https://www.open-std.org/jtc1/sc22/wg14/www/docs/n3411.pdf>`_
- which allows a source file to not end with a newline character. Note,
- ``-pedantic`` will no longer diagnose this in either C or C++ modes. This
- feature was adopted as applying to obsolete versions of C in WG14 and as a
- defect report in WG21 (CWG787).
-- Implemented `WG14 N3353 <https://www.open-std.org/jtc1/sc22/wg14/www/docs/n3353.htm>`_
- which adds the new ``0o`` and ``0O`` ocal literal prefixes and deprecates
- octal literals other than ``0`` which do not start with the new prefix. This
- feature is exposed in earlier language modes and in C++ as an extension. The
- paper also introduced octal and hexadecimal delimited escape sequences (e.g.,
- ``"\x{12}\o{12}"``) which are also supported as an extension in older C
- language modes.
-- Implemented `WG14 N3369 <https://www.open-std.org/jtc1/sc22/wg14/www/docs/n3369.pdf>`_
- which introduces the ``_Lengthof`` operator, and `WG14 N3469 <https://www.open-std.org/jtc1/sc22/wg14/www/docs/n3469.htm>`_
- which renamed ``_Lengthof`` to ``_Countof``. This feature is implemented as
- a conforming extension in earlier C language modes, but not in C++ language
- modes (``std::extent`` and ``std::size`` already provide the same
- functionality but with more granularity). The feature can be tested via
- ``__has_feature(c_countof)`` or ``__has_extension(c_countof)``. This also
- adds the ``<stdcountof.h>`` header file which exposes the ``countof`` macro
- which expands to ``_Countof``.
C23 Feature Support
^^^^^^^^^^^^^^^^^^^
-- Clang now accepts ``-std=iso9899:2024`` as an alias for C23.
-- Added ``__builtin_c23_va_start()`` for compatibility with GCC and to enable
- better diagnostic behavior for the ``va_start()`` macro in C23 and later.
- This also updates the definition of ``va_start()`` in ``<stdarg.h>`` to use
- the new builtin. Fixes #GH124031.
-- Implemented `WG14 N2819 <https://www.open-std.org/jtc1/sc22/wg14/www/docs/n2819.pdf>`_
- which clarified that a compound literal used within a function prototype is
- treated as if the compound literal were within the body rather than at file
- scope.
-- Fixed a bug where you could not cast a null pointer constant to type
- ``nullptr_t``. Fixes #GH133644.
-- Implemented `WG14 N3037 <https://www.open-std.org/jtc1/sc22/wg14/www/docs/n3037.pdf>`_
- which allows tag types to be redefined within the same translation unit so
- long as both definitions are structurally equivalent (same tag types, same
- tag names, same tag members, etc). As a result of this paper, ``-Wvisibility``
- is no longer diagnosed in C23 if the parameter is a complete tag type (it
- does still fire when the parameter is an incomplete tag type as that cannot
- be completed).
-- Fixed a failed assertion with an invalid parameter to the ``#embed``
- directive. Fixes #GH126940.
-- Fixed a crash when a declaration of a ``constexpr`` variable with an invalid
- type. Fixes #GH140887
-- Documented `WG14 N3006 <https://www.open-std.org/jtc1/sc22/wg14/www/docs/n3006.htm>`_
- which clarified how Clang is handling underspecified object declarations.
-- Clang now accepts single variadic parameter in type-name. It's a part of
- `WG14 N2975 <https://open-std.org/JTC1/SC22/WG14/www/docs/n2975.pdf>`_
-- Fixed a bug with handling the type operand form of ``typeof`` when it is used
- to specify a fixed underlying type for an enumeration. #GH146351
-- Fixed a rejects-valid bug where Clang would reject an enumeration with an
- ``_Atomic`` underlying type. The underlying type is the non-atomic,
- unqualified version of the specified type. Due to the perhaps surprising lack
- of atomic behavior, this is diagnosed under
- ``-Wunderlying-atomic-qualifier-ignored``, which defaults to an error. This
- can be downgraded with ``-Wno-underlying-atomic-qualifier-ignored`` or
- ``-Wno-error=underlying-atomic-qualifier-ignored``. Clang now also diagnoses
- cv-qualifiers as being ignored, but that warning does not default to an error.
- It can be controlled by ``-Wunderlying-cv-qualifier-ignore``. (#GH147736)
-
-C11 Feature Support
-^^^^^^^^^^^^^^^^^^^
-- Implemented `WG14 N1285 <https://www.open-std.org/jtc1/sc22/wg14/www/docs/n1285.htm>`_
- which introduces the notion of objects with a temporary lifetime. When an
- expression resulting in an rvalue with structure or union type and that type
- contains a member of array type, the expression result is an automatic storage
- duration object with temporary lifetime which begins when the expression is
- evaluated and ends at the evaluation of the containing full expression. This
- functionality is also implemented for earlier C language modes because the
- C99 semantics will never be implemented (it would require dynamic allocations
- of memory which leaks, which users would not appreciate).
Non-comprehensive list of changes in this release
-------------------------------------------------
+- Added ``__builtin_elementwise_minimumnum`` and ``__builtin_elementwise_maximumnum``.
-- Support parsing the `cc` operand modifier and alias it to the `c` modifier (#GH127719).
-- Added `__builtin_elementwise_exp10`.
-- For AMDPGU targets, added `__builtin_v_cvt_off_f32_i4` that maps to the `v_cvt_off_f32_i4` instruction.
-- Added `__builtin_elementwise_minnum` and `__builtin_elementwise_maxnum`.
-- No longer crashing on invalid Objective-C categories and extensions when
- dumping the AST as JSON. (#GH137320)
-- Clang itself now uses split stacks instead of threads for allocating more
- stack space when running on Apple AArch64 based platforms. This means that
- stack traces of Clang from debuggers, crashes, and profilers may look
- different than before.
-- Fixed a crash when a VLA with an invalid size expression was used within a
- ``sizeof`` or ``typeof`` expression. (#GH138444)
-- ``__builtin_invoke`` has been added to improve the compile time of ``std::invoke``.
-- Deprecation warning is emitted for the deprecated ``__reference_binds_to_temporary`` intrinsic.
- ``__reference_constructs_from_temporary`` should be used instead. (#GH44056)
-- Added `__builtin_get_vtable_pointer` to directly load the primary vtable pointer from a
- polymorphic object.
-- ``libclang`` receives a family of new bindings to query basic facts about
- GCC-style inline assembly blocks, including whether the block is ``volatile``
- and its template string following the LLVM IR ``asm`` format. (#GH143424)
-- Clang no longer rejects reinterpret_cast conversions between indirect
- ARC-managed pointers and other pointer types. The prior behavior was overly
- strict and inconsistent with the ARC specification.
New Compiler Flags
------------------
-- New option ``-Wundef-true`` added and enabled by default to warn when `true` is used in the C preprocessor without being defined before C23.
-
-- New option ``-fprofile-continuous`` added to enable continuous profile syncing to file (#GH124353, `docs <https://clang.llvm.org/docs/UsersManual.html#cmdoption-fprofile-continuous>`_).
- The feature has `existed <https://clang.llvm.org/docs/SourceBasedCodeCoverage.html#running-the-instrumented-program>`_)
- for a while and this is just a user facing option.
-
-- New option ``-ftime-report-json`` added which outputs the same timing data as ``-ftime-report`` but formatted as JSON.
-
-- New option ``-Wnrvo`` added and disabled by default to warn about missed NRVO opportunities.
-
-- New option ``-ignore-pch`` added to disable precompiled headers. It overrides ``-emit-pch`` and ``-include-pch``. (#GH142409, `PCHDocs <https://clang.llvm.org/docs/UsersManual.html#ignoring-a-pch-file>`_).
-
-- New options ``-g[no-]key-instructions`` added, disabled by default. Reduces jumpiness of debug stepping for optimized code in some debuggers (not LLDB at this time). Not recommended for use without optimizations. DWARF only. Note both the positive and negative flags imply ``-g``.
-
Deprecated Compiler Flags
-------------------------
Modified Compiler Flags
-----------------------
-- The ARM AArch32 ``-mtp`` option accepts and defaults to ``auto``, a value of ``auto`` uses the best available method of providing the frame pointer supported by the hardware. This matches
- the behavior of ``-mtp`` in gcc. This changes the default behavior for ARM targets that provide the ``TPIDRURO`` register as this will be used instead of a call to the ``__aeabi_read_tp``.
- Programs that use ``__aeabi_read_tp`` but do not use the ``TPIDRURO`` register must use ``-mtp=soft``. Fixes #123864
-
-- The compiler flag `-fbracket-depth` default value is increased from 256 to 2048. (#GH94728)
-
-- `-Wpadded` option implemented for the `x86_64-windows-msvc` target. Fixes #61702
-
-- The ``-mexecute-only`` and ``-mpure-code`` flags are now accepted for AArch64 targets. (#GH125688)
-
-- The ``-fchar8_t`` flag is no longer considered in non-C++ languages modes. (#GH55373)
-
-- The ``-fveclib=libmvec`` option now supports AArch64 targets (requires GLIBC 2.40 or newer).
-
-- The ``-Og`` optimization flag now sets ``-fextend-variable-liveness``,
- reducing performance slightly while reducing the number of optimized-out
- variables. (#GH118026)
-
Removed Compiler Flags
-------------------------
Attribute Changes in Clang
--------------------------
-Adding [[clang::unsafe_buffer_usage]] attribute to a method definition now turns off all -Wunsafe-buffer-usage
-related warnings within the method body.
-
-- The ``no_sanitize`` attribute now accepts both ``gnu`` and ``clang`` names.
-- The ``ext_vector_type(n)`` attribute can now be used as a generic type attribute.
-- Clang now diagnoses use of declaration attributes on void parameters. (#GH108819)
-- Clang now allows ``__attribute__((model("small")))`` and
- ``__attribute__((model("large")))`` on non-TLS globals in x86-64 compilations.
- This forces the global to be considered small or large in regards to the
- x86-64 code model, regardless of the code model specified for the compilation.
-- Clang now emits a warning ``-Wreserved-init-priority`` instead of a hard error
- when ``__attribute__((init_priority(n)))`` is used with values of n in the
- reserved range [0, 100]. The warning will be treated as an error by default.
-
-- There is a new ``format_matches`` attribute to complement the existing
- ``format`` attribute. ``format_matches`` allows the compiler to verify that
- a format string argument is equivalent to a reference format string: it is
- useful when a function accepts a format string without its accompanying
- arguments to format. For instance:
-
- .. code-block:: c
-
- static int status_code;
- static const char *status_string;
-
- void print_status(const char *fmt) {
- fprintf(stderr, fmt, status_code, status_string);
- // ^ warning: format string is not a string literal [-Wformat-nonliteral]
- }
-
- void stuff(void) {
- print_status("%s (%#08x)\n");
- // order of %s and %x is swapped but there is no diagnostic
- }
-
- Before the introducion of ``format_matches``, this code cannot be verified
- at compile-time. ``format_matches`` plugs that hole:
-
- .. code-block:: c
-
- __attribute__((format_matches(printf, 1, "%x %s")))
- void print_status(const char *fmt) {
- fprintf(stderr, fmt, status_code, status_string);
- // ^ `fmt` verified as if it was "%x %s" here; no longer triggers
- // -Wformat-nonliteral, would warn if arguments did not match "%x %s"
- }
-
- void stuff(void) {
- print_status("%s (%#08x)\n");
- // warning: format specifier 's' is incompatible with 'x'
- // warning: format specifier 'x' is incompatible with 's'
- }
-
- Like with ``format``, the first argument is the format string flavor and the
- second argument is the index of the format string parameter.
- ``format_matches`` accepts an example valid format string as its third
- argument. For more information, see the Clang attributes documentation.
-
-- Introduced a new statement attribute ``[[clang::atomic]]`` that enables
- fine-grained control over atomic code generation on a per-statement basis.
- Supported options include ``[no_]remote_memory``,
- ``[no_]fine_grained_memory``, and ``[no_]ignore_denormal_mode``. These are
- particularly relevant for AMDGPU targets, where they map to corresponding IR
- metadata.
-
-- Clang now disallows the use of attributes applied before an
- ``extern template`` declaration (#GH79893).
-
-- Clang will print the "reason" string argument passed on to
- ``[[clang::warn_unused_result("reason")]]`` as part of the warning diagnostic.
Improvements to Clang's diagnostics
-----------------------------------
-
-- Improve the diagnostics for deleted default constructor errors for C++ class
- initializer lists that don't explicitly list a class member and thus attempt
- to implicitly default construct that member.
-- The ``-Wunique-object-duplication`` warning has been added to warn about objects
- which are supposed to only exist once per program, but may get duplicated when
- built into a shared library.
-- Fixed a bug where Clang's Analysis did not correctly model the destructor behavior of ``union`` members (#GH119415).
-- A statement attribute applied to a ``case`` label no longer suppresses
- 'bypassing variable initialization' diagnostics (#84072).
-- The ``-Wunsafe-buffer-usage`` warning has been updated to warn
- about unsafe libc function calls. Those new warnings are emitted
- under the subgroup ``-Wunsafe-buffer-usage-in-libc-call``.
-- Diagnostics on chained comparisons (``a < b < c``) are now an error by default. This can be disabled with
- ``-Wno-error=parentheses``.
-- Similarly, fold expressions over a comparison operator are now an error by default.
-- Clang now better preserves the sugared types of pointers to member.
-- Clang now better preserves the presence of the template keyword with dependent
- prefixes.
-- Clang now in more cases avoids printing 'type-parameter-X-X' instead of the name of
- the template parameter.
-- Clang now respects the current language mode when printing expressions in
- diagnostics. This fixes a bunch of `bool` being printed as `_Bool`, and also
- a bunch of HLSL types being printed as their C++ equivalents.
-- Clang now consistently quotes expressions in diagnostics.
-- When printing types for diagnostics, clang now doesn't suppress the scopes of
- template arguments contained within nested names.
-- The ``-Wshift-bool`` warning has been added to warn about shifting a boolean. (#GH28334)
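-
-  For example (an illustrative snippet; names are placeholders):
-
-  .. code-block:: c++
-
-    int twice(bool b) {
-      return b << 1; // shifting a boolean; now diagnosed under -Wshift-bool
-    }
-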
-- Fixed diagnostics adding a trailing ``::`` when printing some source code
- constructs, like base classes.
-- The :doc:`ThreadSafetyAnalysis` now supports ``-Wthread-safety-pointer``,
- which enables warning on passing or returning pointers to guarded variables
- as function arguments or return value respectively. Note that
- :doc:`ThreadSafetyAnalysis` still does not perform alias analysis. The
- feature will be default-enabled with ``-Wthread-safety`` in a future release.
-- The :doc:`ThreadSafetyAnalysis` now supports reentrant capabilities.
-- Clang will now do a better job producing common nested names, when producing
- common types for ternary operator, template argument deduction and multiple return auto deduction.
-- The ``-Wsign-compare`` warning now treats expressions involving bitwise not (``~``)
-  and unary minus (``-``) as signed integers, except when the operand is an unsigned
-  integer, and warns when they are compared with unsigned integers. (#GH18878)
-- The ``-Wunnecessary-virtual-specifier`` warning (included in ``-Wextra``) has
- been added to warn about methods which are marked as virtual inside a
- ``final`` class, and hence can never be overridden.
-
-- Improve the diagnostics for chained comparisons to report actual expressions and operators (#GH129069).
-
-- Improve the diagnostics for shadowed template parameters to report the correct location (#GH129060).
-
-- Improve the ``-Wundefined-func-template`` warning when a function template is not instantiated due to being unreachable in modules.
-
-- Fixed an assertion when referencing an out-of-bounds parameter via a function
- attribute whose argument list refers to parameters by index and the function
- is variadic. e.g.,
-
- .. code-block:: c
-
- __attribute__ ((__format_arg__(2))) void test (int i, ...) { }
-
- Fixes #GH61635
-
-- Split diagnosing base class qualifiers from the ``-Wignored-Qualifiers`` diagnostic group into a new ``-Wignored-base-class-qualifiers`` diagnostic group (which is grouped under ``-Wignored-qualifiers``). Fixes #GH131935.
-
-- ``-Wc++98-compat`` no longer diagnoses use of ``__auto_type`` or
- ``decltype(auto)`` as though it was the extension for ``auto``. (#GH47900)
-- Clang now issues a warning for missing return in ``main`` in C89 mode. (#GH21650)
-
-- Now correctly diagnose a tentative definition of an array with static
- storage duration in pedantic mode in C. (#GH50661)
-- No longer diagnosing idiomatic function pointer casts on Windows under
- ``-Wcast-function-type-mismatch`` (which is enabled by ``-Wextra``). Clang
- would previously warn on this construct, but will no longer do so on Windows:
-
- .. code-block:: c
-
- typedef void (WINAPI *PGNSI)(LPSYSTEM_INFO);
- HMODULE Lib = LoadLibrary("kernel32");
- PGNSI FnPtr = (PGNSI)GetProcAddress(Lib, "GetNativeSystemInfo");
-
-
-- An error is now emitted when a ``musttail`` call is made to a function marked with the ``not_tail_called`` attribute. (#GH133509).
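-
-  For example (an illustrative sketch; function names are placeholders):
-
-  .. code-block:: c++
-
-    __attribute__((not_tail_called)) int callee(int);
-
-    int caller(int x) {
-      [[clang::musttail]] return callee(x); // now an error
-    }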
-
-- ``-Whigher-precision-for-complex-division`` warns when:
-
-  - The divisor is complex.
-  - The complex division happens in a higher precision type due to arithmetic promotion.
-  - The divide and assign operator (``/=``) is used.
-
- Fixes #GH131127
-
-- ``-Wuninitialized`` now diagnoses when a class does not declare any
-  constructors to initialize its non-modifiable members. The diagnostic is
- not new; being controlled via a warning group is what's new. Fixes #GH41104
-
-- Analysis-based diagnostics (like ``-Wconsumed`` or ``-Wunreachable-code``)
- can now be correctly controlled by ``#pragma clang diagnostic``. #GH42199
-
-- Improved Clang's error recovery for invalid function calls.
-
-- Improved bit-field diagnostics to consider the type specified by the
- ``preferred_type`` attribute. These diagnostics are controlled by the flags
- ``-Wpreferred-type-bitfield-enum-conversion`` and
-  ``-Wpreferred-type-bitfield-width``. These warnings are on by default, as they
-  are only triggered if the author has already chosen to use the
-  ``preferred_type`` attribute.
-
-- ``-Winitializer-overrides`` and ``-Wreorder-init-list`` are now grouped under
- the ``-Wc99-designator`` diagnostic group, as they also are about the
- behavior of the C99 feature as it was introduced into C++20. Fixes #GH47037
-- ``-Wreserved-identifier`` now fires on reserved parameter names in a function
- declaration which is not a definition.
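-
-  For example (an illustrative snippet; the parameter name is a placeholder):
-
-  .. code-block:: c++
-
-    void init(int __count); // declaration only; the reserved name is now warned about
-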
-- Clang now prints the namespace for an attribute, if any,
- when emitting an unknown attribute diagnostic.
-
-- ``-Wvolatile`` now warns about volatile-qualified class return types
- as well as volatile-qualified scalar return types. Fixes #GH133380
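-
-  For example (an illustrative snippet):
-
-  .. code-block:: c++
-
-    struct Status { int code; };
-    volatile Status query(); // volatile-qualified class return type; now warned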
-
-- Several compatibility diagnostics that were incorrectly being grouped under
- ``-Wpre-c++20-compat`` are now part of ``-Wc++20-compat``. (#GH138775)
-
-- Improved the ``-Wtautological-overlap-compare`` diagnostics to warn about overlapping and non-overlapping ranges involving character literals and floating-point literals.
- The warning message for non-overlapping cases has also been improved (#GH13473).
-
-- Fixed a duplicate diagnostic when performing typo correction on function template
- calls with explicit template arguments. (#GH139226)
-
-- An explanatory note is now printed when ``assert`` fails during evaluation of a
-  constant expression. Prior to this, the error inaccurately implied that ``assert``
-  could not be used at all in a constant expression. (#GH130458)
-
-- A new off-by-default warning ``-Wms-bitfield-padding`` has been added to alert to cases where bit-field
- packing may differ under the MS struct ABI (#GH117428).
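-
-  A sketch of the kind of declaration the warning targets (illustrative; exact
-  layout differences depend on the target):
-
-  .. code-block:: c++
-
-    struct Flags {
-      unsigned short low  : 4;
-      unsigned int   high : 4; // MS bit-field layout starts a new storage unit here
-    };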
-
-- ``-Watomic-access`` no longer fires on unreachable code. e.g.,
-
- .. code-block:: c
-
- _Atomic struct S { int a; } s;
- void func(void) {
- if (0)
- s.a = 12; // Previously diagnosed with -Watomic-access, now silenced
- s.a = 12; // Still diagnosed with -Watomic-access
- return;
- s.a = 12; // Previously diagnosed, now silenced
- }
-
-
-- A new warning ``-Wcharacter-conversion`` warns when comparing or implicitly converting
- between different Unicode character types (``char8_t``, ``char16_t``, ``char32_t``).
- This warning only triggers in C++ as these types are aliases in C. (#GH138526)
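-
-  For example (an illustrative snippet):
-
-  .. code-block:: c++
-
-    bool same(char16_t a, char32_t b) {
-      return a == b; // comparing different Unicode character types; now warned
-    }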
-
-- Fixed a crash when checking a ``__thread``-specified variable declaration
- with a dependent type in C++. (#GH140509)
-
-- Clang now suggests corrections for unknown attribute names.
-
-- ``-Wswitch`` will now diagnose unhandled enumerators in switches also when
- the enumerator is deprecated. Warnings about using deprecated enumerators in
- switch cases have moved behind a new ``-Wdeprecated-declarations-switch-case``
- flag.
-
- For example:
-
- .. code-block:: c
-
- enum E {
- Red,
- Green,
- Blue [[deprecated]]
- };
- void example(enum E e) {
- switch (e) {
- case Red: // stuff...
- case Green: // stuff...
- }
- }
-
- will result in a warning about ``Blue`` not being handled in the switch.
-
- The warning can be fixed either by adding a ``default:``, or by adding
- ``case Blue:``. Since the enumerator is deprecated, the latter approach will
- trigger a ``'Blue' is deprecated`` warning, which can be turned off with
- ``-Wno-deprecated-declarations-switch-case``.
-
-- Split diagnosis of implicit integer comparison on negation to a new
- diagnostic group ``-Wimplicit-int-comparison-on-negation``, grouped under
-  ``-Wimplicit-int-conversion``, so users can turn it off independently.
-
-- Improved the FixIts for unused lambda captures.
-
-- Delayed typo correction was removed from the compiler; immediate typo
- correction behavior remains the same. Delayed typo correction facilities were
- fragile and unmaintained, and the removal closed the following issues:
- #GH142457, #GH139913, #GH138850, #GH137867, #GH137860, #GH107840, #GH93308,
- #GH69470, #GH59391, #GH58172, #GH46215, #GH45915, #GH45891, #GH44490,
- #GH36703, #GH32903, #GH23312, #GH69874.
-
-- Clang no longer emits a spurious -Wdangling-gsl warning in C++23 when
- iterating over an element of a temporary container in a range-based
-  for loop. (#GH109793, #GH145164)
-
-- Fixed false positives in ``-Wformat-truncation`` and ``-Wformat-overflow``
- diagnostics when floating-point numbers had both width field and plus or space
- prefix specified. (#GH143951)
-
-- A warning is now emitted when ``main`` is attached to a named module,
- which can be turned off with ``-Wno-main-attached-to-named-module``. (#GH146247)
-
-- Clang now avoids issuing `-Wreturn-type` warnings in some cases where
- the final statement of a non-void function is a `throw` expression, or
- a call to a function that is trivially known to always throw (i.e., its
- body consists solely of a `throw` statement). This avoids certain
- false positives in exception-heavy code, though only simple patterns
- are currently recognized.
-
-- Clang now accepts ``@tparam`` comments on variable template partial
- specializations. (#GH144775)
-
-- Fixed a bug that caused diagnostic line wrapping to not function correctly on
- some systems. (#GH139499)
-
-- Clang now tries to avoid printing file paths that contain ``..``, instead preferring
- the canonical file path if it ends up being shorter.
-
-- Improve the diagnostics for a placement new expression when a const-qualified
-  object is passed as the storage argument. (#GH143708)
-
-- Clang now does not issue a warning about returning from a function declared with
- the ``[[noreturn]]`` attribute when the function body is ended with a call via
- pointer, provided it can be proven that the pointer only points to
- ``[[noreturn]]`` functions.
-
- Added a separate diagnostic group ``-Wfunction-effect-redeclarations``, for the more pedantic
diagnostics for function effects (``[[clang::nonblocking]]`` and ``[[clang::nonallocating]]``).
Moved the warning for a missing (though implied) attribute on a redeclaration into this group.
@@ -725,283 +121,30 @@ Improvements to Coverage Mapping
Bug Fixes in This Version
-------------------------
-
-- Clang now outputs correct values when #embed data contains bytes with negative
- signed char values (#GH102798).
-- Fixed a crash when merging named enumerations in modules (#GH114240).
-- Fixed rejects-valid problem when #embed appears in std::initializer_list or
- when it can affect template argument deduction (#GH122306).
-- Fix crash on code completion of function calls involving partial ordering of function templates
- (#GH125500).
-- Fixed clang crash when #embed data does not fit into an array
- (#GH128987).
-- Non-local variable and non-variable declarations in the first clause of a ``for`` loop in C are no longer incorrectly
- considered an error in C23 mode and are allowed as an extension in earlier language modes.
-
-- Remove the ``static`` specifier for the value of ``__FUNCTION__`` for static functions, in MSVC compatibility mode.
-- Fixed a modules crash where exception specifications were not propagated properly (#GH121245, relanded in #GH129982)
-- Fixed a problematic case with recursive deserialization within ``FinishedDeserializing()`` where
- ``PassInterestingDeclsToConsumer()`` was called before the declarations were safe to be passed. (#GH129982)
-- Fixed a modules crash where an explicit Constructor was deserialized. (#GH132794)
-- Defining an integer literal suffix (e.g., ``LL``) before including
- ``<stdint.h>`` in a freestanding build no longer causes invalid token pasting
- when using the ``INTn_C`` macros. (#GH85995)
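-
-  A sketch of the pattern described above (assuming a freestanding build; the
-  macro definition is the user's own):
-
-  .. code-block:: c
-
-    #define LL "long long" // user-defined macro that collides with a literal suffix
-    #include <stdint.h>
-
-    int64_t v = INT64_C(5); // previously could produce an invalid pasted token
-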
-- Fixed an assertion failure in the expansion of builtin macros like ``__has_embed()`` with line breaks before the
- closing paren. (#GH133574)
-- Fixed a crash in error recovery for expressions resolving to templates. (#GH135621)
-- Clang no longer accepts invalid integer constants which are too large to fit
- into any (standard or extended) integer type when the constant is unevaluated.
- Merely forming the token is sufficient to render the program invalid. Code
-  like this was previously accepted and is now rejected (#GH134658):
-
-  .. code-block:: c
-
- #if 1 ? 1 : 999999999999999999999
-    #endif
-
-- ``#embed`` directive now diagnoses use of a non-character file (device file)
- such as ``/dev/urandom`` as an error. This restriction may be relaxed in the
- future. See (#GH126629).
-- Fixed a clang 20 regression where diagnostics attached to some calls to member functions
- using C++23 "deducing this" did not have a diagnostic location (#GH135522)
-
-- Fixed a crash when a ``friend`` function is redefined as deleted. (#GH135506)
-- Fixed a crash when ``#embed`` appears as a part of a failed constant
- evaluation. The crashes were happening during diagnostics emission due to
- unimplemented statement printer. (#GH132641)
-- Fixed visibility calculation for template functions. (#GH103477)
-- Fixed a bug where an attribute before a ``pragma clang attribute`` or
- ``pragma clang __debug`` would cause an assertion. Instead, this now diagnoses
- the invalid attribute location appropriately. (#GH137861)
-- Fixed a crash when a malformed ``_Pragma`` directive appears as part of an
- ``#include`` directive. (#GH138094)
-- Fixed a crash during constant evaluation involving invalid lambda captures
- (#GH138832)
-- Fixed a bug where a compound literal was not treated as a constant expression
-  inside an initializer list. (#GH87867)
-- Fixed a crash when instantiating an invalid dependent friend template specialization.
- (#GH139052)
-- Fixed a crash with an invalid member function parameter list with a default
- argument which contains a pragma. (#GH113722)
-- Fixed assertion failures when generating name lookup table in modules. (#GH61065, #GH134739)
-- Fixed an assertion failure in constant compound literal statements. (#GH139160)
-- Fix crash due to unknown references and pointer implementation and handling of
-  base classes. (#GH139452)
-- Fixed an assertion failure in serialization of constexpr structs containing unions. (#GH140130)
-- Fixed duplicate entries in TableGen that caused the wrong attribute to be selected. (#GH140701)
-- Fixed type mismatch error when 'builtin-elementwise-math' arguments have different qualifiers, this should be well-formed. (#GH141397)
-- Constant evaluation now correctly runs the destructor of a variable declared in
- the second clause of a C-style ``for`` loop. (#GH139818)
-- Fixed a bug with constexpr evaluation for structs containing unions in case of C++ modules. (#GH143168)
-- Fixed incorrect token location when emitting diagnostics for tokens expanded from macros. (#GH143216)
-- Fixed an infinite recursion when checking constexpr destructors. (#GH141789)
-- Fixed a crash when a malformed using declaration appears in a ``constexpr`` function. (#GH144264)
-- Fixed a bug when using a Unicode character name in macro concatenation. (#GH145240)
-- Clang no longer erroneously injects a ``static_assert`` macro in MS-compatibility
-  mode with ``-std=c99``. As a result, the ``-W/-Wno-microsoft-static-assert`` flags and
-  the corresponding diagnostic were removed, because the macro injection was used to
-  emit that warning. Unfortunately, there is no other good way to diagnose usage of the
-  ``static_assert`` macro without inclusion of ``<assert.h>``.
-- In C23, something like ``[[/*possible attributes*/]];`` is an attribute
- declaration, not a statement. So it is not allowed by the syntax in places
- where a statement is required, specifically as the secondary block of a
- selection or iteration statement. This differs from C++, since C++ allows
-  declaration statements. Clang now emits a warning for these patterns. (#GH141659)
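-
-  For example (an illustrative snippet):
-
-  .. code-block:: c
-
-    void f(int x) {
-      if (x)
-        [[]]; // attribute declaration, not a statement; Clang now warns in C23
-    }
-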
-- Fixed false positives for redeclaration errors of using enum in
- nested scopes. (#GH147495)
-- Fixed a failed assertion with an operator call expression which comes from a
- macro expansion when performing analysis for nullability attributes. (#GH138371)
-- Fixed a concept equivalent checking crash due to untransformed constraint expressions. (#GH146614)
+- Fix a crash when the macro name is empty in ``#pragma push_macro("")`` or
+ ``#pragma pop_macro("")``. (#GH149762).
Bug Fixes to Compiler Builtins
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-- The behaviour of ``__add_pointer`` and ``__remove_pointer`` for Objective-C++'s ``id`` and interfaces has been fixed.
-
-- The signature for ``__builtin___clear_cache`` was changed from
- ``void(char *, char *)`` to ``void(void *, void *)`` to match GCC's signature
- for the same builtin. (#GH47833)
-
-- ``__has_unique_object_representations(Incomplete[])`` is no longer accepted, per
- `LWG4113 <https://cplusplus.github.io/LWG/issue4113>`_.
-
-- ``__builtin_is_cpp_trivially_relocatable``, ``__builtin_is_replaceable`` and
- ``__builtin_trivially_relocate`` have been added to support standard C++26 relocation.
-
-- ``__is_trivially_relocatable`` has been deprecated, and uses should be replaced by
- ``__builtin_is_cpp_trivially_relocatable``.
- Note that, it is generally unsafe to ``memcpy`` non-trivially copyable types that
- are ``__builtin_is_cpp_trivially_relocatable``. It is recommended to use
- ``__builtin_trivially_relocate`` instead.
-
-- ``__reference_binds_to_temporary``, ``__reference_constructs_from_temporary``
- and ``__reference_converts_from_temporary`` intrinsics no longer consider
- function references can bind to temporary objects. (#GH114344)
-
-- ``__reference_constructs_from_temporary`` and
- ``__reference_converts_from_temporary`` intrinsics detect reference binding
- to prvalue instead of xvalue now if the second operand is an object type, per
- `LWG3819 <https://cplusplus.github.io/LWG/issue3819>`_.
-
Bug Fixes to Attribute Support
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-- Fixed crash when a parameter to the ``clang::annotate`` attribute evaluates to ``void``. See #GH119125
-
-- Clang now emits a warning instead of an error when using the one or two
- argument form of GCC 11's ``__attribute__((malloc(deallocator)))``
- or ``__attribute__((malloc(deallocator, ptr-index)))``
- (`#51607 <https://github.com/llvm/llvm-project/issues/51607>`_).
-
-- Corrected the diagnostic for the ``callback`` attribute when passing too many
-  or too few attribute argument indices for the specified callback function.
- (#GH47451)
-
-- No longer crashing on ``__attribute__((align_value(N)))`` during template
- instantiation when the function parameter type is not a pointer or reference.
- (#GH26612)
-- Now allowing the ``[[deprecated]]``, ``[[maybe_unused]]``, and
- ``[[nodiscard]]`` to be applied to a redeclaration after a definition in both
- C and C++ mode for the standard spellings (other spellings, such as
- ``__attribute__((unused))`` are still ignored after the definition, though
- this behavior may be relaxed in the future). (#GH135481)
-
-- Clang will warn if a complete type specializes a deprecated partial specialization.
- (#GH44496)
Bug Fixes to C++ Support
^^^^^^^^^^^^^^^^^^^^^^^^
-
-- Clang now supports implicitly defined comparison operators for friend declarations. (#GH132249)
-- Clang now diagnoses copy constructors taking the class by value in template instantiations. (#GH130866)
-- Clang is now better at keeping track of friend function template instance contexts. (#GH55509)
-- Clang now prints the correct instantiation context for diagnostics suppressed
- by template argument deduction.
-- Errors that occur during evaluation of certain type traits and builtins are
- no longer incorrectly emitted when they are used in an SFINAE context. The
- type traits are:
-
- - ``__is_constructible`` and variants,
- - ``__is_convertible`` and variants,
- - ``__is_assignable`` and variants,
- - ``__reference_binds_to_temporary``,
- ``__reference_constructs_from_temporary``,
- ``__reference_converts_from_temporary``,
- - ``__is_trivially_equality_comparable``.
-
- The builtin is ``__builtin_common_type``. (#GH132044)
-- Clang is now better at instantiating the function definition after its use inside
- of a constexpr lambda. (#GH125747)
-- Fixed a local class member function instantiation bug inside dependent lambdas. (#GH59734), (#GH132208)
-- Clang no longer crashes when trying to unify the types of arrays with
- certain differences in qualifiers (this could happen during template argument
- deduction or when building a ternary operator). (#GH97005)
-- Fixed type alias CTAD issues involving default template arguments. (#GH133132), (#GH134471)
-- Fixed CTAD issues when initializing anonymous fields with designated initializers. (#GH67173)
-- The initialization kind of elements of structured bindings
- direct-list-initialized from an array is corrected to direct-initialization.
-- Clang no longer crashes when a coroutine is declared ``[[noreturn]]``. (#GH127327)
-- Clang now uses the parameter location for abbreviated function templates in ``extern "C"``. (#GH46386)
-- Clang now emits an error instead of crashing when ``co_await`` or ``co_yield`` is
-  used in C++26 braced-init-list template parameter initialization. (#GH78426)
-- Improved fix for an issue with pack expansions of type constraints, where this
- now also works if the constraint has non-type or template template parameters.
- (#GH131798)
-- Fixes to partial ordering of non-type template parameter packs. (#GH132562)
-- Fix crash when evaluating the trailing requires clause of generic lambdas which are part of
- a pack expansion.
-- Fixes matching of nested template template parameters. (#GH130362)
-- Correctly diagnoses template template parameters which have a pack parameter
- not in the last position.
-- Disallow overloading on struct vs class on dependent types, which is IFNDR, as
- this makes the problem diagnosable.
-- Improved preservation of the presence or absence of typename specifier when
- printing types in diagnostics.
-- Clang now correctly parses ``if constexpr`` expressions in immediate function context. (#GH123524)
-- Fixed an assertion failure affecting code that uses C++23 "deducing this". (#GH130272)
-- Clang now properly instantiates destructors for initialized members within non-delegating constructors. (#GH93251)
-- Correctly diagnoses if unresolved using declarations shadow template parameters. (#GH129411)
-- Fixed C++20 aggregate initialization rules being incorrectly applied in certain contexts. (#GH131320)
-- Clang was previously coalescing volatile writes to members of volatile base class subobjects.
- The issue has been addressed by propagating qualifiers during derived-to-base conversions in the AST. (#GH127824)
-- Correctly propagates the instantiated array type to the ``DeclRefExpr`` that refers to it. (#GH79750), (#GH113936), (#GH133047)
-- Fixed a Clang regression in C++20 mode where unresolved dependent call expressions were created inside non-dependent contexts (#GH122892)
-- Clang now emits the ``-Wunused-variable`` warning when some structured bindings are unused
- and the ``[[maybe_unused]]`` attribute is not applied. (#GH125810)
-- Fixed ``static_cast`` not performing access or ambiguity checks when converting to an rvalue reference to a base class. (#GH121429)
-- Declarations using class template argument deduction with redundant
- parentheses around the declarator are no longer rejected. (#GH39811)
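-
-  For example (an illustrative snippet; the type name is a placeholder):
-
-  .. code-block:: c++
-
-    template <typename T> struct Wrapper { Wrapper(T) {} };
-    Wrapper (w)(42); // redundant parentheses around the declarator; now accepted
-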
-- Fixed a crash caused by invalid declarations of ``std::initializer_list``. (#GH132256)
-- Clang no longer crashes when establishing subsumption between some constraint expressions. (#GH122581)
-- Clang now issues an error when placement new is used to modify a const-qualified variable
- in a ``constexpr`` function. (#GH131432)
-- Fixed an incorrect TreeTransform for calls to ``consteval`` functions if a conversion template is present. (#GH137885)
-- Clang now emits a warning when class template argument deduction for alias templates is used in C++17. (#GH133806)
-- Fixed a missed initializer instantiation bug for variable templates. (#GH134526), (#GH138122)
-- Fix a crash when checking the template template parameters of a dependent lambda appearing in an alias declaration.
- (#GH136432), (#GH137014), (#GH138018)
-- Fixed an assertion when trying to constant-fold various builtins when the argument
- referred to a reference to an incomplete type. (#GH129397)
-- Fixed a crash when a cast involved a parenthesized aggregate initialization in dependent context. (#GH72880)
-- No longer crashes when instantiating invalid variable template specialization
- whose type depends on itself. (#GH51347), (#GH55872)
-- Improved parser recovery of invalid requirement expressions. In turn, this
- fixes crashes from follow-on processing of the invalid requirement. (#GH138820)
-- Fixed the handling of pack indexing types in the constraints of a member function redeclaration. (#GH138255)
-- Clang now correctly parses arbitrary order of ``[[]]``, ``__attribute__`` and ``alignas`` attributes for declarations (#GH133107)
-- Fixed a crash when forming an invalid function type in a dependent context. (#GH138657) (#GH115725) (#GH68852)
-- Fixed a function declaration mismatch that caused inconsistencies between concepts and variable template declarations. (#GH139476)
-- Fixed an out-of-line declaration mismatch involving nested template parameters. (#GH145521)
-- Clang no longer segfaults when there is a configuration mismatch between modules and their users (http://crbug.com/400353616).
-- Fix an incorrect deduction when calling an explicit object member function template through an overload set address.
-- Fixed bug in constant evaluation that would allow using the value of a
- reference in its own initializer in C++23 mode (#GH131330).
-- Clang could incorrectly instantiate functions in discarded contexts (#GH140449)
-- Fix instantiation of default-initialized variable template specialization. (#GH140632) (#GH140622)
-- Clang modules now allow a module and its user to differ on TrivialAutoVarInit*.
-- Fixed an access checking bug when initializing non-aggregates in default arguments (#GH62444), (#GH83608)
-- Fixed a pack substitution bug in deducing class template partial specializations. (#GH53609)
-- Fixed a crash when constant evaluating some explicit object member assignment operators. (#GH142835)
-- Fixed an access checking bug when substituting into concepts (#GH115838)
-- Fix a bug where the private access specifier of an overloaded function was not respected. (#GH107629)
-- Correctly handles calling an explicit object member function template overload set
- through its address (``(&Foo::bar<baz>)()``).
-- Fix a crash when using an explicit object parameter in a non-member function. (#GH113185)
-- Fix a crash when forming an invalid call to an operator with an explicit object member. (#GH147121)
-- Correctly handle allocations in the condition of an ``if constexpr``. (#GH120197) (#GH134820)
-- Fixed a crash when handling an invalid member using-declaration in C++20 and later modes. (#GH63254)
-- Fixed parsing of lambda expressions that appear after ``*`` or ``&`` in contexts where a declaration can appear. (#GH63880)
-- Fix name lookup in lambda appearing in the body of a requires expression. (#GH147650)
-- Fix a crash when trying to instantiate an ambiguous specialization. (#GH51866)
-- Improved handling of variables with ``consteval`` constructors, to
- consistently treat the initializer as manifestly constant-evaluated.
- (#GH135281)
-- Fix a crash in the presence of invalid base classes. (#GH147186)
-- Fix a crash with an NTTP when instantiating a local class.
-- Fixed a crash involving list-initialization of an empty class with a
- non-empty initializer list. (#GH147949)
-- Fixed constant evaluation of equality comparisons of constexpr-unknown references. (#GH147663)
- Diagnose binding a reference to ``*nullptr`` during constant evaluation. (#GH48665)
+- Suppress ``-Wdeprecated-declarations`` in implicitly generated functions. (#GH147293)
+- Fix a crash when deleting a pointer to an incomplete array (#GH150359).
Bug Fixes to AST Handling
^^^^^^^^^^^^^^^^^^^^^^^^^
-- Fixed type checking when a statement expression ends in an l-value of atomic type. (#GH106576)
-- Fixed uninitialized use check in a lambda within CXXOperatorCallExpr. (#GH129198)
-- Fixed a malformed printout of ``CXXParenListInitExpr`` in certain contexts.
-- Fixed a malformed printout of certain calling convention function attributes. (#GH143160)
-- Fixed dependency calculation for TypedefTypes (#GH89774)
-- The ODR checker now correctly hashes the names of conversion operators. (#GH143152)
-- Fixed the right parenthesis source location of ``CXXTemporaryObjectExpr``. (#GH143711)
-- Fixed a crash when performing an ``IgnoreUnlessSpelledInSource`` traversal of ASTs containing ``catch(...)`` statements. (#GH146103)
Miscellaneous Bug Fixes
^^^^^^^^^^^^^^^^^^^^^^^
-- HTML tags in comments that span multiple lines are now parsed correctly by Clang's comment parser. (#GH120843)
-
Miscellaneous Clang Crashes Fixed
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-- Fixed crash when ``-print-stats`` is enabled in compiling IR files. (#GH131608)
-- Fix code completion crash involving PCH serialized templates. (#GH139019)
-
OpenACC Specific Changes
------------------------
@@ -1011,123 +154,30 @@ Target Specific Changes
AMDGPU Support
^^^^^^^^^^^^^^
-- Bump the default code object version to 6. ROCm 6.3 is required to run any program compiled with COV6.
-
NVPTX Support
^^^^^^^^^^^^^^
-Hexagon Support
-^^^^^^^^^^^^^^^
-
-- The default compilation target has been changed from V60 to V68.
-
X86 Support
^^^^^^^^^^^
-- The 256-bit maximum vector register size control was removed from
-  `AVX10 whitepaper <https://cdrdv2.intel.com/v1/dl/getContent/784343>`_.
- * Re-target ``m[no-]avx10.1`` to enable AVX10.1 with 512-bit maximum vector register size.
- * Emit warning for ``mavx10.x-256``, noting AVX10/256 is not supported.
- * Emit warning for ``mavx10.x-512``, noting to use ``m[no-]avx10.x`` instead.
- * Emit warning for ``m[no-]evex512``, noting AVX10/256 is not supported.
-  * The features ``avx10.x-256/512`` remain unchanged and will be removed in the next release.
-
Arm and AArch64 Support
^^^^^^^^^^^^^^^^^^^^^^^
-- Implementation of modal 8-bit floating point intrinsics in accordance with
- the Arm C Language Extensions (ACLE)
- `as specified here <https://github.com/ARM-software/acle/blob/main/main/acle.md#modal-8-bit-floating-point-extensions>`_
- is now available.
-- Support has been added for the following processors (command-line identifiers in parentheses):
-
- - Arm Cortex-A320 (``cortex-a320``)
-
-- For ARM targets, cc1as now considers the FPU's features for the selected CPU or Architecture.
-- The ``+nosimd`` attribute is now fully supported for ARM. Previously, this had no effect when being used with
- ARM targets, however this will now disable NEON instructions being generated. The ``simd`` option is
- also now printed when the ``--print-supported-extensions`` option is used.
-- When a feature that depends on NEON (``simd``) is used, NEON is now automatically enabled.
-- When NEON is disabled (``+nosimd``), all features that depend on NEON will now be disabled.
-
-- Pointer authentication
-
- - Support for __ptrauth type qualifier has been added.
- - Objective-C adoption of pointer authentication
-
- - ``isa`` and ``super`` pointers are protected with address diversity and separate
- usage specific discriminators.
- - methodlist pointers and content are protected with address diversity and methodlist
- pointers have a usage specific discriminator.
- - ``class_ro_t`` pointers are protected with address diversity and usage specific
- discriminators.
- - ``SEL`` typed ivars are protected with address diversity and usage specific
- discriminators.
-
-- For AArch64, added support for generating executable-only code sections by using the
- ``-mexecute-only`` or ``-mpure-code`` compiler flags. (#GH125688)
-- Added ``-msve-streaming-vector-bits=`` flag, which allows specifying the
- SVE vector width in streaming mode.
-
Android Support
^^^^^^^^^^^^^^^
Windows Support
^^^^^^^^^^^^^^^
-- Clang now defines ``_CRT_USE_BUILTIN_OFFSETOF`` macro in MSVC-compatible mode,
- which makes ``offsetof`` provided by Microsoft's ``<stddef.h>`` to be defined
- correctly. (#GH59689)
-
-- Clang now can process the `i128` and `ui128` integral suffixes when MSVC
- extensions are enabled. This allows for properly processing ``intsafe.h`` in
- the Windows SDK.
-
LoongArch Support
^^^^^^^^^^^^^^^^^
-- Add support for OHOS on loongarch64.
-
-- Add target attribute support for functions. Supported formats include:
- * `arch=<arch>` strings - specifies architecture features for a function (equivalent to `-march=<arch>`).
- * `tune=<cpu>` strings - specifies the tune CPU for a function (equivalent to `-mtune`).
- * `<feature>`/`no-<feature>` - enables/disables specific features.
-
-- Add support for the `_Float16` type, and fix incorrect ABI lowering of `_Float16`
- in the case of structs containing fp16 that are eligible for passing via `GPR+FPR`
- or `FPR+FPR`. Also fix `int16` -> `__fp16` conversion code gen, which uses generic LLVM
- IR rather than `llvm.convert.to.fp16` intrinsics.
-
-- Add support for the `__bf16` type.
-
-- Fix incorrect _BitInt(N>64) alignment. Now consistently uses 16-byte alignment for all
- `_BitInt(N)` where N > 64.
-
RISC-V Support
^^^^^^^^^^^^^^
-- Add support for `-mtune=generic-ooo` (a generic out-of-order model).
-- Adds support for `__attribute__((interrupt("SiFive-CLIC-preemptible")))` and
- `__attribute__((interrupt("SiFive-CLIC-stack-swap")))`. The former
- automatically saves some interrupt CSRs before re-enabling interrupts in the
- function prolog, the latter swaps `sp` with the value in a CSR before it is
- used or modified. These two can also be combined, and can be combined with
- `interrupt("machine")`.
-
-- Adds support for `__attribute__((interrupt("qci-nest")))` and
- `__attribute__((interrupt("qci-nonest")))`. These use instructions from
- Qualcomm's `Xqciint` extension to save and restore some GPRs in interrupt
- service routines.
-
-- `Zicsr` / `Zifencei` are allowed to be duplicated in the presence of `g` in `-march`.
-
-- Add support for the `__builtin_riscv_pause()` intrinsic from the `Zihintpause` extension.
-
CUDA/HIP Language Changes
^^^^^^^^^^^^^^^^^^^^^^^^^
-* Provide a __device__ version of std::__glibcxx_assert_fail() in a header wrapper.
-
CUDA Support
^^^^^^^^^^^^
@@ -1154,164 +204,54 @@ Fixed Point Support in Clang
AST Matchers
------------
-
- Ensure ``hasBitWidth`` doesn't crash on bit widths that are dependent on template
parameters.
-- Ensure ``isDerivedFrom`` matches the correct base in case more than one alias exists.
-- Extend ``templateArgumentCountIs`` to support function and variable template
- specialization.
clang-format
------------
-- Adds ``BreakBeforeTemplateCloser`` option.
-- Adds ``BinPackLongBracedList`` option to override bin packing options in
- long (20 item or more) braced list initializer lists.
-- Add support for the C language instead of treating it like C++.
-- Allow specifying the language (C, C++, or Objective-C) for a ``.h`` file by
- adding a special comment (e.g. ``// clang-format Language: ObjC``) near the
- top of the file.
-- Add ``EnumTrailingComma`` option for inserting/removing commas at the end of
- ``enum`` enumerator lists.
-- Add ``OneLineFormatOffRegex`` option for turning formatting off for one line.
-- Add ``SpaceAfterOperatorKeyword`` option.
-- Add ``MacrosSkippedByRemoveParentheses`` option so that their invocations are
- skipped by ``RemoveParentheses``.
-
-clang-refactor
---------------
-- Reject `0` as column or line number in 1-based command-line source locations.
- Fixes crash caused by `0` input in `-selection=<file>:<line>:<column>[-<line>:<column>]`. (#GH139457)
-
libclang
--------
-- Fixed a bug in ``clang_File_isEqual`` that sometimes caused different
-  in-memory files to be considered equal.
-- Added ``clang_visitCXXMethods``, which allows visiting the methods
- of a class.
-- Added ``clang_getFullyQualifiedName``, which provides fully qualified type names as
- instructed by a PrintingPolicy.
-
-- Fixed a buffer overflow in ``CXString`` implementation. The fix may result in
- increased memory allocation.
-
-- Deprecate ``clang_Cursor_GetBinaryOpcode`` and ``clang_Cursor_getBinaryOpcodeStr``
- implementations, which are duplicates of ``clang_getCursorBinaryOperatorKind``
- and ``clang_getBinaryOperatorKindSpelling`` respectively.
Code Completion
---------------
-- Reject `0` as column or line number in 1-based command-line source locations.
- Fixes crash caused by `0` input in `-code-completion-at=<file>:<line>:<column>`. (#GH139457)
Static Analyzer
---------------
-- Fixed a crash when C++20 parenthesized initializer lists are used. This issue
- was causing a crash in clang-tidy. (#GH136041)
- The Clang Static Analyzer now handles parenthesized initialization.
(#GH148875)
New features
^^^^^^^^^^^^
-- A new flag - `-static-libclosure` was introduced to support statically linking
- the runtime for the Blocks extension on Windows. This flag currently only
- changes the code generation, and even then, only on Windows. This does not
- impact the linker behaviour like the other `-static-*` flags.
-- OpenACC support, enabled via `-fopenacc` has reached a level of completeness
- to finally be at least notionally usable. Currently, the OpenACC 3.4
- specification has been completely implemented for Sema and AST creation, so
- nodes will show up in the AST after having been properly checked. Lowering is
- currently a work in progress, with compute, loop, and combined constructs
- partially implemented, plus a handful of data and executable constructs
- implemented. Lowering will only work in Clang-IR mode (so only with a compiler
- built with Clang-IR enabled, and with `-fclangir` used on the command line).
- However, note that the Clang-IR implementation status is also quite partial,
- so frequent 'not yet implemented' diagnostics should be expected. Also, the
- ACC MLIR dialect does not currently implement any lowering to LLVM-IR, so no
- code generation is possible for OpenACC.
-- Implemented `P2719R5 Type-aware allocation and deallocation functions <https://wg21.link/P2719>`_
- as an extension in all C++ language modes.
-
-
Crash and bug fixes
^^^^^^^^^^^^^^^^^^^
-- Fixed a crash in ``UnixAPIMisuseChecker`` and ``MallocChecker`` when analyzing
- code with non-standard ``getline`` or ``getdelim`` function signatures. (#GH144884)
-
Improvements
^^^^^^^^^^^^
-- The checker option ``optin.cplusplus.VirtualCall:PureOnly`` was removed,
- because it had been deprecated since 2019 and it is completely useless (it
- was kept only for compatibility with pre-2019 versions, setting it to true is
- equivalent to completely disabling the checker).
-
Moved checkers
^^^^^^^^^^^^^^
-- After lots of improvements, the checker ``alpha.security.ArrayBoundV2`` is
- renamed to ``security.ArrayBound``. As this checker is stable now, the old
- checker ``alpha.security.ArrayBound`` (which was searching for the same kind
-  of bugs with a different, simpler and less accurate algorithm) is removed.
-
.. _release-notes-sanitizers:
Sanitizers
----------
-- ``-fsanitize=vptr`` is no longer a part of ``-fsanitize=undefined``.
-- Sanitizer ignorelists now support the syntax ``src:*=sanitize``,
- ``type:*=sanitize``, ``fun:*=sanitize``, ``global:*=sanitize``,
- and ``mainfile:*=sanitize``.
-
Python Binding Changes
----------------------
-- Made ``Cursor`` hashable.
-- Added ``Cursor.has_attrs``, a binding for ``clang_Cursor_hasAttrs``, to check
- whether a cursor has any attributes.
-- Added ``Cursor.specialized_template``, a binding for
- ``clang_getSpecializedCursorTemplate``, to retrieve the primary template that
- the cursor is a specialization of.
-- Added ``Type.get_methods``, a binding for ``clang_visitCXXMethods``, which
- allows visiting the methods of a class.
-- Added ``Type.get_fully_qualified_name``, which provides fully qualified type names as
- instructed by a PrintingPolicy.
-- Add equality comparison operators for ``File`` type
OpenMP Support
--------------
-- Added support for the 'no_openmp_constructs' assumption clause.
-- Added support for 'self_maps' in the map and requirement clauses.
-- Added support for the 'omp stripe' directive.
-- Fixed a crashing bug with ``omp unroll partial`` if the argument to
- ``partial`` was an invalid expression. (#GH139267)
-- Fixed a crashing bug with ``omp tile sizes`` if the argument to ``sizes`` was
- an invalid expression. (#GH139073)
-- Fixed a crashing bug with ``omp simd collapse`` if the argument to
- ``collapse`` was an invalid expression. (#GH138493)
-- Fixed a crashing bug with a malformed ``cancel`` directive. (#GH139360)
-- Fixed a crashing bug with ``omp distribute dist_schedule`` if the argument to
- ``dist_schedule`` was not strictly positive. (#GH139266)
-- Fixed two crashing bugs with a malformed ``metadirective`` directive. One was
- a crash if the next token after ``metadirective`` was a paren, bracket, or
- brace. The other was if the next token after the meta directive was not an
- open parenthesis. (#GH139665)
-- An error is now emitted when OpenMP ``collapse`` and ``ordered`` clauses have
- an argument larger than what can fit within a 64-bit integer.
-- Added support for private variable reduction.
-- Fixed mapping of arrays of structs containing nested structs with user defined
- mappers, by using compiler-generated default mappers for the outer structs for
- such maps.
-- A deprecation warning is now emitted for the deprecated delimited form of ``declare target``.
+- Added parsing and semantic analysis support for the ``need_device_addr``
+ modifier in the ``adjust_args`` clause.
+- Allow array length to be omitted in array section subscript expression.
Improvements
^^^^^^^^^^^^
Additional Information
-
-===================
+======================
A wide variety of additional information is available on the `Clang web
page <https://clang.llvm.org/>`_. The web page contains versions of the
diff --git a/clang/docs/StandardCPlusPlusModules.rst b/clang/docs/StandardCPlusPlusModules.rst
index 933a57f..31d0a5e 100644
--- a/clang/docs/StandardCPlusPlusModules.rst
+++ b/clang/docs/StandardCPlusPlusModules.rst
@@ -687,16 +687,12 @@ fails to instantiate. For such issues, users can add references to ``N::g`` in
the `module purview <https://eel.is/c++draft/module.unit#5>`_ of ``M.cppm`` to
ensure it is reachable, e.g. ``using N::g;``.
-Support for Reduced BMIs is still experimental, but it may become the default
-in the future. The expected roadmap for Reduced BMIs as of Clang 19.x is:
-
-1. ``-fexperimental-modules-reduced-bmi`` was introduced in v19.x
-2. For v20.x, ``-fmodules-reduced-bmi`` is introduced as an equivalent non-experimental
- option. It is expected to stay opt-in for 1~2 releases, though the period depends
- on user feedback and may be extended.
-3. Finally, ``-fmodules-reduced-bmi`` will be the default. When that time
- comes, the term BMI will refer to the Reduced BMI and the Full BMI will only
- be meaningful to build systems which elect to support two-phase compilation.
+As of Clang 22.x, the Reduced BMI is enabled by default. You may still want to
+use the Full BMI with ``-fno-modules-reduced-bmi`` in the following cases:
+
+1. Your build system uses two-phase compilation but has not yet adjusted its
+   implementation for Reduced BMI.
+2. You encounter a regression with Reduced BMI that you cannot work around. Please
+   report an issue for this case.
Experimental Non-Cascading Changes
----------------------------------
diff --git a/clang/docs/UsersManual.rst b/clang/docs/UsersManual.rst
index 284a404..af0a874 100644
--- a/clang/docs/UsersManual.rst
+++ b/clang/docs/UsersManual.rst
@@ -319,7 +319,7 @@ output format of the diagnostics that it generates.
This option, which defaults to "none", controls whether or not Clang
prints the category associated with a diagnostic when emitting it.
- Each diagnostic may or many not have an associated category, if it
+ Each diagnostic may or may not have an associated category, if it
has one, it is listed in the diagnostic categorization field of the
diagnostic line (in the []'s).
@@ -737,7 +737,7 @@ control the crash diagnostics.
crash diagnostics files, but with lower precedence than the option.
Clang is also capable of generating preprocessed source file(s) and associated
-run script(s) even without a crash. This is specially useful when trying to
+run script(s) even without a crash. This is especially useful when trying to
generate a reproducer for warnings or errors while using modules.
.. option:: -gen-reproducer
@@ -1061,7 +1061,7 @@ In this way, the user may only need to specify a root configuration file with
Usually, config file options are placed before command-line options, regardless
of the actual operation to be performed. The exception is being made for the
-options prefixed with the ``$`` character. These will be used only when linker
+options prefixed with the ``$`` character. These will be used only when the linker
is being invoked, and added after all of the command-line specified linker
inputs. Here is some example of ``$``-prefixed options:
@@ -1222,7 +1222,7 @@ existed.
The push and pop pragmas will save and restore the full diagnostic state
of the compiler, regardless of how it was set. It should be noted that while Clang
supports the GCC pragma, Clang and GCC do not support the exact same set
-of warnings, so even when using GCC compatible #pragmas there is no
+of warnings, so even when using GCC-compatible #pragmas there is no
guarantee that they will have identical behaviour on both compilers.
Clang also doesn't yet support GCC behavior for ``#pragma diagnostic pop``
@@ -1681,7 +1681,7 @@ for more details.
* ``preserve-sign`` - the sign of a flushed-to-zero number is preserved in the sign of 0
* ``positive-zero`` - denormals are flushed to positive zero
- The default value depends on the target. For most targets, defaults to
+ The default value depends on the target. For most targets, it defaults to
``ieee``.
.. option:: -f[no-]strict-float-cast-overflow
@@ -1730,7 +1730,7 @@ for more details.
the C and C++ standards but can be enabled using ``-ffp-contract=fast``.
Fusion can be controlled with the ``FP_CONTRACT`` and ``clang fp contract``
- pragmas. Please note that pragmas will be ingored with
+ pragmas. Please note that pragmas will be ignored with
``-ffp-contract=fast``, and refer to the pragma documentation for a
description of how the pragmas interact with the different ``-ffp-contract``
option values.
@@ -1984,11 +1984,11 @@ for more details.
call to runtime library functions (generally the case, but the BE might
sometimes replace the library call if it knows enough about the potential
range of the inputs). Overflow and non-finite values are handled by the
- library implementation. For the case of multiplication overflow will occur in
+ library implementation. For the case of multiplication, overflow will occur in
accordance with normal floating-point rules. This is the default value.
* ``promoted`` Implementation of complex division using algebraic formulas at
higher precision. Overflow is handled. Non-finite values are handled in some
- cases. If the target does not have native support for a higher precision
+ cases. If the target does not have native support for a higher-precision
data type, the implementation for the complex operation using the Smith
algorithm will be used. Overflow may still occur in some cases. NaN and
infinite values are not handled.
diff --git a/clang/docs/analyzer/checkers.rst b/clang/docs/analyzer/checkers.rst
index 26c5028..4e8b318 100644
--- a/clang/docs/analyzer/checkers.rst
+++ b/clang/docs/analyzer/checkers.rst
@@ -1103,7 +1103,16 @@ To override this threshold to e.g. 4 bytes, use the
optin.portability.UnixAPI
"""""""""""""""""""""""""
-Finds implementation-defined behavior in UNIX/Posix functions.
+Reports situations where 0 is passed as the "size" argument of various
+allocation functions ( ``calloc``, ``malloc``, ``realloc``, ``reallocf``,
+``alloca``, ``__builtin_alloca``, ``__builtin_alloca_with_align``, ``valloc``).
+
+Note that similar functionality is also supported by :ref:`unix-Malloc` which
+reports code that *uses* memory allocated with size zero.
+
+(The name of this checker is motivated by the fact that it was originally
+introduced with the vague goal that it "Finds implementation-defined behavior
+in UNIX/Posix functions.")
optin.taint
diff --git a/clang/include/clang-c/Index.h b/clang/include/clang-c/Index.h
index b929585..be038d9 100644
--- a/clang/include/clang-c/Index.h
+++ b/clang/include/clang-c/Index.h
@@ -6953,7 +6953,7 @@ clang_getCursorUnaryOperatorKind(CXCursor cursor);
* @}
*/
-CINDEX_DEPRECATED
+/* CINDEX_DEPRECATED - disabled to silence MSVC deprecation warnings */
typedef void *CXRemapping;
CINDEX_DEPRECATED CINDEX_LINKAGE CXRemapping clang_getRemappings(const char *);
diff --git a/clang/include/clang/Analysis/Analyses/LifetimeSafety.h b/clang/include/clang/Analysis/Analyses/LifetimeSafety.h
index 9998702..1c00558 100644
--- a/clang/include/clang/Analysis/Analyses/LifetimeSafety.h
+++ b/clang/include/clang/Analysis/Analyses/LifetimeSafety.h
@@ -17,14 +17,105 @@
//===----------------------------------------------------------------------===//
#ifndef LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_H
#define LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_H
-#include "clang/AST/DeclBase.h"
#include "clang/Analysis/AnalysisDeclContext.h"
#include "clang/Analysis/CFG.h"
-namespace clang {
+#include "llvm/ADT/ImmutableSet.h"
+#include "llvm/ADT/StringMap.h"
+#include <memory>
-void runLifetimeSafetyAnalysis(const DeclContext &DC, const CFG &Cfg,
- AnalysisDeclContext &AC);
+namespace clang::lifetimes {
-} // namespace clang
+/// The main entry point for the analysis.
+void runLifetimeSafetyAnalysis(AnalysisDeclContext &AC);
+
+namespace internal {
+// Forward declarations of internal types.
+class Fact;
+class FactManager;
+class LoanPropagationAnalysis;
+class ExpiredLoansAnalysis;
+struct LifetimeFactory;
+
+/// A generic, type-safe wrapper for an ID, distinguished by its `Tag` type.
+/// Used for giving IDs to loans and origins.
+template <typename Tag> struct ID {
+ uint32_t Value = 0;
+
+ bool operator==(const ID<Tag> &Other) const { return Value == Other.Value; }
+ bool operator!=(const ID<Tag> &Other) const { return !(*this == Other); }
+ bool operator<(const ID<Tag> &Other) const { return Value < Other.Value; }
+ ID<Tag> operator++(int) {
+ ID<Tag> Tmp = *this;
+ ++Value;
+ return Tmp;
+ }
+ void Profile(llvm::FoldingSetNodeID &IDBuilder) const {
+ IDBuilder.AddInteger(Value);
+ }
+};
+template <typename Tag>
+inline llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, ID<Tag> ID) {
+ return OS << ID.Value;
+}
+
+using LoanID = ID<struct LoanTag>;
+using OriginID = ID<struct OriginTag>;
+
+// Using LLVM's immutable collections is efficient for dataflow analysis
+// as it avoids deep copies during state transitions.
+// TODO(opt): Consider using a bitset to represent the set of loans.
+using LoanSet = llvm::ImmutableSet<LoanID>;
+using OriginSet = llvm::ImmutableSet<OriginID>;
+
+/// A `ProgramPoint` identifies a location in the CFG by pointing to a specific
+/// lifetime-related event (`Fact`).
+///
+/// A `ProgramPoint` has "after" semantics: it represents the location
+/// immediately after its corresponding `Fact`.
+using ProgramPoint = const Fact *;
+
+/// Runs the lifetime safety analysis and allows querying its results. It
+/// encapsulates the various dataflow analyses.
+class LifetimeSafetyAnalysis {
+public:
+ LifetimeSafetyAnalysis(AnalysisDeclContext &AC);
+ ~LifetimeSafetyAnalysis();
+
+ void run();
+
+ /// Returns the set of loans an origin holds at a specific program point.
+ LoanSet getLoansAtPoint(OriginID OID, ProgramPoint PP) const;
+
+ /// Returns the set of loans that have expired at a specific program point.
+ LoanSet getExpiredLoansAtPoint(ProgramPoint PP) const;
+
+ /// Finds the OriginID for a given declaration.
+ /// Returns a null optional if not found.
+ std::optional<OriginID> getOriginIDForDecl(const ValueDecl *D) const;
+
+ /// Finds the LoanIDs for the loans created with the specific variable as
+ /// their Path.
+ std::vector<LoanID> getLoanIDForVar(const VarDecl *VD) const;
+
+ /// Retrieves program points that were specially marked in the source code
+ /// for testing.
+ ///
+ /// The analysis recognizes special function calls of the form
+ /// `void("__lifetime_test_point_<name>")` as test points. This method returns
+ /// a map from the annotation string (<name>) to the corresponding
+ /// `ProgramPoint`. This allows test harnesses to query the analysis state at
+ /// user-defined locations in the code.
+ /// \note This is intended for testing only.
+ llvm::StringMap<ProgramPoint> getTestPoints() const;
+
+private:
+ AnalysisDeclContext &AC;
+ std::unique_ptr<LifetimeFactory> Factory;
+ std::unique_ptr<FactManager> FactMgr;
+ std::unique_ptr<LoanPropagationAnalysis> LoanPropagation;
+ std::unique_ptr<ExpiredLoansAnalysis> ExpiredLoans;
+};
+} // namespace internal
+} // namespace clang::lifetimes
#endif // LLVM_CLANG_ANALYSIS_ANALYSES_LIFETIMESAFETY_H
diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td
index 5ebb821..c81714e 100644
--- a/clang/include/clang/Basic/Builtins.td
+++ b/clang/include/clang/Basic/Builtins.td
@@ -1334,6 +1334,18 @@ def ElementwiseMinimum : Builtin {
let Prototype = "void(...)";
}
+def ElementwiseMaximumNum : Builtin {
+ let Spellings = ["__builtin_elementwise_maximumnum"];
+ let Attributes = [NoThrow, Const, CustomTypeChecking];
+ let Prototype = "void(...)";
+}
+
+def ElementwiseMinimumNum : Builtin {
+ let Spellings = ["__builtin_elementwise_minimumnum"];
+ let Attributes = [NoThrow, Const, CustomTypeChecking];
+ let Prototype = "void(...)";
+}
+
def ElementwiseCeil : Builtin {
let Spellings = ["__builtin_elementwise_ceil"];
let Attributes = [NoThrow, Const, CustomTypeChecking];
diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def
index d4fef5d..945e11b 100644
--- a/clang/include/clang/Basic/BuiltinsAMDGPU.def
+++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def
@@ -642,6 +642,16 @@ TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_f16_f32, "V2hV2hfUiIb", "nc", "f32-to-f16
// GFX1250+ only builtins.
//===----------------------------------------------------------------------===//
+TARGET_BUILTIN(__builtin_amdgcn_flat_prefetch, "vvC*0Ii", "nc", "vmem-pref-insts")
+TARGET_BUILTIN(__builtin_amdgcn_global_prefetch, "vvC*1Ii", "nc", "vmem-pref-insts")
+
+TARGET_BUILTIN(__builtin_amdgcn_global_load_monitor_b32, "ii*1Ii", "nc", "gfx1250-insts")
+TARGET_BUILTIN(__builtin_amdgcn_global_load_monitor_b64, "V2iV2i*1Ii", "nc", "gfx1250-insts")
+TARGET_BUILTIN(__builtin_amdgcn_global_load_monitor_b128, "V4iV4i*1Ii", "nc", "gfx1250-insts")
+TARGET_BUILTIN(__builtin_amdgcn_flat_load_monitor_b32, "ii*0Ii", "nc", "gfx1250-insts")
+TARGET_BUILTIN(__builtin_amdgcn_flat_load_monitor_b64, "V2iV2i*0Ii", "nc", "gfx1250-insts")
+TARGET_BUILTIN(__builtin_amdgcn_flat_load_monitor_b128, "V4iV4i*0Ii", "nc", "gfx1250-insts")
+
TARGET_BUILTIN(__builtin_amdgcn_tensor_load_to_lds, "vV4iV8iV4iV4iIi", "nc", "gfx1250-insts")
TARGET_BUILTIN(__builtin_amdgcn_tensor_load_to_lds_d2, "vV4iV8iIi", "nc", "gfx1250-insts")
TARGET_BUILTIN(__builtin_amdgcn_tensor_store_from_lds, "vV4iV8iV4iV4iIi", "nc", "gfx1250-insts")
@@ -705,6 +715,7 @@ TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x128_fp8_fp8, "V8hV16iV16iIsV8hIbI
TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x128_fp8_bf8, "V8hV16iV16iIsV8hIbIb", "nc", "gfx1250-insts,wavefrontsize32")
TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x128_bf8_fp8, "V8hV16iV16iIsV8hIbIb", "nc", "gfx1250-insts,wavefrontsize32")
TARGET_BUILTIN(__builtin_amdgcn_wmma_f16_16x16x128_bf8_bf8, "V8hV16iV16iIsV8hIbIb", "nc", "gfx1250-insts,wavefrontsize32")
+TARGET_BUILTIN(__builtin_amdgcn_wmma_f32_16x16x128_f8f6f4, "V8fIiV16iIiV16iIsV8f", "nc", "gfx1250-insts,wavefrontsize32")
TARGET_BUILTIN(__builtin_amdgcn_wmma_f32_16x16x128_fp8_fp8, "V8fV16iV16iIsV8fIbIb", "nc", "gfx1250-insts,wavefrontsize32")
TARGET_BUILTIN(__builtin_amdgcn_wmma_f32_16x16x128_fp8_bf8, "V8fV16iV16iIsV8fIbIb", "nc", "gfx1250-insts,wavefrontsize32")
TARGET_BUILTIN(__builtin_amdgcn_wmma_f32_16x16x128_bf8_fp8, "V8fV16iV16iIsV8fIbIb", "nc", "gfx1250-insts,wavefrontsize32")
diff --git a/clang/include/clang/Basic/CodeGenOptions.def b/clang/include/clang/Basic/CodeGenOptions.def
index cfffeb7..e137738 100644
--- a/clang/include/clang/Basic/CodeGenOptions.def
+++ b/clang/include/clang/Basic/CodeGenOptions.def
@@ -313,9 +313,10 @@ CODEGENOPT(SpeculativeLoadHardening, 1, 0, Benign) ///< Enable speculative load
CODEGENOPT(FineGrainedBitfieldAccesses, 1, 0, Benign) ///< Enable fine-grained bitfield accesses.
CODEGENOPT(StrictEnums , 1, 0, Benign) ///< Optimize based on strict enum definition.
CODEGENOPT(StrictVTablePointers, 1, 0, Benign) ///< Optimize based on the strict vtable pointers
-CODEGENOPT(TimePasses , 1, 0, Benign) ///< Set when -ftime-report or -ftime-report= or -ftime-report-json is enabled.
+CODEGENOPT(TimePasses , 1, 0, Benign) ///< Set when -ftime-report, -ftime-report=, -ftime-report-json, or -stats-file-timers is enabled.
CODEGENOPT(TimePassesPerRun , 1, 0, Benign) ///< Set when -ftime-report=per-pass-run is enabled.
CODEGENOPT(TimePassesJson , 1, 0, Benign) ///< Set when -ftime-report-json is enabled.
+CODEGENOPT(TimePassesStatsFile , 1, 0, Benign) ///< Set when -stats-file-timers is enabled.
CODEGENOPT(TimeTrace , 1, 0, Benign) ///< Set when -ftime-trace is enabled.
VALUE_CODEGENOPT(TimeTraceGranularity, 32, 500, Benign) ///< Minimum time granularity (in microseconds),
///< traced by time profiler
diff --git a/clang/include/clang/Basic/Diagnostic.h b/clang/include/clang/Basic/Diagnostic.h
index c7a6276..cee5bed 100644
--- a/clang/include/clang/Basic/Diagnostic.h
+++ b/clang/include/clang/Basic/Diagnostic.h
@@ -895,7 +895,10 @@ public:
/// \param FormatString A fixed diagnostic format string that will be hashed
/// and mapped to a unique DiagID.
template <unsigned N>
- // TODO: Deprecate this once all uses are removed from Clang.
+ // FIXME: this API should almost never be used; custom diagnostics do not
+ // have an associated diagnostic group and thus cannot be controlled by users
+ // like other diagnostics. The number of times this API is used in Clang
+ // should only ever be reduced, not increased.
// [[deprecated("Use a CustomDiagDesc instead of a Level")]]
unsigned getCustomDiagID(Level L, const char (&FormatString)[N]) {
return Diags->getCustomDiagID((DiagnosticIDs::Level)L,
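For context, a sketch of the discouraged pattern the FIXME refers to (names are illustrative). A diagnostic created this way has no diagnostic group, so users cannot remap or silence it with `-W` options the way they can for ordinary warnings.

```cpp
#include "clang/Basic/Diagnostic.h"

void reportSuspiciousCall(clang::DiagnosticsEngine &Diags,
                          clang::SourceLocation Loc) {
  unsigned DiagID = Diags.getCustomDiagID(clang::DiagnosticsEngine::Warning,
                                          "this call looks suspicious");
  Diags.Report(Loc, DiagID);
}
```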
diff --git a/clang/include/clang/Basic/DiagnosticDriverKinds.td b/clang/include/clang/Basic/DiagnosticDriverKinds.td
index 34b6c0d..759ba04 100644
--- a/clang/include/clang/Basic/DiagnosticDriverKinds.td
+++ b/clang/include/clang/Basic/DiagnosticDriverKinds.td
@@ -116,6 +116,8 @@ def err_drv_cuda_host_arch : Error<
"unsupported architecture '%0' for host compilation">;
def err_drv_mix_cuda_hip : Error<
"mixed CUDA and HIP compilation is not supported">;
+def err_drv_mix_offload : Error<
+ "mixed %0 and %1 offloading compilation is not supported">;
def err_drv_bad_target_id : Error<
"invalid target ID '%0'; format is a processor name followed by an optional "
"colon-delimited list of features followed by an enable/disable sign (e.g., "
diff --git a/clang/include/clang/Basic/DiagnosticIDs.h b/clang/include/clang/Basic/DiagnosticIDs.h
index 2b095f0..f07a003 100644
--- a/clang/include/clang/Basic/DiagnosticIDs.h
+++ b/clang/include/clang/Basic/DiagnosticIDs.h
@@ -283,7 +283,10 @@ public:
// writing, nearly all callers of this function were invalid.
unsigned getCustomDiagID(CustomDiagDesc Diag);
- // TODO: Deprecate this once all uses are removed from LLVM
+ // FIXME: this API should almost never be used; custom diagnostics do not
+ // have an associated diagnostic group and thus cannot be controlled by users
+ // like other diagnostics. The number of times this API is used in Clang
+ // should only ever be reduced, not increased.
// [[deprecated("Use a CustomDiagDesc instead of a Level")]]
unsigned getCustomDiagID(Level Level, StringRef Message) {
return getCustomDiagID([&]() -> CustomDiagDesc {
diff --git a/clang/include/clang/Basic/DiagnosticLexKinds.td b/clang/include/clang/Basic/DiagnosticLexKinds.td
index 723f5d4..c7fe6e1d 100644
--- a/clang/include/clang/Basic/DiagnosticLexKinds.td
+++ b/clang/include/clang/Basic/DiagnosticLexKinds.td
@@ -694,6 +694,9 @@ def err_pragma_push_pop_macro_malformed : Error<
def warn_pragma_pop_macro_no_push : Warning<
"pragma pop_macro could not pop '%0', no matching push_macro">,
InGroup<IgnoredPragmas>;
+def warn_pargma_push_pop_macro_empty_string : Warning<
+ "'#pragma %select{push_macro|pop_macro}0' expected a non-empty string">,
+ InGroup<IgnoredPragmas>;
def warn_pragma_message : Warning<"%0">,
InGroup<PoundPragmaMessage>, DefaultWarnNoWerror;
def err_pragma_message : Error<"%0">;
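Example inputs for the new warning (diagnostic wording taken from the patch); only the empty-string forms are diagnosed:

```cpp
#define X 1
#pragma push_macro("")  // warning: '#pragma push_macro' expected a non-empty string
#pragma pop_macro("")   // warning: '#pragma pop_macro' expected a non-empty string
#pragma push_macro("X") // OK
#undef X
#pragma pop_macro("X")  // OK: restores X
```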
diff --git a/clang/include/clang/Basic/DiagnosticParseKinds.td b/clang/include/clang/Basic/DiagnosticParseKinds.td
index 35903af..165f015 100644
--- a/clang/include/clang/Basic/DiagnosticParseKinds.td
+++ b/clang/include/clang/Basic/DiagnosticParseKinds.td
@@ -1594,6 +1594,9 @@ def err_omp_unknown_adjust_args_op
def err_omp_declare_variant_wrong_clause : Error<
"expected %select{'match'|'match', 'adjust_args', or 'append_args'}0 clause "
"on 'omp declare variant' directive">;
+def err_omp_non_by_ref_need_device_addr_modifier_argument
+ : Error<"expected reference type argument on 'adjust_args' clause with "
+ "'need_device_addr' modifier">;
def err_omp_declare_variant_duplicate_nested_trait : Error<
"nested OpenMP context selector contains duplicated trait '%0'"
" in selector '%1' and set '%2' with different score">;
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index b2ea65a..4a21321 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -13438,13 +13438,13 @@ def note_acc_atomic_mismatch_operand
: Note<"left hand side of assignment operation(%0) must match one side "
"of the sub-operation on the right hand side(%1 and %2)">;
def note_acc_atomic_mismatch_compound_operand
- : Note<"variable %select{|in unary expression|on right hand side of "
+ : Note<"sub-expression %select{|in unary expression|on right hand side of "
"assignment|on left hand side of assignment|on left hand side of "
"compound assignment|on left hand side of assignment}2(%3) must "
- "match variable used %select{|in unary expression|on right hand "
- "side of assignment|<not possible>|on left hand side of compound "
- "assignment|on left hand side of assignment}0(%1) from the first "
- "statement">;
+ "match sub-expression used %select{|in unary expression|on right "
+ "hand side of assignment|<not possible>|on left hand side of "
+ "compound assignment|on left hand side of assignment}0(%1) from the "
+ "first statement">;
def err_acc_declare_required_clauses
: Error<"no valid clauses specified in OpenACC 'declare' directive">;
def err_acc_declare_clause_at_global
diff --git a/clang/include/clang/Basic/LangOptions.def b/clang/include/clang/Basic/LangOptions.def
index 6ac8d49..08d98a7 100644
--- a/clang/include/clang/Basic/LangOptions.def
+++ b/clang/include/clang/Basic/LangOptions.def
@@ -491,6 +491,8 @@ LANGOPT(CheckConstexprFunctionBodies, 1, 1, Benign,
LANGOPT(BoundsSafety, 1, 0, NotCompatible, "Bounds safety extension for C")
+LANGOPT(EnableLifetimeSafety, 1, 0, NotCompatible, "Experimental lifetime safety analysis for C++")
+
LANGOPT(PreserveVec3Type, 1, 0, NotCompatible, "Preserve 3-component vector type")
#undef LANGOPT
diff --git a/clang/include/clang/Basic/Specifiers.h b/clang/include/clang/Basic/Specifiers.h
index 698fd9d..005f261 100644
--- a/clang/include/clang/Basic/Specifiers.h
+++ b/clang/include/clang/Basic/Specifiers.h
@@ -120,7 +120,7 @@ namespace clang {
/// A C++ access specifier (public, private, protected), plus the
/// special value "none" which means different things in different contexts.
- enum AccessSpecifier {
+ enum AccessSpecifier : uint8_t {
AS_public,
AS_protected,
AS_private,
diff --git a/clang/include/clang/Basic/TargetInfo.h b/clang/include/clang/Basic/TargetInfo.h
index 8b66c73..abfbdfa 100644
--- a/clang/include/clang/Basic/TargetInfo.h
+++ b/clang/include/clang/Basic/TargetInfo.h
@@ -1551,8 +1551,8 @@ public:
// Return the target-specific priority for features/cpus/vendors so
// that they can be properly sorted for checking.
- virtual uint64_t getFMVPriority(ArrayRef<StringRef> Features) const {
- return 0;
+ virtual llvm::APInt getFMVPriority(ArrayRef<StringRef> Features) const {
+ return llvm::APInt::getZero(32);
}
// Validate the contents of the __builtin_cpu_is(const char*)
diff --git a/clang/include/clang/Basic/arm_neon.td b/clang/include/clang/Basic/arm_neon.td
index 0daef4a..ef19610 100644
--- a/clang/include/clang/Basic/arm_neon.td
+++ b/clang/include/clang/Basic/arm_neon.td
@@ -964,11 +964,11 @@ def SLI_N : WInst<"vsli_n", "...I", "PlQPl", [ImmCheck<2, ImmCheckShiftLeft, 0>]
// Right shift narrow high
def SHRN_HIGH_N : IOpInst<"vshrn_high_n", "<(<q).I",
"HsHiHlHUsHUiHUl", OP_NARROW_HI>;
-def QSHRUN_HIGH_N : SOpInst<"vqshrun_high_n", "<(<q).I",
+def QSHRUN_HIGH_N : SOpInst<"vqshrun_high_n", "(<U)(<Uq).I",
"HsHiHl", OP_NARROW_HI>;
def RSHRN_HIGH_N : IOpInst<"vrshrn_high_n", "<(<q).I",
"HsHiHlHUsHUiHUl", OP_NARROW_HI>;
-def QRSHRUN_HIGH_N : SOpInst<"vqrshrun_high_n", "<(<q).I",
+def QRSHRUN_HIGH_N : SOpInst<"vqrshrun_high_n", "(<U)(<Uq).I",
"HsHiHl", OP_NARROW_HI>;
def QSHRN_HIGH_N : SOpInst<"vqshrn_high_n", "<(<q).I",
"HsHiHlHUsHUiHUl", OP_NARROW_HI>;
diff --git a/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h b/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h
index 25baf27..5c04d59 100644
--- a/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h
+++ b/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h
@@ -75,6 +75,12 @@ public:
return getConstant(loc, cir::IntAttr::get(ty, value));
}
+ mlir::Value getUnsignedInt(mlir::Location loc, uint64_t val,
+ unsigned numBits) {
+ auto type = cir::IntType::get(getContext(), numBits, /*isSigned=*/false);
+ return getConstAPInt(loc, type, llvm::APInt(numBits, val));
+ }
+
// Creates constant null value for integral type ty.
cir::ConstantOp getNullValue(mlir::Type ty, mlir::Location loc) {
return getConstant(loc, getZeroInitAttr(ty));
diff --git a/clang/include/clang/CIR/Dialect/IR/CIROps.td b/clang/include/clang/CIR/Dialect/IR/CIROps.td
index 01c50554..32bb900 100644
--- a/clang/include/clang/CIR/Dialect/IR/CIROps.td
+++ b/clang/include/clang/CIR/Dialect/IR/CIROps.td
@@ -128,12 +128,12 @@ def CIR_CastKind : CIR_I32EnumAttr<"CastKind", "cast kind", [
// CK_BlockPointerToObjCPointerCast
// CK_AnyPointerToBlockPointerCast
// CK_ObjCObjectLValueCast
- // I32EnumAttrCase<"float_to_complex", 44>,
- // I32EnumAttrCase<"float_complex_to_real", 45>,
- // I32EnumAttrCase<"float_complex_to_bool", 46>,
+ I32EnumAttrCase<"float_to_complex", 44>,
+ I32EnumAttrCase<"float_complex_to_real", 45>,
+ I32EnumAttrCase<"float_complex_to_bool", 46>,
I32EnumAttrCase<"float_complex", 47>,
- // I32EnumAttrCase<"float_complex_to_int_complex", 48>,
- // I32EnumAttrCase<"int_to_complex", 49>,
+ I32EnumAttrCase<"float_complex_to_int_complex", 48>,
+ I32EnumAttrCase<"int_to_complex", 49>,
I32EnumAttrCase<"int_complex_to_real", 50>,
I32EnumAttrCase<"int_complex_to_bool", 51>,
I32EnumAttrCase<"int_complex", 52>,
@@ -607,7 +607,7 @@ def CIR_ConditionOp : CIR_Op<"condition", [
//===----------------------------------------------------------------------===//
defvar CIR_YieldableScopes = [
- "CaseOp", "DoWhileOp", "ForOp", "IfOp", "ScopeOp", "SwitchOp",
+ "ArrayCtor", "CaseOp", "DoWhileOp", "ForOp", "IfOp", "ScopeOp", "SwitchOp",
"TernaryOp", "WhileOp"
];
@@ -2229,6 +2229,50 @@ def CIR_TrapOp : CIR_Op<"trap", [Terminator]> {
}
//===----------------------------------------------------------------------===//
+// ArrayCtor
+//===----------------------------------------------------------------------===//
+
+class CIR_ArrayInitDestroy<string mnemonic> : CIR_Op<mnemonic> {
+ let arguments = (ins
+ Arg<CIR_PtrToArray, "array address", [MemWrite, MemRead]>:$addr
+ );
+
+ let regions = (region SizedRegion<1>:$body);
+ let assemblyFormat = [{
+ $addr `:` qualified(type($addr)) $body attr-dict
+ }];
+
+ let builders = [
+ OpBuilder<(ins "mlir::Value":$addr,
+ "llvm::function_ref<void(mlir::OpBuilder &, mlir::Location)>":$regionBuilder), [{
+ assert(regionBuilder && "builder callback expected");
+ mlir::OpBuilder::InsertionGuard guard($_builder);
+ mlir::Region *r = $_state.addRegion();
+ $_state.addOperands(ValueRange{addr});
+ $_builder.createBlock(r);
+ regionBuilder($_builder, $_state.location);
+ }]>
+ ];
+}
+
+def CIR_ArrayCtor : CIR_ArrayInitDestroy<"array.ctor"> {
+ let summary = "Initialize array elements with C++ constructors";
+ let description = [{
+    Initialize each array element using the same C++ constructor. This
+    operation has a single region containing one block, which takes the
+    address of the current array element to initialize as its argument.
+
+ ```mlir
+ cir.array.ctor(%0 : !cir.ptr<!cir.array<!rec_S x 42>>) {
+ ^bb0(%arg0: !cir.ptr<!rec_S>):
+ cir.call @some_ctor(%arg0) : (!cir.ptr<!rec_S>) -> ()
+ cir.yield
+ }
+ ```
+ }];
+}
+
+//===----------------------------------------------------------------------===//
// VecCreate
//===----------------------------------------------------------------------===//
@@ -3004,6 +3048,27 @@ def CIR_AssumeOp : CIR_Op<"assume"> {
}];
}
+def CIR_AssumeSepStorageOp : CIR_Op<"assume_separate_storage", [
+ SameTypeOperands
+]> {
+ let summary =
+ "Tell the optimizer that two pointers point to different allocations";
+ let description = [{
+    The `cir.assume_separate_storage` operation takes two pointers as
+    arguments and tells the optimizer that they point to different
+    allocations.
+
+ This operation corresponds to the `__builtin_assume_separate_storage`
+ builtin function.
+ }];
+
+ let arguments = (ins CIR_VoidPtrType:$ptr1, CIR_VoidPtrType:$ptr2);
+
+ let assemblyFormat = [{
+ $ptr1 `,` $ptr2 `:` qualified(type($ptr1)) attr-dict
+ }];
+}
+
//===----------------------------------------------------------------------===//
// Branch Probability Operations
//===----------------------------------------------------------------------===//
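For context, the source-level builtin the new `cir.assume_separate_storage` op corresponds to; a usage sketch, not part of the patch:

```cpp
void saxpy(float *dst, const float *src, float a, int n) {
  __builtin_assume_separate_storage(dst, src); // dst and src never alias
  for (int i = 0; i < n; ++i)
    dst[i] += a * src[i];
}
```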
diff --git a/clang/include/clang/CIR/Dialect/IR/CIRTypeConstraints.td b/clang/include/clang/CIR/Dialect/IR/CIRTypeConstraints.td
index 2bf7758..d7d55df 100644
--- a/clang/include/clang/CIR/Dialect/IR/CIRTypeConstraints.td
+++ b/clang/include/clang/CIR/Dialect/IR/CIRTypeConstraints.td
@@ -166,6 +166,12 @@ def CIR_AnyIntOrFloatType : AnyTypeOf<[CIR_AnyFloatType, CIR_AnyIntType],
def CIR_AnyComplexType : CIR_TypeBase<"::cir::ComplexType", "complex type">;
//===----------------------------------------------------------------------===//
+// Array Type predicates
+//===----------------------------------------------------------------------===//
+
+def CIR_AnyArrayType : CIR_TypeBase<"::cir::ArrayType", "array type">;
+
+//===----------------------------------------------------------------------===//
// Pointer Type predicates
//===----------------------------------------------------------------------===//
@@ -216,6 +222,8 @@ def CIR_PtrToIntOrFloatType : CIR_PtrToType<CIR_AnyIntOrFloatType>;
def CIR_PtrToComplexType : CIR_PtrToType<CIR_AnyComplexType>;
+def CIR_PtrToArray : CIR_PtrToType<CIR_AnyArrayType>;
+
//===----------------------------------------------------------------------===//
// Vector Type predicates
//===----------------------------------------------------------------------===//
diff --git a/clang/include/clang/CIR/Dialect/Passes.h b/clang/include/clang/CIR/Dialect/Passes.h
index 02210ec..7a202b1 100644
--- a/clang/include/clang/CIR/Dialect/Passes.h
+++ b/clang/include/clang/CIR/Dialect/Passes.h
@@ -25,6 +25,7 @@ std::unique_ptr<Pass> createCIRFlattenCFGPass();
std::unique_ptr<Pass> createCIRSimplifyPass();
std::unique_ptr<Pass> createHoistAllocasPass();
std::unique_ptr<Pass> createLoweringPreparePass();
+std::unique_ptr<Pass> createLoweringPreparePass(clang::ASTContext *astCtx);
void populateCIRPreLoweringPasses(mlir::OpPassManager &pm);
diff --git a/clang/include/clang/CIR/MissingFeatures.h b/clang/include/clang/CIR/MissingFeatures.h
index 182e4b6..e1a5c3d 100644
--- a/clang/include/clang/CIR/MissingFeatures.h
+++ b/clang/include/clang/CIR/MissingFeatures.h
@@ -196,6 +196,8 @@ struct MissingFeatures {
static bool cxxRecordStaticMembers() { return false; }
static bool dataLayoutTypeAllocSize() { return false; }
static bool deferredCXXGlobalInit() { return false; }
+ static bool ehCleanupFlags() { return false; }
+ static bool ehstackBranches() { return false; }
static bool emitCheckedInBoundsGEP() { return false; }
static bool emitCondLikelihoodViaExpectIntrinsic() { return false; }
static bool emitLifetimeMarkers() { return false; }
@@ -252,9 +254,9 @@ struct MissingFeatures {
static bool writebacks() { return false; }
static bool appleKext() { return false; }
static bool dtorCleanups() { return false; }
- static bool completeDtors() { return false; }
static bool vtableInitialization() { return false; }
static bool msvcBuiltins() { return false; }
+ static bool vlas() { return false; }
// Missing types
static bool dataMemberType() { return false; }
diff --git a/clang/include/clang/Driver/Driver.h b/clang/include/clang/Driver/Driver.h
index d9e328f..4d32552 100644
--- a/clang/include/clang/Driver/Driver.h
+++ b/clang/include/clang/Driver/Driver.h
@@ -337,6 +337,10 @@ private:
/// "clang" as it's first argument.
const char *PrependArg;
+  /// The default value of the -fuse-ld= option. An empty string means the
+  /// default system linker.
+ std::string PreferredLinker;
+
/// Whether to check that input files exist when constructing compilation
/// jobs.
LLVM_PREFERRED_TYPE(bool)
@@ -355,6 +359,9 @@ public:
phases::ID getFinalPhase(const llvm::opt::DerivedArgList &DAL,
llvm::opt::Arg **FinalPhaseArg = nullptr) const;
+ llvm::Expected<std::unique_ptr<llvm::MemoryBuffer>>
+ executeProgram(llvm::ArrayRef<llvm::StringRef> Args) const;
+
private:
/// Certain options suppress the 'no input files' warning.
LLVM_PREFERRED_TYPE(bool)
@@ -367,10 +374,6 @@ private:
/// stored in it, and will clean them up when torn down.
mutable llvm::StringMap<std::unique_ptr<ToolChain>> ToolChains;
- /// The associated offloading architectures with each toolchain.
- llvm::DenseMap<const ToolChain *, llvm::SmallVector<llvm::StringRef>>
- OffloadArchs;
-
private:
/// TranslateInputArgs - Create a new derived argument list from the input
/// arguments, after applying the standard argument translations.
@@ -450,6 +453,11 @@ public:
return ClangExecutable.c_str();
}
+ StringRef getPreferredLinker() const { return PreferredLinker; }
+ void setPreferredLinker(std::string Value) {
+ PreferredLinker = std::move(Value);
+ }
+
bool isSaveTempsEnabled() const { return SaveTemps != SaveTempsNone; }
bool isSaveTempsObj() const { return SaveTemps == SaveTempsObj; }
@@ -537,8 +545,7 @@ public:
/// empty string.
llvm::SmallVector<StringRef>
getOffloadArchs(Compilation &C, const llvm::opt::DerivedArgList &Args,
- Action::OffloadKind Kind, const ToolChain *TC,
- bool SpecificToolchain = true) const;
+ Action::OffloadKind Kind, const ToolChain &TC) const;
/// Check that the file referenced by Value exists. If it doesn't,
/// issue a diagnostic and return false.
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index 6c22f06..916400e 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -1156,7 +1156,7 @@ def offload_arch_EQ : CommaJoined<["--"], "offload-arch=">,
"If 'native' is used the compiler will detect locally installed architectures. "
"For HIP offloading, the device architecture can be followed by target ID features "
"delimited by a colon (e.g. gfx908:xnack+:sramecc-). May be specified more than once.">;
-def no_offload_arch_EQ : Joined<["--"], "no-offload-arch=">,
+def no_offload_arch_EQ : CommaJoined<["--"], "no-offload-arch=">,
Visibility<[ClangOption, FlangOption]>,
HelpText<"Remove CUDA/HIP offloading device architecture (e.g. sm_35, gfx906) from the list of devices to compile for. "
"'all' resets the list to its default value.">;
@@ -1624,7 +1624,7 @@ defm auto_import : BoolFOption<"auto-import",
// In the future this option will be supported by other offloading
// languages and accept other values such as CPU/GPU architectures,
// offload kinds and target aliases.
-def offload_EQ : CommaJoined<["--"], "offload=">, Flags<[NoXarchOption]>,
+def offload_EQ : CommaJoined<["--"], "offload=">, Flags<[NoXarchOption]>, Alias<offload_targets_EQ>,
HelpText<"Specify comma-separated list of offloading target triples (CUDA and HIP only)">;
// C++ Coroutines
@@ -1904,6 +1904,14 @@ defm bounds_safety : BoolFOption<
BothFlags<[], [CC1Option],
" experimental bounds safety extension for C">>;
+defm lifetime_safety : BoolFOption<
+ "experimental-lifetime-safety",
+ LangOpts<"EnableLifetimeSafety">, DefaultFalse,
+ PosFlag<SetTrue, [], [CC1Option], "Enable">,
+ NegFlag<SetFalse, [], [CC1Option], "Disable">,
+ BothFlags<[], [CC1Option],
+ " experimental lifetime safety for C++">>;
+
defm addrsig : BoolFOption<"addrsig",
CodeGenOpts<"Addrsig">, DefaultFalse,
PosFlag<SetTrue, [], [ClangOption, CC1Option], "Emit">,
@@ -3257,13 +3265,14 @@ defm skip_odr_check_in_gmf : BoolOption<"f", "skip-odr-check-in-gmf",
"Perform ODR checks for decls in the global module fragment.">>,
Group<f_Group>;
-def modules_reduced_bmi : Flag<["-"], "fmodules-reduced-bmi">,
- Group<f_Group>, Visibility<[ClangOption, CC1Option]>,
- HelpText<"Generate the reduced BMI">,
- MarshallingInfoFlag<FrontendOpts<"GenReducedBMI">>;
+defm modules_reduced_bmi : BoolOption<"f", "modules-reduced-bmi",
+ FrontendOpts<"GenReducedBMI">, DefaultFalse,
+ NegFlag<SetFalse>,
+ PosFlag<SetTrue, [], [ClangOption, CC1Option],
+ "Generate the reduced BMI">>;
def experimental_modules_reduced_bmi : Flag<["-"], "fexperimental-modules-reduced-bmi">,
- Group<f_Group>, Visibility<[ClangOption, CC1Option]>, Alias<modules_reduced_bmi>;
+ Group<f_Group>, Visibility<[ClangOption, CC1Option]>, Alias<fmodules_reduced_bmi>;
def fmodules_embed_all_files : Joined<["-"], "fmodules-embed-all-files">,
Visibility<[ClangOption, CC1Option, CLOption]>,
@@ -7129,6 +7138,8 @@ def fintrinsic_modules_path : Separate<["-"], "fintrinsic-modules-path">, Group
HelpText<"Specify where to find the compiled intrinsic modules">,
DocBrief<[{This option specifies the location of pre-compiled intrinsic modules,
if they are not in the default location expected by the compiler.}]>;
+def fintrinsic_modules_path_EQ : Joined<["-"], "fintrinsic-modules-path=">,
+ Group<f_Group>, Alias<fintrinsic_modules_path>;
defm backslash : OptInFC1FFlag<"backslash", "Specify that backslash in string introduces an escape character">;
defm xor_operator : OptInFC1FFlag<"xor-operator", "Enable .XOR. as a synonym of .NEQV.">;
@@ -8250,6 +8261,9 @@ def stats_file : Joined<["-"], "stats-file=">,
def stats_file_append : Flag<["-"], "stats-file-append">,
HelpText<"If stats should be appended to stats-file instead of overwriting it">,
MarshallingInfoFlag<FrontendOpts<"AppendStats">>;
+def stats_file_timers : Flag<["-"], "stats-file-timers">,
+ HelpText<"If stats should include timers.">,
+ MarshallingInfoFlag<CodeGenOpts<"TimePassesStatsFile">>;
def fdump_record_layouts_simple : Flag<["-"], "fdump-record-layouts-simple">,
HelpText<"Dump record layout information in a simple form used for testing">,
MarshallingInfoFlag<LangOpts<"DumpRecordLayoutsSimple">>;
diff --git a/clang/include/clang/Driver/ToolChain.h b/clang/include/clang/Driver/ToolChain.h
index b8899e7..2430563 100644
--- a/clang/include/clang/Driver/ToolChain.h
+++ b/clang/include/clang/Driver/ToolChain.h
@@ -202,10 +202,6 @@ protected:
ToolChain(const Driver &D, const llvm::Triple &T,
const llvm::opt::ArgList &Args);
- /// Executes the given \p Executable and returns the stdout.
- llvm::Expected<std::unique_ptr<llvm::MemoryBuffer>>
- executeToolChainProgram(StringRef Executable) const;
-
void setTripleEnvironment(llvm::Triple::EnvironmentType Env);
virtual Tool *buildAssembler() const;
@@ -806,7 +802,8 @@ public:
/// Get paths for device libraries.
virtual llvm::SmallVector<BitCodeLibraryInfo, 12>
- getDeviceLibs(const llvm::opt::ArgList &Args) const;
+ getDeviceLibs(const llvm::opt::ArgList &Args,
+ const Action::OffloadKind DeviceOffloadingKind) const;
/// Add the system specific linker arguments to use
/// for the given HIP runtime library type.
diff --git a/clang/include/clang/Format/Format.h b/clang/include/clang/Format/Format.h
index 7677604..a54ab19 100644
--- a/clang/include/clang/Format/Format.h
+++ b/clang/include/clang/Format/Format.h
@@ -4704,6 +4704,13 @@ struct FormatStyle {
/// <conditional-body> <conditional-body>
/// \endcode
bool AfterIfMacros;
+  /// If ``true``, put a space between the alternative operator ``not`` and
+  /// the opening parenthesis.
+ /// \code
+ /// true: false:
+ /// return not (a || b); vs. return not(a || b);
+ /// \endcode
+ bool AfterNot;
/// If ``true``, put a space between operator overloading and opening
/// parentheses.
/// \code
@@ -4752,9 +4759,9 @@ struct FormatStyle {
: AfterControlStatements(false), AfterForeachMacros(false),
AfterFunctionDeclarationName(false),
AfterFunctionDefinitionName(false), AfterIfMacros(false),
- AfterOverloadedOperator(false), AfterPlacementOperator(true),
- AfterRequiresInClause(false), AfterRequiresInExpression(false),
- BeforeNonEmptyParentheses(false) {}
+ AfterNot(false), AfterOverloadedOperator(false),
+ AfterPlacementOperator(true), AfterRequiresInClause(false),
+ AfterRequiresInExpression(false), BeforeNonEmptyParentheses(false) {}
bool operator==(const SpaceBeforeParensCustom &Other) const {
return AfterControlStatements == Other.AfterControlStatements &&
@@ -4763,6 +4770,7 @@ struct FormatStyle {
Other.AfterFunctionDeclarationName &&
AfterFunctionDefinitionName == Other.AfterFunctionDefinitionName &&
AfterIfMacros == Other.AfterIfMacros &&
+ AfterNot == Other.AfterNot &&
AfterOverloadedOperator == Other.AfterOverloadedOperator &&
AfterPlacementOperator == Other.AfterPlacementOperator &&
AfterRequiresInClause == Other.AfterRequiresInClause &&
diff --git a/clang/include/clang/Lex/DependencyDirectivesScanner.h b/clang/include/clang/Lex/DependencyDirectivesScanner.h
index acdc9e2..f9fec39 100644
--- a/clang/include/clang/Lex/DependencyDirectivesScanner.h
+++ b/clang/include/clang/Lex/DependencyDirectivesScanner.h
@@ -47,11 +47,10 @@ struct Token {
bool is(tok::TokenKind K) const { return Kind == K; }
bool isNot(tok::TokenKind K) const { return Kind != K; }
- bool isOneOf(tok::TokenKind K1, tok::TokenKind K2) const {
- return is(K1) || is(K2);
- }
- template <typename... Ts> bool isOneOf(tok::TokenKind K1, Ts... Ks) const {
- return is(K1) || isOneOf(Ks...);
+ template <typename... Ts> bool isOneOf(Ts... Ks) const {
+ static_assert(sizeof...(Ts) > 0,
+ "requires at least one tok::TokenKind specified");
+ return (is(Ks) || ...);
}
};
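A standalone sketch of the fold-expression pattern the scanner's `Token::isOneOf` now uses; the enum and struct names here are illustrative, not the scanner's own:

```cpp
enum class Kind { Identifier, LParen, RParen, Eof };

struct Tok {
  Kind K;
  constexpr bool is(Kind Other) const { return K == Other; }
  template <typename... Ts> constexpr bool isOneOf(Ts... Ks) const {
    static_assert(sizeof...(Ts) > 0, "requires at least one kind");
    return (is(Ks) || ...); // expands to is(K1) || is(K2) || ...
  }
};

static_assert(Tok{Kind::LParen}.isOneOf(Kind::LParen, Kind::RParen, Kind::Eof));
```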
diff --git a/clang/include/clang/Parse/ParseHLSLRootSignature.h b/clang/include/clang/Parse/ParseHLSLRootSignature.h
index ad66a26..a49bdfd 100644
--- a/clang/include/clang/Parse/ParseHLSLRootSignature.h
+++ b/clang/include/clang/Parse/ParseHLSLRootSignature.h
@@ -30,7 +30,6 @@ namespace hlsl {
class RootSignatureParser {
public:
RootSignatureParser(llvm::dxbc::RootSignatureVersion Version,
- SmallVector<RootSignatureElement> &Elements,
StringLiteral *Signature, Preprocessor &PP);
/// Consumes tokens from the Lexer and constructs the in-memory
@@ -40,6 +39,9 @@ public:
/// Returns true if a parsing error is encountered.
bool parse();
+ /// Return all elements that have been parsed.
+ ArrayRef<RootSignatureElement> getElements() { return Elements; }
+
private:
DiagnosticsEngine &getDiags() { return PP.getDiagnostics(); }
@@ -226,7 +228,7 @@ private:
private:
llvm::dxbc::RootSignatureVersion Version;
- SmallVector<RootSignatureElement> &Elements;
+ SmallVector<RootSignatureElement> Elements;
StringLiteral *Signature;
RootSignatureLexer Lexer;
Preprocessor &PP;
diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h
index b331acb..73eb730 100644
--- a/clang/include/clang/Sema/Sema.h
+++ b/clang/include/clang/Sema/Sema.h
@@ -834,6 +834,13 @@ enum class CCEKind {
///< message.
};
+/// Enums for the diagnostics of target, target_version and target_clones.
+namespace DiagAttrParams {
+enum DiagType { Unsupported, Duplicate, Unknown };
+enum Specifier { None, CPU, Tune };
+enum AttrName { Target, TargetClones, TargetVersion };
+} // end namespace DiagAttrParams
+
void inferNoReturnAttr(Sema &S, const Decl *D);
/// Sema - This implements semantic analysis and AST building for C.
@@ -4922,13 +4929,6 @@ public:
// handled later in the process, once we know how many exist.
bool checkTargetAttr(SourceLocation LiteralLoc, StringRef Str);
- /// Check Target Version attrs
- bool checkTargetVersionAttr(SourceLocation Loc, Decl *D, StringRef Str);
- bool checkTargetClonesAttrString(
- SourceLocation LiteralLoc, StringRef Str, const StringLiteral *Literal,
- Decl *D, bool &HasDefault, bool &HasCommas, bool &HasNotDefault,
- SmallVectorImpl<SmallString<64>> &StringsBuffer);
-
ErrorAttr *mergeErrorAttr(Decl *D, const AttributeCommonInfo &CI,
StringRef NewUserDiagnostic);
FormatAttr *mergeFormatAttr(Decl *D, const AttributeCommonInfo &CI,
diff --git a/clang/include/clang/Sema/SemaARM.h b/clang/include/clang/Sema/SemaARM.h
index 788a7ab..e77d65f 100644
--- a/clang/include/clang/Sema/SemaARM.h
+++ b/clang/include/clang/Sema/SemaARM.h
@@ -91,6 +91,11 @@ public:
/// Return true if the given vector types are lax-compatible SVE vector types,
/// false otherwise.
bool areLaxCompatibleSveTypes(QualType FirstType, QualType SecondType);
+
+ bool checkTargetVersionAttr(const StringRef Str, const SourceLocation Loc);
+ bool checkTargetClonesAttr(SmallVectorImpl<StringRef> &Params,
+ SmallVectorImpl<SourceLocation> &Locs,
+ SmallVectorImpl<SmallString<64>> &NewParams);
};
SemaARM::ArmStreamingType getArmStreamingFnType(const FunctionDecl *FD);
diff --git a/clang/include/clang/Sema/SemaRISCV.h b/clang/include/clang/Sema/SemaRISCV.h
index 8d2e1c6..844cc3c 100644
--- a/clang/include/clang/Sema/SemaRISCV.h
+++ b/clang/include/clang/Sema/SemaRISCV.h
@@ -55,6 +55,11 @@ public:
bool DeclareAndesVectorBuiltins = false;
std::unique_ptr<sema::RISCVIntrinsicManager> IntrinsicManager;
+
+ bool checkTargetVersionAttr(const StringRef Param, const SourceLocation Loc);
+ bool checkTargetClonesAttr(SmallVectorImpl<StringRef> &Params,
+ SmallVectorImpl<SourceLocation> &Locs,
+ SmallVectorImpl<SmallString<64>> &NewParams);
};
std::unique_ptr<sema::RISCVIntrinsicManager>
diff --git a/clang/include/clang/Sema/SemaX86.h b/clang/include/clang/Sema/SemaX86.h
index b5a23f1..20783e3 100644
--- a/clang/include/clang/Sema/SemaX86.h
+++ b/clang/include/clang/Sema/SemaX86.h
@@ -37,6 +37,10 @@ public:
void handleAnyInterruptAttr(Decl *D, const ParsedAttr &AL);
void handleForceAlignArgPointerAttr(Decl *D, const ParsedAttr &AL);
+
+ bool checkTargetClonesAttr(SmallVectorImpl<StringRef> &Params,
+ SmallVectorImpl<SourceLocation> &Locs,
+ SmallVectorImpl<SmallString<64>> &NewParams);
};
} // namespace clang
diff --git a/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td b/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td
index 2234143..38584c9 100644
--- a/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td
+++ b/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td
@@ -1656,8 +1656,8 @@ def CloneChecker : Checker<"CloneChecker">,
let ParentPackage = PortabilityOptIn in {
def UnixAPIPortabilityChecker : Checker<"UnixAPI">,
- HelpText<"Finds implementation-defined behavior in UNIX/Posix functions">,
- Documentation<NotDocumented>;
+ HelpText<"Finds dynamic memory allocation with size zero">,
+ Documentation<HasDocumentation>;
} // end optin.portability
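A small sketch of the kind of code the `optin.portability.UnixAPI` checker flags under its updated description; names here are illustrative:

```cpp
#include <stdlib.h>

void zero_size_alloc() {
  void *p = malloc(0); // zero-size allocation: result is implementation-defined
  free(p);
}
```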
diff --git a/clang/include/clang/StaticAnalyzer/Frontend/CheckerRegistry.h b/clang/include/clang/StaticAnalyzer/Frontend/CheckerRegistry.h
index 43dbfb1..da3efd7 100644
--- a/clang/include/clang/StaticAnalyzer/Frontend/CheckerRegistry.h
+++ b/clang/include/clang/StaticAnalyzer/Frontend/CheckerRegistry.h
@@ -38,29 +38,29 @@
// function clang_registerCheckers. For example:
//
// extern "C"
-// void clang_registerCheckers (CheckerRegistry &registry) {
-// registry.addChecker<MainCallChecker>("example.MainCallChecker",
-// "Disallows calls to functions called main");
+// void clang_registerCheckers(CheckerRegistry &Registry) {
+// Registry.addChecker<MainCallChecker>(
+// "example.MainCallChecker",
+// "Disallows calls to functions called main");
// }
//
-// The first method argument is the full name of the checker, including its
-// enclosing package. By convention, the registered name of a checker is the
-// name of the associated class (the template argument).
-// The second method argument is a short human-readable description of the
-// checker.
+// The first argument of this templated method is the full name of the checker
+// (including its package), while the second argument is a short description
+// that is printed by `-analyzer-checker-help`.
//
-// The clang_registerCheckers function may add any number of checkers to the
-// registry. If any checkers require additional initialization, use the three-
-// argument form of CheckerRegistry::addChecker.
+// A plugin may register several separate checkers by calling `addChecker()`
+// multiple times. If a checker requires custom registration functions (e.g.
+// checker option handling), use the non-templated overload of `addChecker`
+// that takes two callback functions as the first two parameters.
//
// To load a checker plugin, specify the full path to the dynamic library as
// the argument to the -load option in the cc1 frontend. You can then enable
// your custom checker using the -analyzer-checker:
//
-// clang -cc1 -load </path/to/plugin.dylib> -analyze
-// -analyzer-checker=<example.MainCallChecker>
+// clang -cc1 -load /path/to/plugin.dylib -analyze
+// -analyzer-checker=example.MainCallChecker
//
-// For a complete working example, see examples/analyzer-plugin.
+// For complete examples, see clang/lib/Analysis/plugins/SampleAnalyzer.
#ifndef CLANG_ANALYZER_API_VERSION_STRING
// FIXME: The Clang version string is not particularly granular;
@@ -108,30 +108,25 @@ private:
mgr.template registerChecker<T>();
}
- template <typename T> static bool returnTrue(const CheckerManager &mgr) {
- return true;
- }
+ static bool returnTrue(const CheckerManager &) { return true; }
public:
- /// Adds a checker to the registry. Use this non-templated overload when your
- /// checker requires custom initialization.
- void addChecker(RegisterCheckerFn Fn, ShouldRegisterFunction sfn,
- StringRef FullName, StringRef Desc, StringRef DocsUri,
- bool IsHidden);
-
- /// Adds a checker to the registry. Use this templated overload when your
- /// checker does not require any custom initialization.
- /// This function isn't really needed and probably causes more headaches than
- /// the tiny convenience that it provides, but external plugins might use it,
- /// and there isn't a strong incentive to remove it.
+ /// Adds a checker to the registry.
+ /// Use this for a checker defined in a plugin if it requires custom
+ /// registration functions (e.g. for handling checker options).
+ /// NOTE: As of now `DocsUri` is never queried from the checker registry.
+ void addChecker(RegisterCheckerFn Fn, ShouldRegisterFunction Sfn,
+ StringRef FullName, StringRef Desc,
+ StringRef DocsUri = "NoDocsUri", bool IsHidden = false);
+
+ /// Adds a checker to the registry.
+ /// Use this for a checker defined in a plugin if it doesn't require custom
+ /// registration functions.
template <class T>
- void addChecker(StringRef FullName, StringRef Desc, StringRef DocsUri,
- bool IsHidden = false) {
- // Avoid MSVC's Compiler Error C2276:
- // http://msdn.microsoft.com/en-us/library/850cstw1(v=VS.80).aspx
+ void addChecker(StringRef FullName, StringRef Desc,
+ StringRef DocsUri = "NoDocsUri", bool IsHidden = false) {
addChecker(&CheckerRegistry::initializeManager<CheckerManager, T>,
- &CheckerRegistry::returnTrue<T>, FullName, Desc, DocsUri,
- IsHidden);
+ &CheckerRegistry::returnTrue, FullName, Desc, DocsUri, IsHidden);
}
/// Makes the checker with the full name \p fullName depend on the checker
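A sketch of the callback-based overload in a plugin; only the `clang_registerCheckers` entry point and the two-callback `addChecker` come from the header above, the checker class and its names are assumptions (a real plugin also defines `clang_analyzerAPIVersionString`, omitted here):

```cpp
#include "clang/StaticAnalyzer/Core/Checker.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
#include "clang/StaticAnalyzer/Frontend/CheckerRegistry.h"

using namespace clang;
using namespace ento;

namespace {
class ExampleOptionChecker : public Checker<check::PreCall> {
public:
  void checkPreCall(const CallEvent &, CheckerContext &) const {}
};
} // namespace

static void registerExampleOptionChecker(CheckerManager &Mgr) {
  Mgr.registerChecker<ExampleOptionChecker>();
  // Extra registration-time work (e.g. reading checker options through
  // Mgr.getAnalyzerOptions()) would go here.
}

static bool shouldRegisterExampleOptionChecker(const CheckerManager &) {
  return true;
}

extern "C" void clang_registerCheckers(CheckerRegistry &Registry) {
  Registry.addChecker(registerExampleOptionChecker,
                      shouldRegisterExampleOptionChecker,
                      "example.OptionChecker",
                      "Demonstrates the callback-based registration overload");
}
```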
diff --git a/clang/include/clang/Tooling/Inclusions/IncludeStyle.h b/clang/include/clang/Tooling/Inclusions/IncludeStyle.h
index fba90d8..bf06061 100644
--- a/clang/include/clang/Tooling/Inclusions/IncludeStyle.h
+++ b/clang/include/clang/Tooling/Inclusions/IncludeStyle.h
@@ -126,8 +126,8 @@ struct IncludeStyle {
/// * ``""`` means "arbitrary suffix"
/// * ``"$"`` means "no suffix"
///
- /// For example, if configured to ``"(_test)?$"``, then a header a.h would be seen
- /// as the "main" include in both a.cc and a_test.cc.
+ /// For example, if configured to ``"(_test)?$"``, then a header a.h would be
+ /// seen as the "main" include in both a.cc and a_test.cc.
/// \version 3.9
std::string IncludeIsMainRegex;
diff --git a/clang/lib/AST/ByteCode/ByteCodeEmitter.cpp b/clang/lib/AST/ByteCode/ByteCodeEmitter.cpp
index 965e235..3288585 100644
--- a/clang/lib/AST/ByteCode/ByteCodeEmitter.cpp
+++ b/clang/lib/AST/ByteCode/ByteCodeEmitter.cpp
@@ -62,7 +62,7 @@ void ByteCodeEmitter::compileFunc(const FunctionDecl *FuncDecl,
(Func->hasThisPointer() && !Func->isThisPointerExplicit());
for (auto ParamOffset : llvm::drop_begin(Func->ParamOffsets, Drop)) {
const ParmVarDecl *PD = FuncDecl->parameters()[ParamIndex];
- std::optional<PrimType> T = Ctx.classify(PD->getType());
+ OptPrimType T = Ctx.classify(PD->getType());
this->Params.insert({PD, {ParamOffset, T != std::nullopt}});
++ParamIndex;
}
diff --git a/clang/lib/AST/ByteCode/Compiler.cpp b/clang/lib/AST/ByteCode/Compiler.cpp
index 952a43a..63ac536 100644
--- a/clang/lib/AST/ByteCode/Compiler.cpp
+++ b/clang/lib/AST/ByteCode/Compiler.cpp
@@ -25,34 +25,6 @@ using APSInt = llvm::APSInt;
namespace clang {
namespace interp {
-static bool refersToUnion(const Expr *E) {
- for (;;) {
- if (const auto *ME = dyn_cast<MemberExpr>(E)) {
- if (const auto *FD = dyn_cast<FieldDecl>(ME->getMemberDecl());
- FD && FD->getParent()->isUnion())
- return true;
- E = ME->getBase();
- continue;
- }
-
- if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(E)) {
- E = ASE->getBase()->IgnoreImplicit();
- continue;
- }
-
- if (const auto *ICE = dyn_cast<ImplicitCastExpr>(E);
- ICE && (ICE->getCastKind() == CK_NoOp ||
- ICE->getCastKind() == CK_DerivedToBase ||
- ICE->getCastKind() == CK_UncheckedDerivedToBase)) {
- E = ICE->getSubExpr();
- continue;
- }
-
- break;
- }
- return false;
-}
-
static std::optional<bool> getBoolValue(const Expr *E) {
if (const auto *CE = dyn_cast_if_present<ConstantExpr>(E);
CE && CE->hasAPValueResult() &&
@@ -237,7 +209,7 @@ bool Compiler<Emitter>::VisitCastExpr(const CastExpr *CE) {
if (SubExpr->getType().isVolatileQualified())
return this->emitInvalidCast(CastKind::Volatile, /*Fatal=*/true, CE);
- std::optional<PrimType> SubExprT = classify(SubExpr->getType());
+ OptPrimType SubExprT = classify(SubExpr->getType());
// Prepare storage for the result.
if (!Initializing && !SubExprT) {
std::optional<unsigned> LocalIndex = allocateLocal(SubExpr);
@@ -388,7 +360,7 @@ bool Compiler<Emitter>::VisitCastExpr(const CastExpr *CE) {
const Descriptor *Desc = nullptr;
const QualType PointeeType = CE->getType()->getPointeeType();
if (!PointeeType.isNull()) {
- if (std::optional<PrimType> T = classify(PointeeType))
+ if (OptPrimType T = classify(PointeeType))
Desc = P.createDescriptor(SubExpr, *T);
else
Desc = P.createDescriptor(SubExpr, PointeeType.getTypePtr(),
@@ -436,7 +408,7 @@ bool Compiler<Emitter>::VisitCastExpr(const CastExpr *CE) {
PrimType T = classifyPrim(IntType);
QualType PtrType = CE->getType();
const Descriptor *Desc;
- if (std::optional<PrimType> T = classify(PtrType->getPointeeType()))
+ if (OptPrimType T = classify(PtrType->getPointeeType()))
Desc = P.createDescriptor(SubExpr, *T);
else if (PtrType->getPointeeType()->isVoidType())
Desc = nullptr;
@@ -473,12 +445,12 @@ bool Compiler<Emitter>::VisitCastExpr(const CastExpr *CE) {
return this->emitInvalidCast(CastKind::Reinterpret, /*Fatal=*/true, CE);
}
QualType SubExprTy = SubExpr->getType();
- std::optional<PrimType> FromT = classify(SubExprTy);
+ OptPrimType FromT = classify(SubExprTy);
// Casts from integer/vector to vector.
if (CE->getType()->isVectorType())
return this->emitBuiltinBitCast(CE);
- std::optional<PrimType> ToT = classify(CE->getType());
+ OptPrimType ToT = classify(CE->getType());
if (!FromT || !ToT)
return false;
@@ -504,7 +476,7 @@ bool Compiler<Emitter>::VisitCastExpr(const CastExpr *CE) {
case CK_IntegralToBoolean:
case CK_FixedPointToBoolean: {
// HLSL uses this to cast to one-element vectors.
- std::optional<PrimType> FromT = classify(SubExpr->getType());
+ OptPrimType FromT = classify(SubExpr->getType());
if (!FromT)
return false;
@@ -517,8 +489,8 @@ bool Compiler<Emitter>::VisitCastExpr(const CastExpr *CE) {
case CK_BooleanToSignedIntegral:
case CK_IntegralCast: {
- std::optional<PrimType> FromT = classify(SubExpr->getType());
- std::optional<PrimType> ToT = classify(CE->getType());
+ OptPrimType FromT = classify(SubExpr->getType());
+ OptPrimType ToT = classify(CE->getType());
if (!FromT || !ToT)
return false;
@@ -688,7 +660,7 @@ bool Compiler<Emitter>::VisitCastExpr(const CastExpr *CE) {
case CK_HLSLVectorTruncation: {
assert(SubExpr->getType()->isVectorType());
- if (std::optional<PrimType> ResultT = classify(CE)) {
+ if (OptPrimType ResultT = classify(CE)) {
assert(!DiscardResult);
// Result must be either a float or integer. Take the first element.
if (!this->visit(SubExpr))
@@ -872,9 +844,9 @@ bool Compiler<Emitter>::VisitBinaryOperator(const BinaryOperator *BO) {
}
// Typecheck the args.
- std::optional<PrimType> LT = classify(LHS);
- std::optional<PrimType> RT = classify(RHS);
- std::optional<PrimType> T = classify(BO->getType());
+ OptPrimType LT = classify(LHS);
+ OptPrimType RT = classify(RHS);
+ OptPrimType T = classify(BO->getType());
// Special case for C++'s three-way/spaceship operator <=>, which
// returns a std::{strong,weak,partial}_ordering (which is a class, so doesn't
@@ -995,8 +967,8 @@ bool Compiler<Emitter>::VisitPointerArithBinOp(const BinaryOperator *E) {
(!LHS->getType()->isPointerType() && !RHS->getType()->isPointerType()))
return false;
- std::optional<PrimType> LT = classify(LHS);
- std::optional<PrimType> RT = classify(RHS);
+ OptPrimType LT = classify(LHS);
+ OptPrimType RT = classify(RHS);
if (!LT || !RT)
return false;
@@ -1068,7 +1040,7 @@ bool Compiler<Emitter>::VisitLogicalBinOp(const BinaryOperator *E) {
BinaryOperatorKind Op = E->getOpcode();
const Expr *LHS = E->getLHS();
const Expr *RHS = E->getRHS();
- std::optional<PrimType> T = classify(E->getType());
+ OptPrimType T = classify(E->getType());
if (Op == BO_LOr) {
// Logical OR. Visit LHS and only evaluate RHS if LHS was FALSE.
@@ -1648,7 +1620,7 @@ bool Compiler<Emitter>::VisitImplicitValueInitExpr(
const ImplicitValueInitExpr *E) {
QualType QT = E->getType();
- if (std::optional<PrimType> T = classify(QT))
+ if (OptPrimType T = classify(QT))
return this->visitZeroInitializer(*T, QT, E);
if (QT->isRecordType()) {
@@ -1734,7 +1706,7 @@ bool Compiler<Emitter>::VisitArraySubscriptExpr(const ArraySubscriptExpr *E) {
if (!Success)
return false;
- std::optional<PrimType> IndexT = classify(Index->getType());
+ OptPrimType IndexT = classify(Index->getType());
// In error-recovery cases, the index expression has a dependent type.
if (!IndexT)
return this->emitError(E);
@@ -1776,7 +1748,7 @@ bool Compiler<Emitter>::visitInitList(ArrayRef<const Expr *> Inits,
}
// Primitive values.
- if (std::optional<PrimType> T = classify(QT)) {
+ if (OptPrimType T = classify(QT)) {
assert(!DiscardResult);
if (Inits.size() == 0)
return this->visitZeroInitializer(*T, QT, E);
@@ -1840,7 +1812,7 @@ bool Compiler<Emitter>::visitInitList(ArrayRef<const Expr *> Inits,
FToInit = cast<CXXParenListInitExpr>(E)->getInitializedFieldInUnion();
const Record::Field *FieldToInit = R->getField(FToInit);
- if (std::optional<PrimType> T = classify(Init)) {
+ if (OptPrimType T = classify(Init)) {
if (!initPrimitiveField(FieldToInit, Init, *T, /*Activate=*/true))
return false;
} else {
@@ -1859,7 +1831,7 @@ bool Compiler<Emitter>::visitInitList(ArrayRef<const Expr *> Inits,
R->getField(InitIndex)->isUnnamedBitField())
++InitIndex;
- if (std::optional<PrimType> T = classify(Init)) {
+ if (OptPrimType T = classify(Init)) {
const Record::Field *FieldToInit = R->getField(InitIndex);
if (!initPrimitiveField(FieldToInit, Init, *T))
return false;
@@ -1899,7 +1871,7 @@ bool Compiler<Emitter>::visitInitList(ArrayRef<const Expr *> Inits,
if (!this->emitCheckArraySize(NumElems, E))
return false;
- std::optional<PrimType> InitT = classify(CAT->getElementType());
+ OptPrimType InitT = classify(CAT->getElementType());
unsigned ElementIndex = 0;
for (const Expr *Init : Inits) {
if (const auto *EmbedS =
@@ -2013,7 +1985,7 @@ bool Compiler<Emitter>::visitInitList(ArrayRef<const Expr *> Inits,
/// this.
template <class Emitter>
bool Compiler<Emitter>::visitArrayElemInit(unsigned ElemIndex, const Expr *Init,
- std::optional<PrimType> InitT) {
+ OptPrimType InitT) {
if (InitT) {
// Visit the primitive element like normal.
if (!this->visit(Init))
@@ -2042,7 +2014,7 @@ bool Compiler<Emitter>::visitCallArgs(ArrayRef<const Expr *> Args,
unsigned ArgIndex = 0;
for (const Expr *Arg : Args) {
- if (std::optional<PrimType> T = classify(Arg)) {
+ if (OptPrimType T = classify(Arg)) {
if (!this->visit(Arg))
return false;
} else {
@@ -2097,7 +2069,7 @@ bool Compiler<Emitter>::VisitSubstNonTypeTemplateParmExpr(
template <class Emitter>
bool Compiler<Emitter>::VisitConstantExpr(const ConstantExpr *E) {
- std::optional<PrimType> T = classify(E->getType());
+ OptPrimType T = classify(E->getType());
if (T && E->hasAPValueResult()) {
// Try to emit the APValue directly, without visiting the subexpr.
// This will only fail if we can't emit the APValue, so won't emit any
@@ -2292,7 +2264,7 @@ bool Compiler<Emitter>::VisitMemberExpr(const MemberExpr *E) {
const auto maybeLoadValue = [&]() -> bool {
if (E->isGLValue())
return true;
- if (std::optional<PrimType> T = classify(E))
+ if (OptPrimType T = classify(E))
return this->emitLoadPop(*T, E);
return false;
};
@@ -2357,7 +2329,7 @@ bool Compiler<Emitter>::VisitArrayInitLoopExpr(const ArrayInitLoopExpr *E) {
// Investigate compiling this to a loop.
const Expr *SubExpr = E->getSubExpr();
size_t Size = E->getArraySize().getZExtValue();
- std::optional<PrimType> SubExprT = classify(SubExpr);
+ OptPrimType SubExprT = classify(SubExpr);
// So, every iteration, we execute an assignment here
// where the LHS is on the stack (the target array)
@@ -2589,8 +2561,8 @@ bool Compiler<Emitter>::VisitFloatCompoundAssignOperator(
QualType LHSType = LHS->getType();
QualType LHSComputationType = E->getComputationLHSType();
QualType ResultType = E->getComputationResultType();
- std::optional<PrimType> LT = classify(LHSComputationType);
- std::optional<PrimType> RT = classify(ResultType);
+ OptPrimType LT = classify(LHSComputationType);
+ OptPrimType RT = classify(ResultType);
assert(ResultType->isFloatingType());
@@ -2659,8 +2631,8 @@ bool Compiler<Emitter>::VisitPointerCompoundAssignOperator(
BinaryOperatorKind Op = E->getOpcode();
const Expr *LHS = E->getLHS();
const Expr *RHS = E->getRHS();
- std::optional<PrimType> LT = classify(LHS->getType());
- std::optional<PrimType> RT = classify(RHS->getType());
+ OptPrimType LT = classify(LHS->getType());
+ OptPrimType RT = classify(RHS->getType());
if (Op != BO_AddAssign && Op != BO_SubAssign)
return false;
@@ -2698,11 +2670,10 @@ bool Compiler<Emitter>::VisitCompoundAssignOperator(
const Expr *LHS = E->getLHS();
const Expr *RHS = E->getRHS();
- std::optional<PrimType> LHSComputationT =
- classify(E->getComputationLHSType());
- std::optional<PrimType> LT = classify(LHS->getType());
- std::optional<PrimType> RT = classify(RHS->getType());
- std::optional<PrimType> ResultT = classify(E->getType());
+ OptPrimType LHSComputationT = classify(E->getComputationLHSType());
+ OptPrimType LT = classify(LHS->getType());
+ OptPrimType RT = classify(RHS->getType());
+ OptPrimType ResultT = classify(E->getType());
if (!Ctx.getLangOpts().CPlusPlus14)
return this->visit(RHS) && this->visit(LHS) && this->emitError(E);
@@ -2837,7 +2808,7 @@ bool Compiler<Emitter>::VisitMaterializeTemporaryExpr(
// When we're initializing a global variable *or* the storage duration of
// the temporary is explicitly static, create a global variable.
- std::optional<PrimType> SubExprT = classify(SubExpr);
+ OptPrimType SubExprT = classify(SubExpr);
bool IsStatic = E->getStorageDuration() == SD_Static;
if (IsStatic) {
std::optional<unsigned> GlobalIndex = P.createGlobal(E);
@@ -2931,7 +2902,7 @@ bool Compiler<Emitter>::VisitCompoundLiteralExpr(const CompoundLiteralExpr *E) {
return this->visitInitializer(Init) && this->emitFinishInit(E);
}
- std::optional<PrimType> T = classify(E->getType());
+ OptPrimType T = classify(E->getType());
if (E->isFileScope()) {
// Avoid creating a variable if this is a primitive RValue anyway.
if (T && !E->isLValue())
@@ -3014,7 +2985,7 @@ bool Compiler<Emitter>::VisitLambdaExpr(const LambdaExpr *E) {
continue;
++CaptureInitIt;
- if (std::optional<PrimType> T = classify(Init)) {
+ if (OptPrimType T = classify(Init)) {
if (!this->visit(Init))
return false;
@@ -3061,21 +3032,21 @@ bool Compiler<Emitter>::VisitCXXReinterpretCastExpr(
const CXXReinterpretCastExpr *E) {
const Expr *SubExpr = E->getSubExpr();
- std::optional<PrimType> FromT = classify(SubExpr);
- std::optional<PrimType> ToT = classify(E);
+ OptPrimType FromT = classify(SubExpr);
+ OptPrimType ToT = classify(E);
if (!FromT || !ToT)
return this->emitInvalidCast(CastKind::Reinterpret, /*Fatal=*/true, E);
if (FromT == PT_Ptr || ToT == PT_Ptr) {
// Both types could be PT_Ptr because their expressions are glvalues.
- std::optional<PrimType> PointeeFromT;
+ OptPrimType PointeeFromT;
if (SubExpr->getType()->isPointerOrReferenceType())
PointeeFromT = classify(SubExpr->getType()->getPointeeType());
else
PointeeFromT = classify(SubExpr->getType());
- std::optional<PrimType> PointeeToT;
+ OptPrimType PointeeToT;
if (E->getType()->isPointerOrReferenceType())
PointeeToT = classify(E->getType()->getPointeeType());
else
@@ -3344,7 +3315,7 @@ bool Compiler<Emitter>::VisitCXXScalarValueInitExpr(
if (DiscardResult || Ty->isVoidType())
return true;
- if (std::optional<PrimType> T = classify(Ty))
+ if (OptPrimType T = classify(Ty))
return this->visitZeroInitializer(*T, Ty, E);
if (const auto *CT = Ty->getAs<ComplexType>()) {
@@ -3457,7 +3428,7 @@ bool Compiler<Emitter>::VisitCXXNewExpr(const CXXNewExpr *E) {
assert(classifyPrim(E->getType()) == PT_Ptr);
const Expr *Init = E->getInitializer();
QualType ElementType = E->getAllocatedType();
- std::optional<PrimType> ElemT = classify(ElementType);
+ OptPrimType ElemT = classify(ElementType);
unsigned PlacementArgs = E->getNumPlacementArgs();
const FunctionDecl *OperatorNew = E->getOperatorNew();
const Expr *PlacementDest = nullptr;
@@ -3645,7 +3616,7 @@ bool Compiler<Emitter>::VisitCXXNewExpr(const CXXNewExpr *E) {
if (!this->emitStorePop(InitT, E))
return false;
} else if (DynamicInit) {
- if (std::optional<PrimType> InitT = classify(DynamicInit)) {
+ if (OptPrimType InitT = classify(DynamicInit)) {
if (!this->visit(DynamicInit))
return false;
if (!this->emitStorePop(*InitT, E))
@@ -4154,7 +4125,7 @@ bool Compiler<Emitter>::visitInitializer(const Expr *E) {
}
template <class Emitter> bool Compiler<Emitter>::visitBool(const Expr *E) {
- std::optional<PrimType> T = classify(E->getType());
+ OptPrimType T = classify(E->getType());
if (!T) {
// Convert complex values to bool.
if (E->getType()->isAnyComplexType()) {
@@ -4309,7 +4280,7 @@ bool Compiler<Emitter>::visitZeroArrayInitializer(QualType T, const Expr *E) {
QualType ElemType = AT->getElementType();
size_t NumElems = cast<ConstantArrayType>(AT)->getZExtSize();
- if (std::optional<PrimType> ElemT = classify(ElemType)) {
+ if (OptPrimType ElemT = classify(ElemType)) {
for (size_t I = 0; I != NumElems; ++I) {
if (!this->visitZeroInitializer(*ElemT, ElemType, E))
return false;
@@ -4602,7 +4573,7 @@ bool Compiler<Emitter>::visitExpr(const Expr *E, bool DestroyToplevelScope) {
}
// Expressions with a primitive return type.
- if (std::optional<PrimType> T = classify(E)) {
+ if (OptPrimType T = classify(E)) {
if (!visit(E))
return false;
@@ -4679,7 +4650,7 @@ bool Compiler<Emitter>::visitDeclAndReturn(const VarDecl *VD,
if (!this->visitVarDecl(VD, /*Toplevel=*/true))
return false;
- std::optional<PrimType> VarT = classify(VD->getType());
+ OptPrimType VarT = classify(VD->getType());
if (Context::shouldBeGloballyIndexed(VD)) {
auto GlobalIndex = P.getGlobal(VD);
assert(GlobalIndex); // visitVarDecl() didn't return false.
@@ -4736,7 +4707,7 @@ VarCreationState Compiler<Emitter>::visitVarDecl(const VarDecl *VD,
return VarCreationState::NotCreated();
const Expr *Init = VD->getInit();
- std::optional<PrimType> VarT = classify(VD->getType());
+ OptPrimType VarT = classify(VD->getType());
if (Init && Init->isValueDependent())
return false;
@@ -4868,7 +4839,7 @@ bool Compiler<Emitter>::visitAPValueInitializer(const APValue &Val,
const Record::Field *RF = R->getField(I);
QualType FieldType = RF->Decl->getType();
- if (std::optional<PrimType> PT = classify(FieldType)) {
+ if (OptPrimType PT = classify(FieldType)) {
if (!this->visitAPValue(F, *PT, E))
return false;
if (!this->emitInitField(*PT, RF->Offset, E))
@@ -4898,7 +4869,7 @@ bool Compiler<Emitter>::visitAPValueInitializer(const APValue &Val,
QualType ElemType = ArrType->getElementType();
for (unsigned A = 0, AN = Val.getArraySize(); A != AN; ++A) {
const APValue &Elem = Val.getArrayInitializedElt(A);
- if (std::optional<PrimType> ElemT = classify(ElemType)) {
+ if (OptPrimType ElemT = classify(ElemType)) {
if (!this->visitAPValue(Elem, *ElemT, E))
return false;
if (!this->emitInitElem(*ElemT, A, E))
@@ -4958,7 +4929,7 @@ bool Compiler<Emitter>::VisitBuiltinCallExpr(const CallExpr *E,
}
QualType ReturnType = E->getType();
- std::optional<PrimType> ReturnT = classify(E);
+ OptPrimType ReturnT = classify(E);
// Non-primitive return type. Prepare storage.
if (!Initializing && !ReturnT && !ReturnType->isVoidType()) {
@@ -5032,7 +5003,7 @@ bool Compiler<Emitter>::VisitCallExpr(const CallExpr *E) {
BlockScope<Emitter> CallScope(this, ScopeKind::Call);
QualType ReturnType = E->getCallReturnType(Ctx.getASTContext());
- std::optional<PrimType> T = classify(ReturnType);
+ OptPrimType T = classify(ReturnType);
bool HasRVO = !ReturnType->isVoidType() && !T;
if (HasRVO) {
@@ -5402,6 +5373,53 @@ bool Compiler<Emitter>::maybeEmitDeferredVarInit(const VarDecl *VD) {
return true;
}
+static bool hasTrivialDefaultCtorParent(const FieldDecl *FD) {
+ assert(FD);
+ assert(FD->getParent()->isUnion());
+ const auto *CXXRD = dyn_cast<CXXRecordDecl>(FD->getParent());
+ return !CXXRD || CXXRD->hasTrivialDefaultConstructor();
+}
+
+template <class Emitter> bool Compiler<Emitter>::refersToUnion(const Expr *E) {
+ for (;;) {
+ if (const auto *ME = dyn_cast<MemberExpr>(E)) {
+ if (const auto *FD = dyn_cast<FieldDecl>(ME->getMemberDecl());
+ FD && FD->getParent()->isUnion() && hasTrivialDefaultCtorParent(FD))
+ return true;
+ E = ME->getBase();
+ continue;
+ }
+
+ if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(E)) {
+ E = ASE->getBase()->IgnoreImplicit();
+ continue;
+ }
+
+ if (const auto *ICE = dyn_cast<ImplicitCastExpr>(E);
+ ICE && (ICE->getCastKind() == CK_NoOp ||
+ ICE->getCastKind() == CK_DerivedToBase ||
+ ICE->getCastKind() == CK_UncheckedDerivedToBase)) {
+ E = ICE->getSubExpr();
+ continue;
+ }
+
+ if (const auto *This = dyn_cast<CXXThisExpr>(E)) {
+ const auto *ThisRecord =
+ This->getType()->getPointeeType()->getAsRecordDecl();
+ if (!ThisRecord->isUnion())
+ return false;
+ // Otherwise, activate only if we're inside this union's constructor.
+ if (const auto *Ctor =
+ dyn_cast_if_present<CXXConstructorDecl>(CompilingFunction))
+ return Ctor->getParent() == ThisRecord;
+ return false;
+ }
+
+ break;
+ }
+ return false;
+}
+
template <class Emitter>
bool Compiler<Emitter>::visitDeclStmt(const DeclStmt *DS,
bool EvaluateConditionDecl) {
@@ -5933,17 +5951,16 @@ bool Compiler<Emitter>::compileConstructor(const CXXConstructorDecl *Ctor) {
if (InitExpr->getType().isNull())
return false;
- if (std::optional<PrimType> T = this->classify(InitExpr)) {
+ if (OptPrimType T = this->classify(InitExpr)) {
+ if (Activate && !this->emitActivateThisField(FieldOffset, InitExpr))
+ return false;
+
if (!this->visit(InitExpr))
return false;
bool BitField = F->isBitField();
- if (BitField && Activate)
- return this->emitInitThisBitFieldActivate(*T, F, FieldOffset, InitExpr);
if (BitField)
return this->emitInitThisBitField(*T, F, FieldOffset, InitExpr);
- if (Activate)
- return this->emitInitThisFieldActivate(*T, FieldOffset, InitExpr);
return this->emitInitThisField(*T, FieldOffset, InitExpr);
}
// Non-primitive case. Get a pointer to the field-to-initialize
@@ -6189,7 +6206,7 @@ bool Compiler<Emitter>::VisitUnaryOperator(const UnaryOperator *E) {
return this->VisitVectorUnaryOperator(E);
if (SubExpr->getType()->isFixedPointType())
return this->VisitFixedPointUnaryOperator(E);
- std::optional<PrimType> T = classify(SubExpr->getType());
+ OptPrimType T = classify(SubExpr->getType());
switch (E->getOpcode()) {
case UO_PostInc: { // x++
@@ -6415,7 +6432,7 @@ bool Compiler<Emitter>::VisitComplexUnaryOperator(const UnaryOperator *E) {
if (DiscardResult)
return this->discard(SubExpr);
- std::optional<PrimType> ResT = classify(E);
+ OptPrimType ResT = classify(E);
auto prepareResult = [=]() -> bool {
if (!ResT && !Initializing) {
std::optional<unsigned> LocalIndex = allocateLocal(SubExpr);
@@ -6637,7 +6654,7 @@ bool Compiler<Emitter>::visitDeclRef(const ValueDecl *D, const Expr *E) {
if (std::optional<unsigned> Index = P.getOrCreateGlobal(D)) {
if (!this->emitGetPtrGlobal(*Index, E))
return false;
- if (std::optional<PrimType> T = classify(E->getType())) {
+ if (OptPrimType T = classify(E->getType())) {
if (!this->visitAPValue(TPOD->getValue(), *T, E))
return false;
return this->emitInitGlobal(*T, *Index, E);
@@ -7136,7 +7153,7 @@ bool Compiler<Emitter>::emitBuiltinBitCast(const CastExpr *E) {
const Expr *SubExpr = E->getSubExpr();
QualType FromType = SubExpr->getType();
QualType ToType = E->getType();
- std::optional<PrimType> ToT = classify(ToType);
+ OptPrimType ToT = classify(ToType);
assert(!ToType->isReferenceType());
@@ -7157,7 +7174,7 @@ bool Compiler<Emitter>::emitBuiltinBitCast(const CastExpr *E) {
if (SubExpr->isGLValue() || FromType->isVectorType()) {
if (!this->visit(SubExpr))
return false;
- } else if (std::optional<PrimType> FromT = classify(SubExpr)) {
+ } else if (OptPrimType FromT = classify(SubExpr)) {
unsigned TempOffset =
allocateLocalPrimitive(SubExpr, *FromT, /*IsConst=*/true);
if (!this->visit(SubExpr))
diff --git a/clang/lib/AST/ByteCode/Compiler.h b/clang/lib/AST/ByteCode/Compiler.h
index debee672..3a26342 100644
--- a/clang/lib/AST/ByteCode/Compiler.h
+++ b/clang/lib/AST/ByteCode/Compiler.h
@@ -254,12 +254,8 @@ protected:
/// If the function does not exist yet, it is compiled.
const Function *getFunction(const FunctionDecl *FD);
- std::optional<PrimType> classify(const Expr *E) const {
- return Ctx.classify(E);
- }
- std::optional<PrimType> classify(QualType Ty) const {
- return Ctx.classify(Ty);
- }
+ OptPrimType classify(const Expr *E) const { return Ctx.classify(E); }
+ OptPrimType classify(QualType Ty) const { return Ctx.classify(Ty); }
/// Classifies a known primitive type.
PrimType classifyPrim(QualType Ty) const {
@@ -306,7 +302,7 @@ protected:
bool visitInitList(ArrayRef<const Expr *> Inits, const Expr *ArrayFiller,
const Expr *E);
bool visitArrayElemInit(unsigned ElemIndex, const Expr *Init,
- std::optional<PrimType> InitT);
+ OptPrimType InitT);
bool visitCallArgs(ArrayRef<const Expr *> Args, const FunctionDecl *FuncDecl,
bool Activate);
@@ -405,6 +401,8 @@ private:
bool checkLiteralType(const Expr *E);
bool maybeEmitDeferredVarInit(const VarDecl *VD);
+ bool refersToUnion(const Expr *E);
+
protected:
/// Variable to storage mapping.
llvm::DenseMap<const ValueDecl *, Scope::Local> Locals;
@@ -435,7 +433,7 @@ protected:
bool InitStackActive = false;
/// Type of the expression returned by the function.
- std::optional<PrimType> ReturnType;
+ OptPrimType ReturnType;
/// Switch case mapping.
CaseMap CaseLabels;
diff --git a/clang/lib/AST/ByteCode/Context.cpp b/clang/lib/AST/ByteCode/Context.cpp
index ead6e4a..aaeb52e 100644
--- a/clang/lib/AST/ByteCode/Context.cpp
+++ b/clang/lib/AST/ByteCode/Context.cpp
@@ -235,6 +235,43 @@ bool Context::evaluateCharRange(State &Parent, const Expr *SizeExpr,
return evaluateStringRepr(Parent, SizeExpr, PtrExpr, Result);
}
+bool Context::evaluateStrlen(State &Parent, const Expr *E, uint64_t &Result) {
+ assert(Stk.empty());
+ Compiler<EvalEmitter> C(*this, *P, Parent, Stk);
+
+ auto PtrRes = C.interpretAsPointer(E, [&](const Pointer &Ptr) {
+ const Descriptor *FieldDesc = Ptr.getFieldDesc();
+ if (!FieldDesc->isPrimitiveArray())
+ return false;
+
+ unsigned N = Ptr.getNumElems();
+ if (Ptr.elemSize() == 1) {
+ Result = strnlen(reinterpret_cast<const char *>(Ptr.getRawAddress()), N);
+ return Result != N;
+ }
+
+ PrimType ElemT = FieldDesc->getPrimType();
+ Result = 0;
+ for (unsigned I = Ptr.getIndex(); I != N; ++I) {
+ INT_TYPE_SWITCH(ElemT, {
+ auto Elem = Ptr.elem<T>(I);
+ if (Elem.isZero())
+ return true;
+ ++Result;
+ });
+ }
+ // We didn't find a 0 byte.
+ return false;
+ });
+
+ if (PtrRes.isInvalid()) {
+ C.cleanup();
+ Stk.clear();
+ return false;
+ }
+ return true;
+}
+
const LangOptions &Context::getLangOpts() const { return Ctx.getLangOpts(); }
static PrimType integralTypeToPrimTypeS(unsigned BitWidth) {
@@ -269,7 +306,7 @@ static PrimType integralTypeToPrimTypeU(unsigned BitWidth) {
llvm_unreachable("Unhandled BitWidth");
}
-std::optional<PrimType> Context::classify(QualType T) const {
+OptPrimType Context::classify(QualType T) const {
if (const auto *BT = dyn_cast<BuiltinType>(T.getCanonicalType())) {
auto Kind = BT->getKind();
@@ -505,7 +542,7 @@ const Function *Context::getOrCreateFunction(const FunctionDecl *FuncDecl) {
// Assign descriptors to all parameters.
// Composite objects are lowered to pointers.
for (const ParmVarDecl *PD : FuncDecl->parameters()) {
- std::optional<PrimType> T = classify(PD->getType());
+ OptPrimType T = classify(PD->getType());
PrimType PT = T.value_or(PT_Ptr);
Descriptor *Desc = P->createDescriptor(PD, PT);
ParamDescriptors.insert({ParamOffset, {PT, Desc}});
@@ -533,7 +570,7 @@ const Function *Context::getOrCreateObjCBlock(const BlockExpr *E) {
// Assign descriptors to all parameters.
// Composite objects are lowered to pointers.
for (const ParmVarDecl *PD : BD->parameters()) {
- std::optional<PrimType> T = classify(PD->getType());
+ OptPrimType T = classify(PD->getType());
PrimType PT = T.value_or(PT_Ptr);
Descriptor *Desc = P->createDescriptor(PD, PT);
ParamDescriptors.insert({ParamOffset, {PT, Desc}});
diff --git a/clang/lib/AST/ByteCode/Context.h b/clang/lib/AST/ByteCode/Context.h
index acf7504..62ef529 100644
--- a/clang/lib/AST/ByteCode/Context.h
+++ b/clang/lib/AST/ByteCode/Context.h
@@ -66,6 +66,10 @@ public:
bool evaluateCharRange(State &Parent, const Expr *SizeExpr,
const Expr *PtrExpr, std::string &Result);
+ /// Evaluate \param E and, if it can be evaluated to a string literal,
+ /// run strlen() on it.
+ bool evaluateStrlen(State &Parent, const Expr *E, uint64_t &Result);
+
/// Returns the AST context.
ASTContext &getASTContext() const { return Ctx; }
/// Returns the language options.
@@ -78,10 +82,10 @@ public:
uint32_t getBitWidth(QualType T) const { return Ctx.getIntWidth(T); }
/// Classifies a type.
- std::optional<PrimType> classify(QualType T) const;
+ OptPrimType classify(QualType T) const;
/// Classifies an expression.
- std::optional<PrimType> classify(const Expr *E) const {
+ OptPrimType classify(const Expr *E) const {
assert(E);
if (E->isGLValue())
return PT_Ptr;
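A minimal sketch of the intended entry point for the new strlen evaluation, based on the tryEvaluateStrLen hook added in ExprConstant.cpp further below (the expression and context variables are illustrative):

  // Sketch, not part of the patch: given `const Expr *E` and `ASTContext &Ctx`,
  // strlen of a constant string can now also be folded through the bytecode
  // interpreter's Context::evaluateStrlen.
  uint64_t Len = 0;
  if (E->tryEvaluateStrLen(Len, Ctx)) {
    // Len holds the folded length, e.g. 5 when E points into "hello".
  }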
diff --git a/clang/lib/AST/ByteCode/Descriptor.h b/clang/lib/AST/ByteCode/Descriptor.h
index 0227e4c..4c925f6 100644
--- a/clang/lib/AST/ByteCode/Descriptor.h
+++ b/clang/lib/AST/ByteCode/Descriptor.h
@@ -164,7 +164,7 @@ public:
/// The primitive type this descriptor was created for,
/// or the primitive element type in case this is
/// a primitive array.
- const std::optional<PrimType> PrimT = std::nullopt;
+ const OptPrimType PrimT = std::nullopt;
/// Flag indicating if the block is mutable.
const bool IsConst = false;
/// Flag indicating if a field is mutable.
diff --git a/clang/lib/AST/ByteCode/EvalEmitter.cpp b/clang/lib/AST/ByteCode/EvalEmitter.cpp
index 6e511bc..81ebc56 100644
--- a/clang/lib/AST/ByteCode/EvalEmitter.cpp
+++ b/clang/lib/AST/ByteCode/EvalEmitter.cpp
@@ -324,7 +324,7 @@ void EvalEmitter::updateGlobalTemporaries() {
const Pointer &Ptr = P.getPtrGlobal(*GlobalIndex);
APValue *Cached = Temp->getOrCreateValue(true);
- if (std::optional<PrimType> T = Ctx.classify(E->getType())) {
+ if (OptPrimType T = Ctx.classify(E->getType())) {
TYPE_SWITCH(
*T, { *Cached = Ptr.deref<T>().toAPValue(Ctx.getASTContext()); });
} else {
diff --git a/clang/lib/AST/ByteCode/EvaluationResult.cpp b/clang/lib/AST/ByteCode/EvaluationResult.cpp
index f59612b..b11531f 100644
--- a/clang/lib/AST/ByteCode/EvaluationResult.cpp
+++ b/clang/lib/AST/ByteCode/EvaluationResult.cpp
@@ -204,7 +204,7 @@ static void collectBlocks(const Pointer &Ptr,
} else if (Desc->isPrimitiveArray() && Desc->getPrimType() == PT_Ptr) {
for (unsigned I = 0; I != Desc->getNumElems(); ++I) {
- const Pointer &ElemPointee = Ptr.atIndex(I).deref<Pointer>();
+ const Pointer &ElemPointee = Ptr.elem<Pointer>(I);
if (isUsefulPtr(ElemPointee) && !Blocks.contains(ElemPointee.block()))
collectBlocks(ElemPointee, Blocks);
}
diff --git a/clang/lib/AST/ByteCode/Interp.h b/clang/lib/AST/ByteCode/Interp.h
index 7ba6e21..9012442 100644
--- a/clang/lib/AST/ByteCode/Interp.h
+++ b/clang/lib/AST/ByteCode/Interp.h
@@ -468,10 +468,10 @@ inline bool Mulc(InterpState &S, CodePtr OpPC) {
const Pointer &Result = S.Stk.peek<Pointer>();
if constexpr (std::is_same_v<T, Floating>) {
- APFloat A = LHS.atIndex(0).deref<Floating>().getAPFloat();
- APFloat B = LHS.atIndex(1).deref<Floating>().getAPFloat();
- APFloat C = RHS.atIndex(0).deref<Floating>().getAPFloat();
- APFloat D = RHS.atIndex(1).deref<Floating>().getAPFloat();
+ APFloat A = LHS.elem<Floating>(0).getAPFloat();
+ APFloat B = LHS.elem<Floating>(1).getAPFloat();
+ APFloat C = RHS.elem<Floating>(0).getAPFloat();
+ APFloat D = RHS.elem<Floating>(1).getAPFloat();
APFloat ResR(A.getSemantics());
APFloat ResI(A.getSemantics());
@@ -480,20 +480,20 @@ inline bool Mulc(InterpState &S, CodePtr OpPC) {
// Copy into the result.
Floating RA = S.allocFloat(A.getSemantics());
RA.copy(ResR);
- Result.atIndex(0).deref<Floating>() = RA; // Floating(ResR);
+ Result.elem<Floating>(0) = RA; // Floating(ResR);
Result.atIndex(0).initialize();
Floating RI = S.allocFloat(A.getSemantics());
RI.copy(ResI);
- Result.atIndex(1).deref<Floating>() = RI; // Floating(ResI);
+ Result.elem<Floating>(1) = RI; // Floating(ResI);
Result.atIndex(1).initialize();
Result.initialize();
} else {
// Integer element type.
- const T &LHSR = LHS.atIndex(0).deref<T>();
- const T &LHSI = LHS.atIndex(1).deref<T>();
- const T &RHSR = RHS.atIndex(0).deref<T>();
- const T &RHSI = RHS.atIndex(1).deref<T>();
+ const T &LHSR = LHS.elem<T>(0);
+ const T &LHSI = LHS.elem<T>(1);
+ const T &RHSR = RHS.elem<T>(0);
+ const T &RHSI = RHS.elem<T>(1);
unsigned Bits = LHSR.bitWidth();
// real(Result) = (real(LHS) * real(RHS)) - (imag(LHS) * imag(RHS))
@@ -503,7 +503,7 @@ inline bool Mulc(InterpState &S, CodePtr OpPC) {
T B;
if (T::mul(LHSI, RHSI, Bits, &B))
return false;
- if (T::sub(A, B, Bits, &Result.atIndex(0).deref<T>()))
+ if (T::sub(A, B, Bits, &Result.elem<T>(0)))
return false;
Result.atIndex(0).initialize();
@@ -512,7 +512,7 @@ inline bool Mulc(InterpState &S, CodePtr OpPC) {
return false;
if (T::mul(LHSI, RHSR, Bits, &B))
return false;
- if (T::add(A, B, Bits, &Result.atIndex(1).deref<T>()))
+ if (T::add(A, B, Bits, &Result.elem<T>(1)))
return false;
Result.atIndex(1).initialize();
Result.initialize();
@@ -528,10 +528,10 @@ inline bool Divc(InterpState &S, CodePtr OpPC) {
const Pointer &Result = S.Stk.peek<Pointer>();
if constexpr (std::is_same_v<T, Floating>) {
- APFloat A = LHS.atIndex(0).deref<Floating>().getAPFloat();
- APFloat B = LHS.atIndex(1).deref<Floating>().getAPFloat();
- APFloat C = RHS.atIndex(0).deref<Floating>().getAPFloat();
- APFloat D = RHS.atIndex(1).deref<Floating>().getAPFloat();
+ APFloat A = LHS.elem<Floating>(0).getAPFloat();
+ APFloat B = LHS.elem<Floating>(1).getAPFloat();
+ APFloat C = RHS.elem<Floating>(0).getAPFloat();
+ APFloat D = RHS.elem<Floating>(1).getAPFloat();
APFloat ResR(A.getSemantics());
APFloat ResI(A.getSemantics());
@@ -540,21 +540,21 @@ inline bool Divc(InterpState &S, CodePtr OpPC) {
// Copy into the result.
Floating RA = S.allocFloat(A.getSemantics());
RA.copy(ResR);
- Result.atIndex(0).deref<Floating>() = RA; // Floating(ResR);
+ Result.elem<Floating>(0) = RA; // Floating(ResR);
Result.atIndex(0).initialize();
Floating RI = S.allocFloat(A.getSemantics());
RI.copy(ResI);
- Result.atIndex(1).deref<Floating>() = RI; // Floating(ResI);
+ Result.elem<Floating>(1) = RI; // Floating(ResI);
Result.atIndex(1).initialize();
Result.initialize();
} else {
// Integer element type.
- const T &LHSR = LHS.atIndex(0).deref<T>();
- const T &LHSI = LHS.atIndex(1).deref<T>();
- const T &RHSR = RHS.atIndex(0).deref<T>();
- const T &RHSI = RHS.atIndex(1).deref<T>();
+ const T &LHSR = LHS.elem<T>(0);
+ const T &LHSI = LHS.elem<T>(1);
+ const T &RHSR = RHS.elem<T>(0);
+ const T &RHSI = RHS.elem<T>(1);
unsigned Bits = LHSR.bitWidth();
const T Zero = T::from(0, Bits);
@@ -581,8 +581,8 @@ inline bool Divc(InterpState &S, CodePtr OpPC) {
}
// real(Result) = ((real(LHS) * real(RHS)) + (imag(LHS) * imag(RHS))) / Den
- T &ResultR = Result.atIndex(0).deref<T>();
- T &ResultI = Result.atIndex(1).deref<T>();
+ T &ResultR = Result.elem<T>(0);
+ T &ResultI = Result.elem<T>(1);
if (T::mul(LHSR, RHSR, Bits, &A) || T::mul(LHSI, RHSI, Bits, &B))
return false;
@@ -1983,6 +1983,16 @@ static inline bool Activate(InterpState &S, CodePtr OpPC) {
return true;
}
+static inline bool ActivateThisField(InterpState &S, CodePtr OpPC, uint32_t I) {
+ if (S.checkingPotentialConstantExpression())
+ return false;
+
+ const Pointer &Ptr = S.Current->getThis();
+ assert(Ptr.atField(I).canBeInitialized());
+ Ptr.atField(I).activate();
+ return true;
+}
+
template <PrimType Name, class T = typename PrimConv<Name>::T>
bool StoreActivate(InterpState &S, CodePtr OpPC) {
const T &Value = S.Stk.pop<T>();
@@ -3103,7 +3113,7 @@ inline bool ArrayElem(InterpState &S, CodePtr OpPC, uint32_t Index) {
return false;
assert(Ptr.atIndex(Index).getFieldDesc()->getPrimType() == Name);
- S.Stk.push<T>(Ptr.atIndex(Index).deref<T>());
+ S.Stk.push<T>(Ptr.elem<T>(Index));
return true;
}
@@ -3115,7 +3125,7 @@ inline bool ArrayElemPop(InterpState &S, CodePtr OpPC, uint32_t Index) {
return false;
assert(Ptr.atIndex(Index).getFieldDesc()->getPrimType() == Name);
- S.Stk.push<T>(Ptr.atIndex(Index).deref<T>());
+ S.Stk.push<T>(Ptr.elem<T>(Index));
return true;
}
@@ -3557,12 +3567,22 @@ inline bool BitCastPrim(InterpState &S, CodePtr OpPC, bool TargetIsUCharOrByte,
Floating Result = S.allocFloat(*Sem);
Floating::bitcastFromMemory(Buff.data(), *Sem, &Result);
S.Stk.push<Floating>(Result);
-
- // S.Stk.push<Floating>(T::bitcastFromMemory(Buff.data(), *Sem));
} else if constexpr (needsAlloc<T>()) {
T Result = S.allocAP<T>(ResultBitWidth);
T::bitcastFromMemory(Buff.data(), ResultBitWidth, &Result);
S.Stk.push<T>(Result);
+ } else if constexpr (std::is_same_v<T, Boolean>) {
+ // Only allow casting single-byte integers to bool if they are either 0
+ // or 1.
+ assert(FullBitWidth.getQuantity() == 8);
+ auto Val = static_cast<unsigned int>(Buff[0]);
+ if (Val > 1) {
+ S.FFDiag(S.Current->getSource(OpPC),
+ diag::note_constexpr_bit_cast_unrepresentable_value)
+ << S.getASTContext().BoolTy << Val;
+ return false;
+ }
+ S.Stk.push<T>(T::bitcastFromMemory(Buff.data(), ResultBitWidth));
} else {
assert(!Sem);
S.Stk.push<T>(T::bitcastFromMemory(Buff.data(), ResultBitWidth));
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index 462b9a1..19d4c0c 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -53,7 +53,7 @@ static APSInt popToAPSInt(InterpStack &Stk, PrimType T) {
static void pushInteger(InterpState &S, const APSInt &Val, QualType QT) {
assert(QT->isSignedIntegerOrEnumerationType() ||
QT->isUnsignedIntegerOrEnumerationType());
- std::optional<PrimType> T = S.getContext().classify(QT);
+ OptPrimType T = S.getContext().classify(QT);
assert(T);
unsigned BitWidth = S.getASTContext().getTypeSize(QT);
@@ -1098,9 +1098,9 @@ static bool interp__builtin_complex(InterpState &S, CodePtr OpPC,
const Floating &Arg1 = S.Stk.pop<Floating>();
Pointer &Result = S.Stk.peek<Pointer>();
- Result.atIndex(0).deref<Floating>() = Arg1;
+ Result.elem<Floating>(0) = Arg1;
Result.atIndex(0).initialize();
- Result.atIndex(1).deref<Floating>() = Arg2;
+ Result.elem<Floating>(1) = Arg2;
Result.atIndex(1).initialize();
Result.initialize();
@@ -1530,7 +1530,7 @@ static bool interp__builtin_operator_new(InterpState &S, CodePtr OpPC,
return false;
bool IsArray = NumElems.ugt(1);
- std::optional<PrimType> ElemT = S.getContext().classify(ElemType);
+ OptPrimType ElemT = S.getContext().classify(ElemType);
DynamicAllocator &Allocator = S.getAllocator();
if (ElemT) {
Block *B =
@@ -1644,10 +1644,10 @@ static bool interp__builtin_vector_reduce(InterpState &S, CodePtr OpPC,
unsigned NumElems = Arg.getNumElems();
INT_TYPE_SWITCH_NO_BOOL(ElemT, {
- T Result = Arg.atIndex(0).deref<T>();
+ T Result = Arg.elem<T>(0);
unsigned BitWidth = Result.bitWidth();
for (unsigned I = 1; I != NumElems; ++I) {
- T Elem = Arg.atIndex(I).deref<T>();
+ T Elem = Arg.elem<T>(I);
T PrevResult = Result;
if (ID == Builtin::BI__builtin_reduce_add) {
@@ -1723,11 +1723,10 @@ static bool interp__builtin_elementwise_popcount(InterpState &S, CodePtr OpPC,
for (unsigned I = 0; I != NumElems; ++I) {
INT_TYPE_SWITCH_NO_BOOL(ElemT, {
if (BuiltinID == Builtin::BI__builtin_elementwise_popcount) {
- Dst.atIndex(I).deref<T>() =
- T::from(Arg.atIndex(I).deref<T>().toAPSInt().popcount());
+ Dst.elem<T>(I) = T::from(Arg.elem<T>(I).toAPSInt().popcount());
} else {
- Dst.atIndex(I).deref<T>() = T::from(
- Arg.atIndex(I).deref<T>().toAPSInt().reverseBits().getZExtValue());
+ Dst.elem<T>(I) =
+ T::from(Arg.elem<T>(I).toAPSInt().reverseBits().getZExtValue());
}
Dst.atIndex(I).initialize();
});
@@ -2296,8 +2295,8 @@ static bool interp__builtin_elementwise_sat(InterpState &S, CodePtr OpPC,
APSInt Elem1;
APSInt Elem2;
INT_TYPE_SWITCH_NO_BOOL(ElemT, {
- Elem1 = LHS.atIndex(I).deref<T>().toAPSInt();
- Elem2 = RHS.atIndex(I).deref<T>().toAPSInt();
+ Elem1 = LHS.elem<T>(I).toAPSInt();
+ Elem2 = RHS.elem<T>(I).toAPSInt();
});
APSInt Result;
@@ -2880,7 +2879,7 @@ static bool copyRecord(InterpState &S, CodePtr OpPC, const Pointer &Src,
auto copyField = [&](const Record::Field &F, bool Activate) -> bool {
Pointer DestField = Dest.atField(F.Offset);
- if (std::optional<PrimType> FT = S.Ctx.classify(F.Decl->getType())) {
+ if (OptPrimType FT = S.Ctx.classify(F.Decl->getType())) {
TYPE_SWITCH(*FT, {
DestField.deref<T>() = Src.atField(F.Offset).deref<T>();
if (Src.atField(F.Offset).isInitialized())
@@ -2942,7 +2941,7 @@ static bool copyComposite(InterpState &S, CodePtr OpPC, const Pointer &Src,
for (unsigned I = 0, N = DestDesc->getNumElems(); I != N; ++I) {
Pointer DestElem = Dest.atIndex(I);
TYPE_SWITCH(ET, {
- DestElem.deref<T>() = Src.atIndex(I).deref<T>();
+ DestElem.deref<T>() = Src.elem<T>(I);
DestElem.initialize();
});
}
diff --git a/clang/lib/AST/ByteCode/Opcodes.td b/clang/lib/AST/ByteCode/Opcodes.td
index 80703ad..abfed77 100644
--- a/clang/lib/AST/ByteCode/Opcodes.td
+++ b/clang/lib/AST/ByteCode/Opcodes.td
@@ -510,6 +510,7 @@ def StoreBitFieldActivate : StoreBitFieldOpcode {}
def StoreBitFieldActivatePop : StoreBitFieldOpcode {}
def Activate : Opcode {}
+def ActivateThisField : Opcode { let Args = [ArgUint32]; }
// [Pointer, Value] -> []
def Init : StoreOpcode {}
diff --git a/clang/lib/AST/ByteCode/Pointer.cpp b/clang/lib/AST/ByteCode/Pointer.cpp
index 2f9ecf9..4019b74 100644
--- a/clang/lib/AST/ByteCode/Pointer.cpp
+++ b/clang/lib/AST/ByteCode/Pointer.cpp
@@ -665,7 +665,7 @@ std::optional<APValue> Pointer::toRValue(const Context &Ctx,
return false;
// Primitive values.
- if (std::optional<PrimType> T = Ctx.classify(Ty)) {
+ if (OptPrimType T = Ctx.classify(Ty)) {
TYPE_SWITCH(*T, R = Ptr.deref<T>().toAPValue(ASTCtx));
return true;
}
@@ -682,7 +682,7 @@ std::optional<APValue> Pointer::toRValue(const Context &Ctx,
const Pointer &FP = Ptr.atField(F.Offset);
QualType FieldTy = F.Decl->getType();
if (FP.isActive()) {
- if (std::optional<PrimType> T = Ctx.classify(FieldTy)) {
+ if (OptPrimType T = Ctx.classify(FieldTy)) {
TYPE_SWITCH(*T, Value = FP.deref<T>().toAPValue(ASTCtx));
} else {
Ok &= Composite(FieldTy, FP, Value);
@@ -705,7 +705,7 @@ std::optional<APValue> Pointer::toRValue(const Context &Ctx,
const Pointer &FP = Ptr.atField(FD->Offset);
APValue &Value = R.getStructField(I);
- if (std::optional<PrimType> T = Ctx.classify(FieldTy)) {
+ if (OptPrimType T = Ctx.classify(FieldTy)) {
TYPE_SWITCH(*T, Value = FP.deref<T>().toAPValue(ASTCtx));
} else {
Ok &= Composite(FieldTy, FP, Value);
@@ -743,7 +743,7 @@ std::optional<APValue> Pointer::toRValue(const Context &Ctx,
for (unsigned I = 0; I < NumElems; ++I) {
APValue &Slot = R.getArrayInitializedElt(I);
const Pointer &EP = Ptr.atIndex(I);
- if (std::optional<PrimType> T = Ctx.classify(ElemTy)) {
+ if (OptPrimType T = Ctx.classify(ElemTy)) {
TYPE_SWITCH(*T, Slot = EP.deref<T>().toAPValue(ASTCtx));
} else {
Ok &= Composite(ElemTy, EP.narrow(), Slot);
@@ -757,17 +757,17 @@ std::optional<APValue> Pointer::toRValue(const Context &Ctx,
QualType ElemTy = CT->getElementType();
if (ElemTy->isIntegerType()) {
- std::optional<PrimType> ElemT = Ctx.classify(ElemTy);
+ OptPrimType ElemT = Ctx.classify(ElemTy);
assert(ElemT);
INT_TYPE_SWITCH(*ElemT, {
- auto V1 = Ptr.atIndex(0).deref<T>();
- auto V2 = Ptr.atIndex(1).deref<T>();
+ auto V1 = Ptr.elem<T>(0);
+ auto V2 = Ptr.elem<T>(1);
R = APValue(V1.toAPSInt(), V2.toAPSInt());
return true;
});
} else if (ElemTy->isFloatingType()) {
- R = APValue(Ptr.atIndex(0).deref<Floating>().getAPFloat(),
- Ptr.atIndex(1).deref<Floating>().getAPFloat());
+ R = APValue(Ptr.elem<Floating>(0).getAPFloat(),
+ Ptr.elem<Floating>(1).getAPFloat());
return true;
}
return false;
@@ -782,9 +782,8 @@ std::optional<APValue> Pointer::toRValue(const Context &Ctx,
SmallVector<APValue> Values;
Values.reserve(VT->getNumElements());
for (unsigned I = 0; I != VT->getNumElements(); ++I) {
- TYPE_SWITCH(ElemT, {
- Values.push_back(Ptr.atIndex(I).deref<T>().toAPValue(ASTCtx));
- });
+ TYPE_SWITCH(ElemT,
+ { Values.push_back(Ptr.elem<T>(I).toAPValue(ASTCtx)); });
}
assert(Values.size() == VT->getNumElements());
@@ -804,7 +803,7 @@ std::optional<APValue> Pointer::toRValue(const Context &Ctx,
return toAPValue(ASTCtx);
// Just load primitive types.
- if (std::optional<PrimType> T = Ctx.classify(ResultType)) {
+ if (OptPrimType T = Ctx.classify(ResultType)) {
TYPE_SWITCH(*T, return this->deref<T>().toAPValue(ASTCtx));
}
diff --git a/clang/lib/AST/ByteCode/Pointer.h b/clang/lib/AST/ByteCode/Pointer.h
index da74013..d17eba5 100644
--- a/clang/lib/AST/ByteCode/Pointer.h
+++ b/clang/lib/AST/ByteCode/Pointer.h
@@ -693,6 +693,25 @@ public:
return *reinterpret_cast<T *>(asBlockPointer().Pointee->rawData() + Offset);
}
+ /// Dereferences the element at index \p I.
+ /// This is equivalent to atIndex(I).deref<T>().
+ template <typename T> T &elem(unsigned I) const {
+ assert(isLive() && "Invalid pointer");
+ assert(isBlockPointer());
+ assert(asBlockPointer().Pointee);
+ assert(isDereferencable());
+ assert(getFieldDesc()->isPrimitiveArray());
+
+ unsigned ElemByteOffset = I * getFieldDesc()->getElemSize();
+ if (isArrayRoot())
+ return *reinterpret_cast<T *>(asBlockPointer().Pointee->rawData() +
+ asBlockPointer().Base + sizeof(InitMapPtr) +
+ ElemByteOffset);
+
+ return *reinterpret_cast<T *>(asBlockPointer().Pointee->rawData() + Offset +
+ ElemByteOffset);
+ }
+
/// Whether this block can be read from at all. This is only true for
/// block pointers that point to a valid location inside that block.
bool isDereferencable() const {
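The new elem<T>(I) accessor above is documented as equivalent to atIndex(I).deref<T>(); a minimal before/after sketch of the pattern this patch rewrites (pointer names are illustrative):

  // Before: materialize an intermediate Pointer just to read element I.
  Floating Old = Ptr.atIndex(I).deref<Floating>();
  // After: index into the primitive array's backing storage directly.
  Floating New = Ptr.elem<Floating>(I);
  // Stores follow the same pattern; initialization still goes through atIndex().
  Result.elem<Floating>(0) = New;
  Result.atIndex(0).initialize();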
diff --git a/clang/lib/AST/ByteCode/PrimType.h b/clang/lib/AST/ByteCode/PrimType.h
index a156ccc..38c29b9 100644
--- a/clang/lib/AST/ByteCode/PrimType.h
+++ b/clang/lib/AST/ByteCode/PrimType.h
@@ -13,6 +13,7 @@
#ifndef LLVM_CLANG_AST_INTERP_TYPE_H
#define LLVM_CLANG_AST_INTERP_TYPE_H
+#include "clang/Basic/UnsignedOrNone.h"
#include "llvm/Support/raw_ostream.h"
#include <climits>
#include <cstddef>
@@ -49,6 +50,38 @@ enum PrimType : unsigned {
PT_MemberPtr = 14,
};
+// Like std::optional<PrimType>, but only sizeof(PrimType).
+class OptPrimType final {
+ unsigned V = ~0u;
+
+public:
+ OptPrimType() = default;
+ OptPrimType(std::nullopt_t) {}
+ OptPrimType(PrimType T) : V(static_cast<unsigned>(T)) {}
+
+ explicit constexpr operator bool() const { return V != ~0u; }
+ PrimType operator*() const {
+ assert(operator bool());
+ return static_cast<PrimType>(V);
+ }
+
+ PrimType value_or(PrimType PT) const {
+ if (operator bool())
+ return static_cast<PrimType>(V);
+ return PT;
+ }
+
+ bool operator==(PrimType PT) const {
+ if (!operator bool())
+ return false;
+ return V == static_cast<unsigned>(PT);
+ }
+ bool operator==(OptPrimType OPT) const { return V == OPT.V; }
+ bool operator!=(PrimType PT) const { return !(*this == PT); }
+ bool operator!=(OptPrimType OPT) const { return V != OPT.V; }
+};
+static_assert(sizeof(OptPrimType) == sizeof(PrimType));
+
inline constexpr bool isPtrType(PrimType T) {
return T == PT_Ptr || T == PT_MemberPtr;
}
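A brief usage sketch of the new OptPrimType wrapper (variable names are illustrative); it keeps the std::optional-style surface used throughout this patch (contextual bool, operator*, value_or) while occupying only sizeof(PrimType):

  // Sketch, not part of the patch.
  OptPrimType T = Ctx.classify(FieldType);      // was: std::optional<PrimType>
  if (T) {
    PrimType PT = *T;                           // operator* asserts a value is present
    (void)PT;
  }
  PrimType WithFallback = T.value_or(PT_Ptr);   // same fallback idiom as before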
diff --git a/clang/lib/AST/ByteCode/Program.cpp b/clang/lib/AST/ByteCode/Program.cpp
index 5ac0f59f..7002724 100644
--- a/clang/lib/AST/ByteCode/Program.cpp
+++ b/clang/lib/AST/ByteCode/Program.cpp
@@ -74,27 +74,25 @@ unsigned Program::createGlobalString(const StringLiteral *S, const Expr *Base) {
const Pointer Ptr(G->block());
if (CharWidth == 1) {
- std::memcpy(&Ptr.atIndex(0).deref<char>(), S->getString().data(),
- StringLength);
+ std::memcpy(&Ptr.elem<char>(0), S->getString().data(), StringLength);
} else {
// Construct the string in storage.
for (unsigned I = 0; I <= StringLength; ++I) {
- Pointer Field = Ptr.atIndex(I);
const uint32_t CodePoint = I == StringLength ? 0 : S->getCodeUnit(I);
switch (CharType) {
case PT_Sint8: {
using T = PrimConv<PT_Sint8>::T;
- Field.deref<T>() = T::from(CodePoint, BitWidth);
+ Ptr.elem<T>(I) = T::from(CodePoint, BitWidth);
break;
}
case PT_Uint16: {
using T = PrimConv<PT_Uint16>::T;
- Field.deref<T>() = T::from(CodePoint, BitWidth);
+ Ptr.elem<T>(I) = T::from(CodePoint, BitWidth);
break;
}
case PT_Uint32: {
using T = PrimConv<PT_Uint32>::T;
- Field.deref<T>() = T::from(CodePoint, BitWidth);
+ Ptr.elem<T>(I) = T::from(CodePoint, BitWidth);
break;
}
default:
@@ -171,7 +169,7 @@ unsigned Program::getOrCreateDummy(const DeclTy &D) {
assert(!QT.isNull());
Descriptor *Desc;
- if (std::optional<PrimType> T = Ctx.classify(QT))
+ if (OptPrimType T = Ctx.classify(QT))
Desc = createDescriptor(D, *T, /*SourceTy=*/nullptr, std::nullopt,
/*IsConst=*/QT.isConstQualified());
else
@@ -250,7 +248,7 @@ std::optional<unsigned> Program::createGlobal(const DeclTy &D, QualType Ty,
const bool IsConst = Ty.isConstQualified();
const bool IsTemporary = D.dyn_cast<const Expr *>();
const bool IsVolatile = Ty.isVolatileQualified();
- if (std::optional<PrimType> T = Ctx.classify(Ty))
+ if (OptPrimType T = Ctx.classify(Ty))
Desc = createDescriptor(D, *T, nullptr, Descriptor::GlobalMD, IsConst,
IsTemporary, /*IsMutable=*/false, IsVolatile);
else
@@ -373,7 +371,7 @@ Record *Program::getOrCreateRecord(const RecordDecl *RD) {
const bool IsMutable = FD->isMutable();
const bool IsVolatile = FT.isVolatileQualified();
const Descriptor *Desc;
- if (std::optional<PrimType> T = Ctx.classify(FT)) {
+ if (OptPrimType T = Ctx.classify(FT)) {
Desc = createDescriptor(FD, *T, nullptr, std::nullopt, IsConst,
/*isTemporary=*/false, IsMutable, IsVolatile);
} else {
@@ -412,7 +410,7 @@ Descriptor *Program::createDescriptor(const DeclTy &D, const Type *Ty,
// Array of well-known bounds.
if (const auto *CAT = dyn_cast<ConstantArrayType>(ArrayType)) {
size_t NumElems = CAT->getZExtSize();
- if (std::optional<PrimType> T = Ctx.classify(ElemTy)) {
+ if (OptPrimType T = Ctx.classify(ElemTy)) {
// Arrays of primitives.
unsigned ElemSize = primSize(*T);
if (std::numeric_limits<unsigned>::max() / ElemSize <= NumElems) {
@@ -439,7 +437,7 @@ Descriptor *Program::createDescriptor(const DeclTy &D, const Type *Ty,
// is forbidden on pointers to such objects.
if (isa<IncompleteArrayType>(ArrayType) ||
isa<VariableArrayType>(ArrayType)) {
- if (std::optional<PrimType> T = Ctx.classify(ElemTy)) {
+ if (OptPrimType T = Ctx.classify(ElemTy)) {
return allocateDescriptor(D, *T, MDSize, IsConst, IsTemporary,
Descriptor::UnknownSize{});
} else {
@@ -462,7 +460,7 @@ Descriptor *Program::createDescriptor(const DeclTy &D, const Type *Ty,
// Complex types - represented as arrays of elements.
if (const auto *CT = Ty->getAs<ComplexType>()) {
- std::optional<PrimType> ElemTy = Ctx.classify(CT->getElementType());
+ OptPrimType ElemTy = Ctx.classify(CT->getElementType());
if (!ElemTy)
return nullptr;
@@ -472,7 +470,7 @@ Descriptor *Program::createDescriptor(const DeclTy &D, const Type *Ty,
// Same with vector types.
if (const auto *VT = Ty->getAs<VectorType>()) {
- std::optional<PrimType> ElemTy = Ctx.classify(VT->getElementType());
+ OptPrimType ElemTy = Ctx.classify(VT->getElementType());
if (!ElemTy)
return nullptr;
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index e575405..9808298 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -14636,7 +14636,9 @@ EvaluateComparisonBinaryOperator(EvalInfo &Info, const BinaryOperator *E,
if (!LHSDesignator.Invalid && !RHSDesignator.Invalid && IsRelational) {
bool WasArrayIndex;
unsigned Mismatch = FindDesignatorMismatch(
- getType(LHSValue.Base), LHSDesignator, RHSDesignator, WasArrayIndex);
+ LHSValue.Base.isNull() ? QualType()
+ : getType(LHSValue.Base).getNonReferenceType(),
+ LHSDesignator, RHSDesignator, WasArrayIndex);
// At the point where the designators diverge, the comparison has a
// specified value if:
// - we are comparing array indices
@@ -14680,7 +14682,7 @@ EvaluateComparisonBinaryOperator(EvalInfo &Info, const BinaryOperator *E,
// compare pointers within the object in question; otherwise, the result
// depends on where the object is located in memory.
if (!LHSValue.Base.isNull() && IsRelational) {
- QualType BaseTy = getType(LHSValue.Base);
+ QualType BaseTy = getType(LHSValue.Base).getNonReferenceType();
if (BaseTy->isIncompleteType())
return Error(E);
CharUnits Size = Info.Ctx.getTypeSizeInChars(BaseTy);
@@ -18184,6 +18186,10 @@ bool Expr::EvaluateCharRangeAsString(APValue &Result,
bool Expr::tryEvaluateStrLen(uint64_t &Result, ASTContext &Ctx) const {
Expr::EvalStatus Status;
EvalInfo Info(Ctx, Status, EvalInfo::EM_ConstantFold);
+
+ if (Info.EnableNewConstInterp)
+ return Info.Ctx.getInterpContext().evaluateStrlen(Info, this, Result);
+
return EvaluateBuiltinStrLen(this, Result, Info);
}
diff --git a/clang/lib/AST/TextNodeDumper.cpp b/clang/lib/AST/TextNodeDumper.cpp
index 3d9397f..6b524cf 100644
--- a/clang/lib/AST/TextNodeDumper.cpp
+++ b/clang/lib/AST/TextNodeDumper.cpp
@@ -843,7 +843,10 @@ void TextNodeDumper::Visit(const APValue &Value, QualType Ty) {
}
ColorScope Color(OS, ShowColors, DeclNameColor);
- OS << Value.getMemberPointerDecl()->getDeclName();
+ if (const ValueDecl *MemDecl = Value.getMemberPointerDecl())
+ OS << MemDecl->getDeclName();
+ else
+ OS << "null";
return;
}
case APValue::AddrLabelDiff:
diff --git a/clang/lib/Analysis/LifetimeSafety.cpp b/clang/lib/Analysis/LifetimeSafety.cpp
index e3a03cf..94b8197 100644
--- a/clang/lib/Analysis/LifetimeSafety.cpp
+++ b/clang/lib/Analysis/LifetimeSafety.cpp
@@ -23,9 +23,10 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/TimeProfiler.h"
#include <cstdint>
+#include <memory>
-namespace clang {
-namespace {
+namespace clang::lifetimes {
+namespace internal {
/// Represents the storage location being borrowed, e.g., a specific stack
/// variable.
@@ -36,32 +37,6 @@ struct AccessPath {
AccessPath(const clang::ValueDecl *D) : D(D) {}
};
-/// A generic, type-safe wrapper for an ID, distinguished by its `Tag` type.
-/// Used for giving ID to loans and origins.
-template <typename Tag> struct ID {
- uint32_t Value = 0;
-
- bool operator==(const ID<Tag> &Other) const { return Value == Other.Value; }
- bool operator!=(const ID<Tag> &Other) const { return !(*this == Other); }
- bool operator<(const ID<Tag> &Other) const { return Value < Other.Value; }
- ID<Tag> operator++(int) {
- ID<Tag> Tmp = *this;
- ++Value;
- return Tmp;
- }
- void Profile(llvm::FoldingSetNodeID &IDBuilder) const {
- IDBuilder.AddInteger(Value);
- }
-};
-
-template <typename Tag>
-inline llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, ID<Tag> ID) {
- return OS << ID.Value;
-}
-
-using LoanID = ID<struct LoanTag>;
-using OriginID = ID<struct OriginTag>;
-
/// Information about a single borrow, or "Loan". A loan is created when a
/// reference or pointer is created.
struct Loan {
@@ -223,7 +198,9 @@ public:
/// An origin is propagated from a source to a destination (e.g., p = q).
AssignOrigin,
/// An origin escapes the function by flowing into the return value.
- ReturnOfOrigin
+ ReturnOfOrigin,
+ /// A marker for a specific point in the code, for testing.
+ TestPoint,
};
private:
@@ -310,6 +287,24 @@ public:
}
};
+/// A dummy fact used to mark a specific point in the code for testing.
+/// It is generated by recognizing a `void("__lifetime_test_point_...")` cast.
+class TestPointFact : public Fact {
+ StringRef Annotation;
+
+public:
+ static bool classof(const Fact *F) { return F->getKind() == Kind::TestPoint; }
+
+ explicit TestPointFact(StringRef Annotation)
+ : Fact(Kind::TestPoint), Annotation(Annotation) {}
+
+ StringRef getAnnotation() const { return Annotation; }
+
+ void dump(llvm::raw_ostream &OS) const override {
+ OS << "TestPoint (Annotation: \"" << getAnnotation() << "\")\n";
+ }
+};
+
class FactManager {
public:
llvm::ArrayRef<const Fact *> getFacts(const CFGBlock *B) const {
@@ -363,6 +358,7 @@ private:
};
class FactGenerator : public ConstStmtVisitor<FactGenerator> {
+ using Base = ConstStmtVisitor<FactGenerator>;
public:
FactGenerator(FactManager &FactMgr, AnalysisDeclContext &AC)
@@ -458,6 +454,15 @@ public:
}
}
+ void VisitCXXFunctionalCastExpr(const CXXFunctionalCastExpr *FCE) {
+ // Check if this is a test point marker. If so, we are done with this
+ // expression.
+ if (VisitTestPoint(FCE))
+ return;
+ // Visit as normal otherwise.
+ Base::VisitCXXFunctionalCastExpr(FCE);
+ }
+
private:
// Check if a type has an origin.
bool hasOrigin(QualType QT) { return QT->isPointerOrReferenceType(); }
@@ -491,6 +496,27 @@ private:
}
}
+ /// Checks if the expression is a `void("__lifetime_test_point_...")` cast.
+ /// If so, creates a `TestPointFact` and returns true.
+ bool VisitTestPoint(const CXXFunctionalCastExpr *FCE) {
+ if (!FCE->getType()->isVoidType())
+ return false;
+
+ const auto *SubExpr = FCE->getSubExpr()->IgnoreParenImpCasts();
+ if (const auto *SL = dyn_cast<StringLiteral>(SubExpr)) {
+ llvm::StringRef LiteralValue = SL->getString();
+ const std::string Prefix = "__lifetime_test_point_";
+
+ if (LiteralValue.starts_with(Prefix)) {
+ StringRef Annotation = LiteralValue.drop_front(Prefix.length());
+ CurrentBlockFacts.push_back(
+ FactMgr.createFact<TestPointFact>(Annotation));
+ return true;
+ }
+ }
+ return false;
+ }
+
FactManager &FactMgr;
AnalysisDeclContext &AC;
llvm::SmallVector<Fact *> CurrentBlockFacts;
@@ -502,6 +528,13 @@ private:
enum class Direction { Forward, Backward };
+/// A `ProgramPoint` identifies a location in the CFG by pointing to a specific
+/// lifetime-related event (`Fact`).
+///
+/// A `ProgramPoint` has "after" semantics: it represents the location
+/// immediately after its corresponding `Fact`.
+using ProgramPoint = const Fact *;
+
/// A generic, policy-based driver for dataflow analyses. It combines
/// the dataflow runner and the transferer logic into a single class hierarchy.
///
@@ -524,14 +557,20 @@ template <typename Derived, typename LatticeType, Direction Dir>
class DataflowAnalysis {
public:
using Lattice = LatticeType;
- using Base = DataflowAnalysis<Derived, LatticeType, Dir>;
+ using Base = DataflowAnalysis<Derived, Lattice, Dir>;
private:
const CFG &Cfg;
AnalysisDeclContext &AC;
+ /// The dataflow state before a basic block is processed.
llvm::DenseMap<const CFGBlock *, Lattice> InStates;
+ /// The dataflow state after a basic block is processed.
llvm::DenseMap<const CFGBlock *, Lattice> OutStates;
+ /// The dataflow state at a Program Point.
+ /// In a forward analysis, this is the state after the Fact at that point has
+ /// been applied, while in a backward analysis, it is the state before.
+ llvm::DenseMap<ProgramPoint, Lattice> PerPointStates;
static constexpr bool isForward() { return Dir == Direction::Forward; }
@@ -577,6 +616,8 @@ public:
}
}
+ Lattice getState(ProgramPoint P) const { return PerPointStates.lookup(P); }
+
Lattice getInState(const CFGBlock *B) const { return InStates.lookup(B); }
Lattice getOutState(const CFGBlock *B) const { return OutStates.lookup(B); }
@@ -590,18 +631,23 @@ public:
getOutState(&B).dump(llvm::dbgs());
}
+private:
/// Computes the state at one end of a block by applying all its facts
/// sequentially to a given state from the other end.
- /// TODO: We might need to store intermediate states per-fact in the block for
- /// later analysis.
Lattice transferBlock(const CFGBlock *Block, Lattice State) {
auto Facts = AllFacts.getFacts(Block);
- if constexpr (isForward())
- for (const Fact *F : Facts)
+ if constexpr (isForward()) {
+ for (const Fact *F : Facts) {
State = transferFact(State, F);
- else
- for (const Fact *F : llvm::reverse(Facts))
+ PerPointStates[F] = State;
+ }
+ } else {
+ for (const Fact *F : llvm::reverse(Facts)) {
+ // In backward analysis, capture the state before applying the fact.
+ PerPointStates[F] = State;
State = transferFact(State, F);
+ }
+ }
return State;
}
@@ -617,6 +663,8 @@ public:
return D->transfer(In, *F->getAs<AssignOriginFact>());
case Fact::Kind::ReturnOfOrigin:
return D->transfer(In, *F->getAs<ReturnOfOriginFact>());
+ case Fact::Kind::TestPoint:
+ return D->transfer(In, *F->getAs<TestPointFact>());
}
llvm_unreachable("Unknown fact kind");
}
@@ -626,14 +674,16 @@ public:
Lattice transfer(Lattice In, const ExpireFact &) { return In; }
Lattice transfer(Lattice In, const AssignOriginFact &) { return In; }
Lattice transfer(Lattice In, const ReturnOfOriginFact &) { return In; }
+ Lattice transfer(Lattice In, const TestPointFact &) { return In; }
};
namespace utils {
/// Computes the union of two ImmutableSets.
template <typename T>
-llvm::ImmutableSet<T> join(llvm::ImmutableSet<T> A, llvm::ImmutableSet<T> B,
- typename llvm::ImmutableSet<T>::Factory &F) {
+static llvm::ImmutableSet<T> join(llvm::ImmutableSet<T> A,
+ llvm::ImmutableSet<T> B,
+ typename llvm::ImmutableSet<T>::Factory &F) {
if (A.getHeight() < B.getHeight())
std::swap(A, B);
for (const T &E : B)
@@ -646,7 +696,7 @@ llvm::ImmutableSet<T> join(llvm::ImmutableSet<T> A, llvm::ImmutableSet<T> B,
// efficient merge could be implemented using a Patricia Trie or HAMT
// instead of the current AVL-tree-based ImmutableMap.
template <typename K, typename V, typename Joiner>
-llvm::ImmutableMap<K, V>
+static llvm::ImmutableMap<K, V>
join(llvm::ImmutableMap<K, V> A, llvm::ImmutableMap<K, V> B,
typename llvm::ImmutableMap<K, V>::Factory &F, Joiner joinValues) {
if (A.getHeight() < B.getHeight())
@@ -670,10 +720,6 @@ join(llvm::ImmutableMap<K, V> A, llvm::ImmutableMap<K, V> B,
// Loan Propagation Analysis
// ========================================================================= //
-// Using LLVM's immutable collections is efficient for dataflow analysis
-// as it avoids deep copies during state transitions.
-// TODO(opt): Consider using a bitset to represent the set of loans.
-using LoanSet = llvm::ImmutableSet<LoanID>;
using OriginLoanMap = llvm::ImmutableMap<OriginID, LoanSet>;
/// An object to hold the factories for immutable collections, ensuring
@@ -769,6 +815,10 @@ public:
Factory.OriginMapFactory.add(In.Origins, DestOID, SrcLoans));
}
+ LoanSet getLoans(OriginID OID, ProgramPoint P) {
+ return getLoans(getState(P), OID);
+ }
+
private:
LoanSet getLoans(Lattice L, OriginID OID) {
if (auto *Loans = L.Origins.lookup(OID))
@@ -778,23 +828,118 @@ private:
};
// ========================================================================= //
+// Expired Loans Analysis
+// ========================================================================= //
+
+/// The dataflow lattice for tracking the set of expired loans.
+struct ExpiredLattice {
+ LoanSet Expired;
+
+ ExpiredLattice() : Expired(nullptr) {};
+ explicit ExpiredLattice(LoanSet S) : Expired(S) {}
+
+ bool operator==(const ExpiredLattice &Other) const {
+ return Expired == Other.Expired;
+ }
+ bool operator!=(const ExpiredLattice &Other) const {
+ return !(*this == Other);
+ }
+
+ void dump(llvm::raw_ostream &OS) const {
+ OS << "ExpiredLattice State:\n";
+ if (Expired.isEmpty())
+ OS << " <empty>\n";
+ for (const LoanID &LID : Expired)
+ OS << " Loan " << LID << " is expired\n";
+ }
+};
+
+/// The analysis that tracks which loans have expired.
+class ExpiredLoansAnalysis
+ : public DataflowAnalysis<ExpiredLoansAnalysis, ExpiredLattice,
+ Direction::Forward> {
+
+ LoanSet::Factory &Factory;
+
+public:
+ ExpiredLoansAnalysis(const CFG &C, AnalysisDeclContext &AC, FactManager &F,
+ LifetimeFactory &Factory)
+ : DataflowAnalysis(C, AC, F), Factory(Factory.LoanSetFactory) {}
+
+ using Base::transfer;
+
+ StringRef getAnalysisName() const { return "ExpiredLoans"; }
+
+ Lattice getInitialState() { return Lattice(Factory.getEmptySet()); }
+
+ /// Merges two lattices by taking the union of the expired loan sets.
+ Lattice join(Lattice L1, Lattice L2) const {
+ return Lattice(utils::join(L1.Expired, L2.Expired, Factory));
+ }
+
+ Lattice transfer(Lattice In, const ExpireFact &F) {
+ return Lattice(Factory.add(In.Expired, F.getLoanID()));
+ }
+
+ // Removes the loan from the set of expired loans.
+ //
+ // When a loan is re-issued (e.g., in a loop), it is no longer considered
+ // expired. A loan can be in the expired set at the point of issue due to
+ // the dataflow state from a previous loop iteration being propagated along
+ // a backedge in the CFG.
+ //
+ // Note: This has a subtle false negative, though, when a loan from a
+ // previous iteration is not overwritten by a reissue. This needs careful
+ // tracking of loans "across iterations", which can be considered for
+ // future enhancements.
+ //
+ // void foo(int safe) {
+ // int* p = &safe;
+ // int* q = &safe;
+ // while (condition()) {
+ // int x = 1;
+ // p = &x; // A loan to 'x' is issued to 'p' in every iteration.
+ // if (condition()) {
+ // q = p;
+ // }
+ // (void)*p; // OK - 'p' points to 'x' from the new iteration.
+ // (void)*q; // UaF - 'q' still points to 'x' from the previous iteration
+ // // which is now destroyed.
+ // }
+ // }
+ Lattice transfer(Lattice In, const IssueFact &F) {
+ return Lattice(Factory.remove(In.Expired, F.getLoanID()));
+ }
+};
+
+// ========================================================================= //
// TODO:
-// - Modifying loan propagation to answer `LoanSet getLoans(Origin O, Point P)`
// - Modify loan expiry analysis to answer `bool isExpired(Loan L, Point P)`
// - Modify origin liveness analysis to answer `bool isLive(Origin O, Point P)`
// - Using the above three to perform the final error reporting.
// ========================================================================= //
-} // anonymous namespace
-void runLifetimeSafetyAnalysis(const DeclContext &DC, const CFG &Cfg,
- AnalysisDeclContext &AC) {
+// ========================================================================= //
+// LifetimeSafetyAnalysis Class Implementation
+// ========================================================================= //
+
+// We need this here for unique_ptr with a forward-declared class.
+LifetimeSafetyAnalysis::~LifetimeSafetyAnalysis() = default;
+
+LifetimeSafetyAnalysis::LifetimeSafetyAnalysis(AnalysisDeclContext &AC)
+ : AC(AC), Factory(std::make_unique<LifetimeFactory>()),
+ FactMgr(std::make_unique<FactManager>()) {}
+
+void LifetimeSafetyAnalysis::run() {
llvm::TimeTraceScope TimeProfile("LifetimeSafetyAnalysis");
+
+ const CFG &Cfg = *AC.getCFG();
DEBUG_WITH_TYPE("PrintCFG", Cfg.dump(AC.getASTContext().getLangOpts(),
/*ShowColors=*/true));
- FactManager FactMgr;
- FactGenerator FactGen(FactMgr, AC);
+
+ FactGenerator FactGen(*FactMgr, AC);
FactGen.run();
- DEBUG_WITH_TYPE("LifetimeFacts", FactMgr.dump(Cfg, AC));
+ DEBUG_WITH_TYPE("LifetimeFacts", FactMgr->dump(Cfg, AC));
/// TODO(opt): Consider optimizing individual blocks before running the
/// dataflow analysis.
@@ -805,9 +950,65 @@ void runLifetimeSafetyAnalysis(const DeclContext &DC, const CFG &Cfg,
/// blocks; only Decls are visible. Therefore, loans in a block that
/// never reach an Origin associated with a Decl can be safely dropped by
/// the analysis.
- LifetimeFactory Factory;
- LoanPropagationAnalysis LoanPropagation(Cfg, AC, FactMgr, Factory);
- LoanPropagation.run();
- DEBUG_WITH_TYPE("LifetimeLoanPropagation", LoanPropagation.dump());
+ LoanPropagation =
+ std::make_unique<LoanPropagationAnalysis>(Cfg, AC, *FactMgr, *Factory);
+ LoanPropagation->run();
+
+ ExpiredLoans =
+ std::make_unique<ExpiredLoansAnalysis>(Cfg, AC, *FactMgr, *Factory);
+ ExpiredLoans->run();
+}
+
+LoanSet LifetimeSafetyAnalysis::getLoansAtPoint(OriginID OID,
+ ProgramPoint PP) const {
+ assert(LoanPropagation && "Analysis has not been run.");
+ return LoanPropagation->getLoans(OID, PP);
+}
+
+LoanSet LifetimeSafetyAnalysis::getExpiredLoansAtPoint(ProgramPoint PP) const {
+ assert(ExpiredLoans && "ExpiredLoansAnalysis has not been run.");
+ return ExpiredLoans->getState(PP).Expired;
+}
+
+std::optional<OriginID>
+LifetimeSafetyAnalysis::getOriginIDForDecl(const ValueDecl *D) const {
+ assert(FactMgr && "FactManager not initialized");
+ // This assumes the OriginManager's `get` can find an existing origin.
+ // We might need a `find` method on OriginManager to avoid `getOrCreate` logic
+ // in a const-query context if that becomes an issue.
+ return FactMgr->getOriginMgr().get(*D);
+}
+
+std::vector<LoanID>
+LifetimeSafetyAnalysis::getLoanIDForVar(const VarDecl *VD) const {
+ assert(FactMgr && "FactManager not initialized");
+ std::vector<LoanID> Result;
+ for (const Loan &L : FactMgr->getLoanMgr().getLoans())
+ if (L.Path.D == VD)
+ Result.push_back(L.ID);
+ return Result;
+}
+
+llvm::StringMap<ProgramPoint> LifetimeSafetyAnalysis::getTestPoints() const {
+ assert(FactMgr && "FactManager not initialized");
+ llvm::StringMap<ProgramPoint> AnnotationToPointMap;
+ for (const CFGBlock *Block : *AC.getCFG()) {
+ for (const Fact *F : FactMgr->getFacts(Block)) {
+ if (const auto *TPF = F->getAs<TestPointFact>()) {
+ StringRef PointName = TPF->getAnnotation();
+ assert(AnnotationToPointMap.find(PointName) ==
+ AnnotationToPointMap.end() &&
+ "more than one test points with the same name");
+ AnnotationToPointMap[PointName] = F;
+ }
+ }
+ }
+ return AnnotationToPointMap;
+}
+} // namespace internal
+
+void runLifetimeSafetyAnalysis(AnalysisDeclContext &AC) {
+ internal::LifetimeSafetyAnalysis Analysis(AC);
+ Analysis.run();
}
-} // namespace clang
+} // namespace clang::lifetimes
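A sketch of how the new test-point plumbing is meant to be exercised; the test body and annotation name are illustrative, while the "__lifetime_test_point_" prefix, getTestPoints(), getLoansAtPoint() and getExpiredLoansAtPoint() come from this patch:

  // Test input: a void(...) functional cast over a string literal with the
  // magic prefix becomes a TestPointFact at that spot in the CFG.
  void target() {
    int safe = 0;
    int *p = &safe;
    void("__lifetime_test_point_p_initialized");
    (void)p;
  }

  // Driver side: map each annotation to its ProgramPoint, then query state.
  llvm::StringMap<ProgramPoint> Points = Analysis.getTestPoints();
  ProgramPoint P = Points.lookup("p_initialized");
  LoanSet Loans = Analysis.getLoansAtPoint(OID, P);   // OID: an OriginID of interest
  LoanSet Expired = Analysis.getExpiredLoansAtPoint(P);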
diff --git a/clang/lib/Analysis/plugins/CheckerDependencyHandling/CheckerDependencyHandling.cpp b/clang/lib/Analysis/plugins/CheckerDependencyHandling/CheckerDependencyHandling.cpp
index aacb886..518f9e7 100644
--- a/clang/lib/Analysis/plugins/CheckerDependencyHandling/CheckerDependencyHandling.cpp
+++ b/clang/lib/Analysis/plugins/CheckerDependencyHandling/CheckerDependencyHandling.cpp
@@ -2,6 +2,9 @@
#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
#include "clang/StaticAnalyzer/Frontend/CheckerRegistry.h"
+// This barebones plugin is used by clang/test/Analysis/checker-plugins.c
+// to test dependency handling among checkers loaded from plugins.
+
using namespace clang;
using namespace ento;
@@ -15,12 +18,12 @@ struct DependendentChecker : public Checker<check::BeginFunction> {
} // end anonymous namespace
// Register plugin!
-extern "C" void clang_registerCheckers(CheckerRegistry &registry) {
- registry.addChecker<Dependency>("example.Dependency", "", "");
- registry.addChecker<DependendentChecker>("example.DependendentChecker", "",
- "");
+extern "C" void clang_registerCheckers(CheckerRegistry &Registry) {
+ Registry.addChecker<Dependency>("example.Dependency", "MockDescription");
+ Registry.addChecker<DependendentChecker>("example.DependendentChecker",
+ "MockDescription");
- registry.addDependency("example.DependendentChecker", "example.Dependency");
+ Registry.addDependency("example.DependendentChecker", "example.Dependency");
}
extern "C" const char clang_analyzerAPIVersionString[] =
diff --git a/clang/lib/Analysis/plugins/CheckerOptionHandling/CheckerOptionHandling.cpp b/clang/lib/Analysis/plugins/CheckerOptionHandling/CheckerOptionHandling.cpp
index 82c1058..2adb934 100644
--- a/clang/lib/Analysis/plugins/CheckerOptionHandling/CheckerOptionHandling.cpp
+++ b/clang/lib/Analysis/plugins/CheckerOptionHandling/CheckerOptionHandling.cpp
@@ -5,6 +5,9 @@
using namespace clang;
using namespace ento;
+// This barebones plugin is used by clang/test/Analysis/checker-plugins.c
+// to test option handling on checkers loaded from plugins.
+
namespace {
struct MyChecker : public Checker<check::BeginFunction> {
void checkBeginFunction(CheckerContext &Ctx) const {}
@@ -25,13 +28,11 @@ bool shouldRegisterMyChecker(const CheckerManager &mgr) { return true; }
} // end anonymous namespace
// Register plugin!
-extern "C" void clang_registerCheckers(CheckerRegistry &registry) {
- registry.addChecker(registerMyChecker, shouldRegisterMyChecker,
- "example.MyChecker", "Example Description",
- "example.mychecker.documentation.nonexistent.html",
- /*isHidden*/false);
+extern "C" void clang_registerCheckers(CheckerRegistry &Registry) {
+ Registry.addChecker(registerMyChecker, shouldRegisterMyChecker,
+ "example.MyChecker", "Example Description");
- registry.addCheckerOption(/*OptionType*/ "bool",
+ Registry.addCheckerOption(/*OptionType*/ "bool",
/*CheckerFullName*/ "example.MyChecker",
/*OptionName*/ "ExampleOption",
/*DefaultValStr*/ "false",
diff --git a/clang/lib/Analysis/plugins/SampleAnalyzer/MainCallChecker.cpp b/clang/lib/Analysis/plugins/SampleAnalyzer/MainCallChecker.cpp
index fd210d7..53a01d2 100644
--- a/clang/lib/Analysis/plugins/SampleAnalyzer/MainCallChecker.cpp
+++ b/clang/lib/Analysis/plugins/SampleAnalyzer/MainCallChecker.cpp
@@ -3,12 +3,16 @@
#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
#include "clang/StaticAnalyzer/Frontend/CheckerRegistry.h"
+// This simple plugin is used by clang/test/Analysis/checker-plugins.c
+// to test the use of a checker that is defined in a plugin.
+
using namespace clang;
using namespace ento;
namespace {
class MainCallChecker : public Checker<check::PreStmt<CallExpr>> {
- mutable std::unique_ptr<BugType> BT;
+
+ const BugType BT{this, "call to main", "example analyzer plugin"};
public:
void checkPreStmt(const CallExpr *CE, CheckerContext &C) const;
@@ -33,21 +37,17 @@ void MainCallChecker::checkPreStmt(const CallExpr *CE,
if (!N)
return;
- if (!BT)
- BT.reset(new BugType(this, "call to main", "example analyzer plugin"));
-
auto report =
- std::make_unique<PathSensitiveBugReport>(*BT, BT->getDescription(), N);
+ std::make_unique<PathSensitiveBugReport>(BT, BT.getDescription(), N);
report->addRange(Callee->getSourceRange());
C.emitReport(std::move(report));
}
}
// Register plugin!
-extern "C" void clang_registerCheckers(CheckerRegistry &registry) {
- registry.addChecker<MainCallChecker>(
- "example.MainCallChecker", "Disallows calls to functions called main",
- "");
+extern "C" void clang_registerCheckers(CheckerRegistry &Registry) {
+ Registry.addChecker<MainCallChecker>("example.MainCallChecker",
+ "Example Description");
}
extern "C" const char clang_analyzerAPIVersionString[] =
diff --git a/clang/lib/Basic/TargetInfo.cpp b/clang/lib/Basic/TargetInfo.cpp
index 09b6a1f..21fc084 100644
--- a/clang/lib/Basic/TargetInfo.cpp
+++ b/clang/lib/Basic/TargetInfo.cpp
@@ -172,7 +172,7 @@ TargetInfo::TargetInfo(const llvm::Triple &T) : Triple(T) {
ComplexLongDoubleUsesFP2Ret = false;
// Set the C++ ABI based on the triple.
- TheCXXABI.set(Triple.isKnownWindowsMSVCEnvironment()
+ TheCXXABI.set(Triple.isKnownWindowsMSVCEnvironment() || Triple.isUEFI()
? TargetCXXABI::Microsoft
: TargetCXXABI::GenericItanium);
diff --git a/clang/lib/Basic/Targets.cpp b/clang/lib/Basic/Targets.cpp
index 5c2af9b..e3f9760 100644
--- a/clang/lib/Basic/Targets.cpp
+++ b/clang/lib/Basic/Targets.cpp
@@ -757,6 +757,9 @@ std::unique_ptr<TargetInfo> AllocateTarget(const llvm::Triple &Triple,
case llvm::Triple::FreeBSD:
return std::make_unique<FreeBSDTargetInfo<LoongArch64TargetInfo>>(Triple,
Opts);
+ case llvm::Triple::OpenBSD:
+ return std::make_unique<OpenBSDTargetInfo<LoongArch64TargetInfo>>(Triple,
+ Opts);
default:
return std::make_unique<LoongArch64TargetInfo>(Triple, Opts);
}
diff --git a/clang/lib/Basic/Targets/AArch64.cpp b/clang/lib/Basic/Targets/AArch64.cpp
index 72d2e5f..2b023e5 100644
--- a/clang/lib/Basic/Targets/AArch64.cpp
+++ b/clang/lib/Basic/Targets/AArch64.cpp
@@ -786,7 +786,8 @@ AArch64TargetInfo::getVScaleRange(const LangOptions &LangOpts,
return std::nullopt;
}
-uint64_t AArch64TargetInfo::getFMVPriority(ArrayRef<StringRef> Features) const {
+llvm::APInt
+AArch64TargetInfo::getFMVPriority(ArrayRef<StringRef> Features) const {
return llvm::AArch64::getFMVPriority(Features);
}
diff --git a/clang/lib/Basic/Targets/AArch64.h b/clang/lib/Basic/Targets/AArch64.h
index f4277e9..dfd89be 100644
--- a/clang/lib/Basic/Targets/AArch64.h
+++ b/clang/lib/Basic/Targets/AArch64.h
@@ -152,7 +152,7 @@ public:
void fillValidCPUList(SmallVectorImpl<StringRef> &Values) const override;
bool setCPU(const std::string &Name) override;
- uint64_t getFMVPriority(ArrayRef<StringRef> Features) const override;
+ llvm::APInt getFMVPriority(ArrayRef<StringRef> Features) const override;
bool useFP16ConversionIntrinsics() const override {
return false;
diff --git a/clang/lib/Basic/Targets/ARM.cpp b/clang/lib/Basic/Targets/ARM.cpp
index 7ff8e51..29de34bb 100644
--- a/clang/lib/Basic/Targets/ARM.cpp
+++ b/clang/lib/Basic/Targets/ARM.cpp
@@ -623,13 +623,15 @@ bool ARMTargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
LDREX = LDREX_W;
break;
case 7:
+ case 8:
if (ArchProfile == llvm::ARM::ProfileKind::M)
LDREX = LDREX_W | LDREX_H | LDREX_B;
else
LDREX = LDREX_D | LDREX_W | LDREX_H | LDREX_B;
break;
- case 8:
case 9:
+ assert(ArchProfile != llvm::ARM::ProfileKind::M &&
+ "No Armv9-M architectures defined");
LDREX = LDREX_D | LDREX_W | LDREX_H | LDREX_B;
}
diff --git a/clang/lib/Basic/Targets/Mips.h b/clang/lib/Basic/Targets/Mips.h
index 35501ed..e199df3 100644
--- a/clang/lib/Basic/Targets/Mips.h
+++ b/clang/lib/Basic/Targets/Mips.h
@@ -129,7 +129,7 @@ public:
LongWidth = LongAlign = 32;
MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 32;
PointerWidth = PointerAlign = 32;
- PtrDiffType = SignedInt;
+ PtrDiffType = IntPtrType = SignedInt;
SizeType = UnsignedInt;
SuitableAlign = 64;
}
@@ -155,7 +155,7 @@ public:
IntMaxType = Int64Type;
LongWidth = LongAlign = 64;
PointerWidth = PointerAlign = 64;
- PtrDiffType = SignedLong;
+ PtrDiffType = IntPtrType = SignedLong;
SizeType = UnsignedLong;
}
@@ -165,7 +165,7 @@ public:
IntMaxType = Int64Type;
LongWidth = LongAlign = 32;
PointerWidth = PointerAlign = 32;
- PtrDiffType = SignedInt;
+ PtrDiffType = IntPtrType = SignedInt;
SizeType = UnsignedInt;
}
diff --git a/clang/lib/Basic/Targets/OSTargets.h b/clang/lib/Basic/Targets/OSTargets.h
index 42cff65..94b018a 100644
--- a/clang/lib/Basic/Targets/OSTargets.h
+++ b/clang/lib/Basic/Targets/OSTargets.h
@@ -496,6 +496,7 @@ public:
case llvm::Triple::sparcv9:
this->MCountName = "_mcount";
break;
+ case llvm::Triple::loongarch64:
case llvm::Triple::riscv64:
break;
}
diff --git a/clang/lib/Basic/Targets/RISCV.cpp b/clang/lib/Basic/Targets/RISCV.cpp
index 8a28c07..a6a5ec4 100644
--- a/clang/lib/Basic/Targets/RISCV.cpp
+++ b/clang/lib/Basic/Targets/RISCV.cpp
@@ -568,7 +568,8 @@ ParsedTargetAttr RISCVTargetInfo::parseTargetAttr(StringRef Features) const {
return Ret;
}
-uint64_t RISCVTargetInfo::getFMVPriority(ArrayRef<StringRef> Features) const {
+llvm::APInt
+RISCVTargetInfo::getFMVPriority(ArrayRef<StringRef> Features) const {
// Priority is explicitly specified on RISC-V unlike on other targets, where
// it is derived by all the features of a specific version. Therefore if a
// feature contains the priority string, then return it immediately.
@@ -580,12 +581,12 @@ uint64_t RISCVTargetInfo::getFMVPriority(ArrayRef<StringRef> Features) const {
Feature = RHS;
else
continue;
- uint64_t Priority;
+ unsigned Priority;
if (!Feature.getAsInteger(0, Priority))
- return Priority;
+ return llvm::APInt(32, Priority);
}
// Default Priority is zero.
- return 0;
+ return llvm::APInt::getZero(32);
}
TargetInfo::CallingConvCheckResult
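Editorial aside (not part of the patch): the RISC-V rule above amounts to "an explicit priority=<N> feature wins immediately, otherwise the priority is zero", and the result is now carried as a 32-bit llvm::APInt instead of a uint64_t. A minimal standalone sketch of that parsing in plain C++, with hypothetical names:
#include <charconv>
#include <cstdint>
#include <string_view>
#include <vector>
// Hypothetical stand-in for the RISC-V getFMVPriority logic: return the first
// explicit "priority=<N>" value found, or zero if none is given.
static uint32_t fmvPrioritySketch(const std::vector<std::string_view> &features) {
  for (std::string_view f : features) {
    auto pos = f.find("priority=");
    if (pos == std::string_view::npos)
      continue;
    std::string_view num = f.substr(pos + 9);
    uint32_t value = 0;
    auto [ptr, ec] = std::from_chars(num.data(), num.data() + num.size(), value);
    if (ec == std::errc() && ptr == num.data() + num.size())
      return value; // explicit priority, returned immediately
  }
  return 0; // default priority
}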
diff --git a/clang/lib/Basic/Targets/RISCV.h b/clang/lib/Basic/Targets/RISCV.h
index 8d629ab..58bfad1 100644
--- a/clang/lib/Basic/Targets/RISCV.h
+++ b/clang/lib/Basic/Targets/RISCV.h
@@ -123,7 +123,7 @@ public:
void fillValidTuneCPUList(SmallVectorImpl<StringRef> &Values) const override;
bool supportsTargetAttributeTune() const override { return true; }
ParsedTargetAttr parseTargetAttr(StringRef Str) const override;
- uint64_t getFMVPriority(ArrayRef<StringRef> Features) const override;
+ llvm::APInt getFMVPriority(ArrayRef<StringRef> Features) const override;
std::pair<unsigned, unsigned> hardwareInterferenceSizes() const override {
return std::make_pair(32, 32);
diff --git a/clang/lib/Basic/Targets/X86.cpp b/clang/lib/Basic/Targets/X86.cpp
index a1f5aa2..24ecec2 100644
--- a/clang/lib/Basic/Targets/X86.cpp
+++ b/clang/lib/Basic/Targets/X86.cpp
@@ -1390,8 +1390,8 @@ static llvm::X86::ProcessorFeatures getFeature(StringRef Name) {
// correct, so it asserts if the value is out of range.
}
-uint64_t X86TargetInfo::getFMVPriority(ArrayRef<StringRef> Features) const {
- auto getPriority = [](StringRef Feature) -> uint64_t {
+llvm::APInt X86TargetInfo::getFMVPriority(ArrayRef<StringRef> Features) const {
+ auto getPriority = [](StringRef Feature) -> unsigned {
// Valid CPUs have a 'key feature' that compares just better than its key
// feature.
using namespace llvm::X86;
@@ -1405,11 +1405,11 @@ uint64_t X86TargetInfo::getFMVPriority(ArrayRef<StringRef> Features) const {
return getFeaturePriority(getFeature(Feature)) << 1;
};
- uint64_t Priority = 0;
+ unsigned Priority = 0;
for (StringRef Feature : Features)
if (!Feature.empty())
Priority = std::max(Priority, getPriority(Feature));
- return Priority;
+ return llvm::APInt(32, Priority);
}
bool X86TargetInfo::validateCPUSpecificCPUDispatch(StringRef Name) const {
diff --git a/clang/lib/Basic/Targets/X86.h b/clang/lib/Basic/Targets/X86.h
index ebc59c9..eb15103 100644
--- a/clang/lib/Basic/Targets/X86.h
+++ b/clang/lib/Basic/Targets/X86.h
@@ -388,7 +388,7 @@ public:
return CPU != llvm::X86::CK_None;
}
- uint64_t getFMVPriority(ArrayRef<StringRef> Features) const override;
+ llvm::APInt getFMVPriority(ArrayRef<StringRef> Features) const override;
bool setFPMath(StringRef Name) override;
diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
index 61d1c54..ef136f8 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
@@ -121,6 +121,13 @@ RValue CIRGenFunction::emitBuiltinExpr(const GlobalDecl &gd, unsigned builtinID,
return RValue::get(nullptr);
}
+ case Builtin::BI__builtin_assume_separate_storage: {
+ mlir::Value value0 = emitScalarExpr(e->getArg(0));
+ mlir::Value value1 = emitScalarExpr(e->getArg(1));
+ builder.create<cir::AssumeSepStorageOp>(loc, value0, value1);
+ return RValue::get(nullptr);
+ }
+
case Builtin::BI__builtin_complex: {
mlir::Value real = emitScalarExpr(e->getArg(0));
mlir::Value imag = emitScalarExpr(e->getArg(1));
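Editorial illustration (not part of the patch): __builtin_assume_separate_storage takes two pointer arguments and promises the optimizer that they do not alias, which is what the new AssumeSepStorageOp records in CIR. A minimal use:
// The builtin evaluates both pointer arguments and produces no value.
void scale(float *dst, float *src, int n) {
  __builtin_assume_separate_storage(dst, src);
  for (int i = 0; i < n; ++i)
    dst[i] = 2.0f * src[i];
}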
diff --git a/clang/lib/CIR/CodeGen/CIRGenCXXABI.h b/clang/lib/CIR/CodeGen/CIRGenCXXABI.h
index eb079b8..5929568 100644
--- a/clang/lib/CIR/CodeGen/CIRGenCXXABI.h
+++ b/clang/lib/CIR/CodeGen/CIRGenCXXABI.h
@@ -75,6 +75,11 @@ public:
/// Emit dtor variants required by this ABI.
virtual void emitCXXDestructors(const clang::CXXDestructorDecl *d) = 0;
+ virtual void emitDestructorCall(CIRGenFunction &cgf,
+ const CXXDestructorDecl *dd, CXXDtorType type,
+ bool forVirtualBase, bool delegating,
+ Address thisAddr, QualType thisTy) = 0;
+
/// Returns true if the given destructor type should be emitted as a linkonce
/// delegating thunk, regardless of whether the dtor is defined in this TU or
/// not.
diff --git a/clang/lib/CIR/CodeGen/CIRGenCXXExpr.cpp b/clang/lib/CIR/CodeGen/CIRGenCXXExpr.cpp
index 8da832d..67d8988 100644
--- a/clang/lib/CIR/CodeGen/CIRGenCXXExpr.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenCXXExpr.cpp
@@ -246,6 +246,29 @@ static void emitNewInitializer(CIRGenFunction &cgf, const CXXNewExpr *e,
}
}
+RValue CIRGenFunction::emitCXXDestructorCall(
+ GlobalDecl dtor, const CIRGenCallee &callee, mlir::Value thisVal,
+ QualType thisTy, mlir::Value implicitParam, QualType implicitParamTy,
+ const CallExpr *ce) {
+ const CXXMethodDecl *dtorDecl = cast<CXXMethodDecl>(dtor.getDecl());
+
+ assert(!thisTy.isNull());
+ assert(thisTy->getAsCXXRecordDecl() == dtorDecl->getParent() &&
+ "Pointer/Object mixup");
+
+ assert(!cir::MissingFeatures::addressSpace());
+
+ CallArgList args;
+ commonBuildCXXMemberOrOperatorCall(*this, dtorDecl, thisVal, implicitParam,
+ implicitParamTy, ce, args, nullptr);
+ assert((ce || dtor.getDecl()) && "expected source location provider");
+ assert(!cir::MissingFeatures::opCallMustTail());
+ return emitCall(cgm.getTypes().arrangeCXXStructorDeclaration(dtor), callee,
+ ReturnValueSlot(), args, nullptr,
+ ce ? getLoc(ce->getExprLoc())
+ : getLoc(dtor.getDecl()->getSourceRange()));
+}
+
/// Emit a call to an operator new or operator delete function, as implicitly
/// created by new-expressions and delete-expressions.
static RValue emitNewDeleteCall(CIRGenFunction &cgf,
diff --git a/clang/lib/CIR/CodeGen/CIRGenClass.cpp b/clang/lib/CIR/CodeGen/CIRGenClass.cpp
index 8667bb6..50cca0e 100644
--- a/clang/lib/CIR/CodeGen/CIRGenClass.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenClass.cpp
@@ -12,6 +12,7 @@
#include "CIRGenCXXABI.h"
#include "CIRGenFunction.h"
+#include "CIRGenValue.h"
#include "clang/AST/ExprCXX.h"
#include "clang/AST/RecordLayout.h"
@@ -311,6 +312,116 @@ void CIRGenFunction::emitInitializerForField(FieldDecl *field, LValue lhs,
assert(!cir::MissingFeatures::requiresCleanups());
}
+/// Emit a loop to call a particular constructor for each of several members
+/// of an array.
+///
+/// \param ctor the constructor to call for each element
+/// \param arrayType the type of the array to initialize
+/// \param arrayBegin an arrayType*
+/// \param zeroInitialize true if each element should be
+/// zero-initialized before it is constructed
+void CIRGenFunction::emitCXXAggrConstructorCall(
+ const CXXConstructorDecl *ctor, const clang::ArrayType *arrayType,
+ Address arrayBegin, const CXXConstructExpr *e, bool newPointerIsChecked,
+ bool zeroInitialize) {
+ QualType elementType;
+ mlir::Value numElements = emitArrayLength(arrayType, elementType, arrayBegin);
+ emitCXXAggrConstructorCall(ctor, numElements, arrayBegin, e,
+ newPointerIsChecked, zeroInitialize);
+}
+
+/// Emit a loop to call a particular constructor for each of several members
+/// of an array.
+///
+/// \param ctor the constructor to call for each element
+/// \param numElements the number of elements in the array;
+/// may be zero
+/// \param arrayBase a T*, where T is the type constructed by ctor
+/// \param zeroInitialize true if each element should be
+/// zero-initialized before it is constructed
+void CIRGenFunction::emitCXXAggrConstructorCall(
+ const CXXConstructorDecl *ctor, mlir::Value numElements, Address arrayBase,
+ const CXXConstructExpr *e, bool newPointerIsChecked, bool zeroInitialize) {
+ // It's legal for numElements to be zero. This can happen both
+ // dynamically, because x can be zero in 'new A[x]', and statically,
+ // because of GCC extensions that permit zero-length arrays. There
+ // are probably legitimate places where we could assume that this
+ // doesn't happen, but it's not clear that it's worth it.
+
+ // Optimize for a constant count.
+ auto constantCount = dyn_cast<cir::ConstantOp>(numElements.getDefiningOp());
+ if (constantCount) {
+ auto constIntAttr = mlir::dyn_cast<cir::IntAttr>(constantCount.getValue());
+ // Just skip out if the constant count is zero.
+ if (constIntAttr && constIntAttr.getUInt() == 0)
+ return;
+ } else {
+ // Otherwise, emit the check.
+ cgm.errorNYI(e->getSourceRange(), "dynamic-length array expression");
+ }
+
+ auto arrayTy = mlir::cast<cir::ArrayType>(arrayBase.getElementType());
+ mlir::Type elementType = arrayTy.getElementType();
+ cir::PointerType ptrToElmType = builder.getPointerTo(elementType);
+
+ // Traditional LLVM codegen emits a loop here. CIR lowers to a loop as part of
+ // LoweringPrepare.
+
+ // The alignment of the base, adjusted by the size of a single element,
+ // provides a conservative estimate of the alignment of every element.
+ // (This assumes we never start tracking offsetted alignments.)
+ //
+ // Note that these are complete objects and so we don't need to
+ // use the non-virtual size or alignment.
+ QualType type = getContext().getTypeDeclType(ctor->getParent());
+ CharUnits eltAlignment = arrayBase.getAlignment().alignmentOfArrayElement(
+ getContext().getTypeSizeInChars(type));
+
+ // Zero initialize the storage, if requested.
+ if (zeroInitialize)
+ emitNullInitialization(*currSrcLoc, arrayBase, type);
+
+ // C++ [class.temporary]p4:
+ // There are two contexts in which temporaries are destroyed at a different
+ // point than the end of the full-expression. The first context is when a
+ // default constructor is called to initialize an element of an array.
+ // If the constructor has one or more default arguments, the destruction of
+ // every temporary created in a default argument expression is sequenced
+ // before the construction of the next array element, if any.
+ {
+ assert(!cir::MissingFeatures::runCleanupsScope());
+
+ // Evaluate the constructor and its arguments in a regular
+ // partial-destroy cleanup.
+ if (getLangOpts().Exceptions &&
+ !ctor->getParent()->hasTrivialDestructor()) {
+ cgm.errorNYI(e->getSourceRange(), "partial array cleanups");
+ }
+
+ // Emit the constructor call that will execute for every array element.
+ mlir::Value arrayOp =
+ builder.createPtrBitcast(arrayBase.getPointer(), arrayTy);
+ builder.create<cir::ArrayCtor>(
+ *currSrcLoc, arrayOp, [&](mlir::OpBuilder &b, mlir::Location loc) {
+ mlir::BlockArgument arg =
+ b.getInsertionBlock()->addArgument(ptrToElmType, loc);
+ Address curAddr = Address(arg, elementType, eltAlignment);
+ assert(!cir::MissingFeatures::sanitizers());
+ auto currAVS = AggValueSlot::forAddr(
+ curAddr, type.getQualifiers(), AggValueSlot::IsDestructed,
+ AggValueSlot::IsNotAliased, AggValueSlot::DoesNotOverlap,
+ AggValueSlot::IsNotZeroed);
+ emitCXXConstructorCall(ctor, Ctor_Complete,
+ /*ForVirtualBase=*/false,
+ /*Delegating=*/false, currAVS, e);
+ builder.create<cir::YieldOp>(loc);
+ });
+ }
+
+ if (constantCount.use_empty())
+ constantCount.erase();
+}
+
void CIRGenFunction::emitDelegateCXXConstructorCall(
const CXXConstructorDecl *ctor, CXXCtorType ctorType,
const FunctionArgList &args, SourceLocation loc) {
@@ -369,6 +480,19 @@ void CIRGenFunction::emitImplicitAssignmentOperatorBody(FunctionArgList &args) {
s->getStmtClassName());
}
+void CIRGenFunction::destroyCXXObject(CIRGenFunction &cgf, Address addr,
+ QualType type) {
+ const RecordType *rtype = type->castAs<RecordType>();
+ const CXXRecordDecl *record = cast<CXXRecordDecl>(rtype->getDecl());
+ const CXXDestructorDecl *dtor = record->getDestructor();
+ // TODO(cir): Unlike traditional codegen, CIRGen should actually emit trivial
+ // dtors which shall be removed on later CIR passes. However, only remove this
+ // assertion after we have a test case to exercise this path.
+ assert(!dtor->isTrivial());
+ cgf.emitCXXDestructorCall(dtor, Dtor_Complete, /*forVirtualBase*/ false,
+ /*delegating=*/false, addr, type);
+}
+
void CIRGenFunction::emitDelegatingCXXConstructorCall(
const CXXConstructorDecl *ctor, const FunctionArgList &args) {
assert(ctor->isDelegatingConstructor());
@@ -392,6 +516,14 @@ void CIRGenFunction::emitDelegatingCXXConstructorCall(
}
}
+void CIRGenFunction::emitCXXDestructorCall(const CXXDestructorDecl *dd,
+ CXXDtorType type,
+ bool forVirtualBase, bool delegating,
+ Address thisAddr, QualType thisTy) {
+ cgm.getCXXABI().emitDestructorCall(*this, dd, type, forVirtualBase,
+ delegating, thisAddr, thisTy);
+}
+
Address CIRGenFunction::getAddressOfBaseClass(
Address value, const CXXRecordDecl *derived,
llvm::iterator_range<CastExpr::path_const_iterator> path,
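Editorial illustration (not part of the patch): the kind of source that now reaches the new ArrayCtor path above instead of errorNYI; only constant element counts are handled so far, and a constant count of zero skips construction entirely.
struct Widget {
  Widget() : id(0) {} // user-provided constructor, so construction is non-trivial
  int id;
};
void makeWidgets() {
  Widget local[4]; // constant count: one ArrayCtor region, expanded into a loop
                   // later by LoweringPrepare
  (void)local;
}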
diff --git a/clang/lib/CIR/CodeGen/CIRGenCleanup.cpp b/clang/lib/CIR/CodeGen/CIRGenCleanup.cpp
new file mode 100644
index 0000000..be21ce9
--- /dev/null
+++ b/clang/lib/CIR/CodeGen/CIRGenCleanup.cpp
@@ -0,0 +1,69 @@
+//===--- CIRGenCleanup.cpp - Bookkeeping and code emission for cleanups ---===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains code dealing with the IR generation for cleanups
+// and related information.
+//
+// A "cleanup" is a piece of code which needs to be executed whenever
+// control transfers out of a particular scope. This can be
+// conditionalized to occur only on exceptional control flow, only on
+// normal control flow, or both.
+//
+//===----------------------------------------------------------------------===//
+
+#include "CIRGenFunction.h"
+
+#include "clang/CIR/MissingFeatures.h"
+
+using namespace clang;
+using namespace clang::CIRGen;
+
+//===----------------------------------------------------------------------===//
+// CIRGenFunction cleanup related
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// EHScopeStack
+//===----------------------------------------------------------------------===//
+
+void EHScopeStack::Cleanup::anchor() {}
+
+static mlir::Block *getCurCleanupBlock(CIRGenFunction &cgf) {
+ mlir::OpBuilder::InsertionGuard guard(cgf.getBuilder());
+ mlir::Block *cleanup =
+ cgf.curLexScope->getOrCreateCleanupBlock(cgf.getBuilder());
+ return cleanup;
+}
+
+/// Pops a cleanup block. If the block includes a normal cleanup, the
+/// current insertion point is threaded through the cleanup, as are
+/// any branch fixups on the cleanup.
+void CIRGenFunction::popCleanupBlock() {
+ assert(!ehStack.cleanupStack.empty() && "cleanup stack is empty!");
+ mlir::OpBuilder::InsertionGuard guard(builder);
+ std::unique_ptr<EHScopeStack::Cleanup> cleanup =
+ ehStack.cleanupStack.pop_back_val();
+
+ assert(!cir::MissingFeatures::ehCleanupFlags());
+ mlir::Block *cleanupEntry = getCurCleanupBlock(*this);
+ builder.setInsertionPointToEnd(cleanupEntry);
+ cleanup->emit(*this);
+}
+
+/// Pops cleanup blocks until the given savepoint is reached.
+void CIRGenFunction::popCleanupBlocks(size_t oldCleanupStackDepth) {
+ assert(!cir::MissingFeatures::ehstackBranches());
+
+ assert(ehStack.getStackDepth() >= oldCleanupStackDepth);
+
+ // Pop cleanup blocks until we reach the base stack depth for the
+ // current scope.
+ while (ehStack.getStackDepth() > oldCleanupStackDepth) {
+ popCleanupBlock();
+ }
+}
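The model introduced here is intentionally simple: a vector of owned Cleanup objects, popped newest-first until a saved depth is reached. An editorial, standalone sketch of that discipline in plain C++ (hypothetical names, no MLIR involved):
#include <cstdio>
#include <memory>
#include <vector>
struct CleanupSketch {
  virtual ~CleanupSketch() = default;
  virtual void emit() = 0; // stands in for Cleanup::emit(CIRGenFunction &)
};
struct PrintCleanup final : CleanupSketch {
  const char *name;
  explicit PrintCleanup(const char *n) : name(n) {}
  void emit() override { std::printf("cleanup: %s\n", name); }
};
int main() {
  std::vector<std::unique_ptr<CleanupSketch>> stack; // cf. EHScopeStack::cleanupStack
  auto outerDepth = stack.size();                    // cf. RunCleanupsScope saving the depth
  stack.push_back(std::make_unique<PrintCleanup>("a"));
  stack.push_back(std::make_unique<PrintCleanup>("b"));
  // popCleanupBlocks(outerDepth): emit newest-first back down to the saved depth.
  while (stack.size() > outerDepth) {
    stack.back()->emit(); // prints "b" then "a"
    stack.pop_back();
  }
}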
diff --git a/clang/lib/CIR/CodeGen/CIRGenDecl.cpp b/clang/lib/CIR/CodeGen/CIRGenDecl.cpp
index afbe92a..a28ac3c 100644
--- a/clang/lib/CIR/CodeGen/CIRGenDecl.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenDecl.cpp
@@ -183,8 +183,8 @@ void CIRGenFunction::emitAutoVarCleanups(
const VarDecl &d = *emission.Variable;
// Check the type for a cleanup.
- if (d.needsDestruction(getContext()))
- cgm.errorNYI(d.getSourceRange(), "emitAutoVarCleanups: type cleanup");
+ if (QualType::DestructionKind dtorKind = d.needsDestruction(getContext()))
+ emitAutoVarTypeCleanup(emission, dtorKind);
assert(!cir::MissingFeatures::opAllocaPreciseLifetime());
@@ -648,3 +648,96 @@ void CIRGenFunction::emitNullabilityCheck(LValue lhs, mlir::Value rhs,
assert(!cir::MissingFeatures::sanitizers());
}
+
+/// Immediately perform the destruction of the given object.
+///
+/// \param addr - the address of the object; a type*
+/// \param type - the type of the object; if an array type, all
+/// objects are destroyed in reverse order
+/// \param destroyer - the function to call to destroy individual
+/// elements
+void CIRGenFunction::emitDestroy(Address addr, QualType type,
+ Destroyer *destroyer) {
+ if (getContext().getAsArrayType(type))
+ cgm.errorNYI("emitDestroy: array type");
+
+ return destroyer(*this, addr, type);
+}
+
+CIRGenFunction::Destroyer *
+CIRGenFunction::getDestroyer(QualType::DestructionKind kind) {
+ switch (kind) {
+ case QualType::DK_none:
+ llvm_unreachable("no destroyer for trivial dtor");
+ case QualType::DK_cxx_destructor:
+ return destroyCXXObject;
+ case QualType::DK_objc_strong_lifetime:
+ case QualType::DK_objc_weak_lifetime:
+ case QualType::DK_nontrivial_c_struct:
+ cgm.errorNYI("getDestroyer: other destruction kind");
+ return nullptr;
+ }
+ llvm_unreachable("Unknown DestructionKind");
+}
+
+namespace {
+struct DestroyObject final : EHScopeStack::Cleanup {
+ DestroyObject(Address addr, QualType type,
+ CIRGenFunction::Destroyer *destroyer)
+ : addr(addr), type(type), destroyer(destroyer) {}
+
+ Address addr;
+ QualType type;
+ CIRGenFunction::Destroyer *destroyer;
+
+ void emit(CIRGenFunction &cgf) override {
+ cgf.emitDestroy(addr, type, destroyer);
+ }
+};
+} // namespace
+
+/// Enter a destroy cleanup for the given local variable.
+void CIRGenFunction::emitAutoVarTypeCleanup(
+ const CIRGenFunction::AutoVarEmission &emission,
+ QualType::DestructionKind dtorKind) {
+ assert(dtorKind != QualType::DK_none);
+
+ // Note that for __block variables, we want to destroy the
+ // original stack object, not the possibly forwarded object.
+ Address addr = emission.getObjectAddress(*this);
+
+ const VarDecl *var = emission.Variable;
+ QualType type = var->getType();
+
+ CleanupKind cleanupKind = NormalAndEHCleanup;
+ CIRGenFunction::Destroyer *destroyer = nullptr;
+
+ switch (dtorKind) {
+ case QualType::DK_none:
+ llvm_unreachable("no cleanup for trivially-destructible variable");
+
+ case QualType::DK_cxx_destructor:
+ // If there's an NRVO flag on the emission, we need a different
+ // cleanup.
+ if (emission.NRVOFlag) {
+ cgm.errorNYI(var->getSourceRange(), "emitAutoVarTypeCleanup: NRVO");
+ return;
+ }
+ // Otherwise, this is handled below.
+ break;
+
+ case QualType::DK_objc_strong_lifetime:
+ case QualType::DK_objc_weak_lifetime:
+ case QualType::DK_nontrivial_c_struct:
+ cgm.errorNYI(var->getSourceRange(),
+ "emitAutoVarTypeCleanup: other dtor kind");
+ return;
+ }
+
+ // If we haven't chosen a more specific destroyer, use the default.
+ if (!destroyer)
+ destroyer = getDestroyer(dtorKind);
+
+ assert(!cir::MissingFeatures::ehCleanupFlags());
+ ehStack.pushCleanup<DestroyObject>(cleanupKind, addr, type, destroyer);
+}
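Editorial illustration (not part of the patch): a local like the one below now takes the DK_cxx_destructor path above, pushing a DestroyObject cleanup that calls the destructor on scope exit (an early return while the cleanup stack is non-empty is still errorNYI, see the CIRGenStmt.cpp hunk further down).
struct Guard {
  ~Guard() {} // user-provided destructor, so needsDestruction() is DK_cxx_destructor
};
void useGuard() {
  Guard g; // a DestroyObject cleanup is pushed for 'g' here
}          // normal scope exit pops the cleanup and emits the call to ~Guard()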
diff --git a/clang/lib/CIR/CodeGen/CIRGenExpr.cpp b/clang/lib/CIR/CodeGen/CIRGenExpr.cpp
index 1f64801..7ff5f26 100644
--- a/clang/lib/CIR/CodeGen/CIRGenExpr.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenExpr.cpp
@@ -1657,37 +1657,38 @@ void CIRGenFunction::emitCXXConstructExpr(const CXXConstructExpr *e,
return;
}
- if (getContext().getAsArrayType(e->getType())) {
- cgm.errorNYI(e->getSourceRange(), "emitCXXConstructExpr: array type");
- return;
- }
+ if (const ArrayType *arrayType = getContext().getAsArrayType(e->getType())) {
+ assert(!cir::MissingFeatures::sanitizers());
+ emitCXXAggrConstructorCall(cd, arrayType, dest.getAddress(), e, false);
+ } else {
- clang::CXXCtorType type = Ctor_Complete;
- bool forVirtualBase = false;
- bool delegating = false;
-
- switch (e->getConstructionKind()) {
- case CXXConstructionKind::Complete:
- type = Ctor_Complete;
- break;
- case CXXConstructionKind::Delegating:
- // We should be emitting a constructor; GlobalDecl will assert this
- type = curGD.getCtorType();
- delegating = true;
- break;
- case CXXConstructionKind::VirtualBase:
- // This should just set 'forVirtualBase' to true and fall through, but
- // virtual base class support is otherwise missing, so this needs to wait
- // until it can be tested.
- cgm.errorNYI(e->getSourceRange(),
- "emitCXXConstructExpr: virtual base constructor");
- return;
- case CXXConstructionKind::NonVirtualBase:
- type = Ctor_Base;
- break;
- }
+ clang::CXXCtorType type = Ctor_Complete;
+ bool forVirtualBase = false;
+ bool delegating = false;
- emitCXXConstructorCall(cd, type, forVirtualBase, delegating, dest, e);
+ switch (e->getConstructionKind()) {
+ case CXXConstructionKind::Complete:
+ type = Ctor_Complete;
+ break;
+ case CXXConstructionKind::Delegating:
+ // We should be emitting a constructor; GlobalDecl will assert this
+ type = curGD.getCtorType();
+ delegating = true;
+ break;
+ case CXXConstructionKind::VirtualBase:
+ // This should just set 'forVirtualBase' to true and fall through, but
+ // virtual base class support is otherwise missing, so this needs to wait
+ // until it can be tested.
+ cgm.errorNYI(e->getSourceRange(),
+ "emitCXXConstructExpr: virtual base constructor");
+ return;
+ case CXXConstructionKind::NonVirtualBase:
+ type = Ctor_Base;
+ break;
+ }
+
+ emitCXXConstructorCall(cd, type, forVirtualBase, delegating, dest, e);
+ }
}
RValue CIRGenFunction::emitReferenceBindingToExpr(const Expr *e) {
diff --git a/clang/lib/CIR/CodeGen/CIRGenExprComplex.cpp b/clang/lib/CIR/CodeGen/CIRGenExprComplex.cpp
index 6756a7c..7f2e2ce 100644
--- a/clang/lib/CIR/CodeGen/CIRGenExprComplex.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenExprComplex.cpp
@@ -34,11 +34,20 @@ public:
}
mlir::Value emitLoadOfLValue(LValue lv, SourceLocation loc);
+
/// Store the specified real/imag parts into the
/// specified value pointer.
void emitStoreOfComplex(mlir::Location loc, mlir::Value val, LValue lv,
bool isInit);
+ /// Emit a cast from complex value Val to DestType.
+ mlir::Value emitComplexToComplexCast(mlir::Value value, QualType srcType,
+ QualType destType, SourceLocation loc);
+
+ /// Emit a cast from scalar value Val to DestType.
+ mlir::Value emitScalarToComplexCast(mlir::Value value, QualType srcType,
+ QualType destType, SourceLocation loc);
+
mlir::Value
VisitAbstractConditionalOperator(const AbstractConditionalOperator *e);
mlir::Value VisitArraySubscriptExpr(Expr *e);
@@ -164,14 +173,110 @@ LValue ComplexExprEmitter::emitBinAssignLValue(const BinaryOperator *e,
mlir::Value ComplexExprEmitter::emitCast(CastKind ck, Expr *op,
QualType destTy) {
switch (ck) {
+ case CK_Dependent:
+ llvm_unreachable("dependent type must be resolved before the CIR codegen");
+
case CK_NoOp:
case CK_LValueToRValue:
return Visit(op);
- default:
- break;
+
+ case CK_AtomicToNonAtomic:
+ case CK_NonAtomicToAtomic:
+ case CK_UserDefinedConversion: {
+ cgf.cgm.errorNYI(
+ "ComplexExprEmitter::emitCast Atmoic & UserDefinedConversion");
+ return {};
}
- cgf.cgm.errorNYI("ComplexType Cast");
- return {};
+
+ case CK_LValueBitCast: {
+ cgf.cgm.errorNYI("ComplexExprEmitter::emitCast CK_LValueBitCast");
+ return {};
+ }
+
+ case CK_LValueToRValueBitCast: {
+ LValue sourceLVal = cgf.emitLValue(op);
+ Address addr = sourceLVal.getAddress().withElementType(
+ builder, cgf.convertTypeForMem(destTy));
+ LValue destLV = cgf.makeAddrLValue(addr, destTy);
+ assert(!cir::MissingFeatures::opTBAA());
+ return emitLoadOfLValue(destLV, op->getExprLoc());
+ }
+
+ case CK_BitCast:
+ case CK_BaseToDerived:
+ case CK_DerivedToBase:
+ case CK_UncheckedDerivedToBase:
+ case CK_Dynamic:
+ case CK_ToUnion:
+ case CK_ArrayToPointerDecay:
+ case CK_FunctionToPointerDecay:
+ case CK_NullToPointer:
+ case CK_NullToMemberPointer:
+ case CK_BaseToDerivedMemberPointer:
+ case CK_DerivedToBaseMemberPointer:
+ case CK_MemberPointerToBoolean:
+ case CK_ReinterpretMemberPointer:
+ case CK_ConstructorConversion:
+ case CK_IntegralToPointer:
+ case CK_PointerToIntegral:
+ case CK_PointerToBoolean:
+ case CK_ToVoid:
+ case CK_VectorSplat:
+ case CK_IntegralCast:
+ case CK_BooleanToSignedIntegral:
+ case CK_IntegralToBoolean:
+ case CK_IntegralToFloating:
+ case CK_FloatingToIntegral:
+ case CK_FloatingToBoolean:
+ case CK_FloatingCast:
+ case CK_CPointerToObjCPointerCast:
+ case CK_BlockPointerToObjCPointerCast:
+ case CK_AnyPointerToBlockPointerCast:
+ case CK_ObjCObjectLValueCast:
+ case CK_FloatingComplexToReal:
+ case CK_FloatingComplexToBoolean:
+ case CK_IntegralComplexToReal:
+ case CK_IntegralComplexToBoolean:
+ case CK_ARCProduceObject:
+ case CK_ARCConsumeObject:
+ case CK_ARCReclaimReturnedObject:
+ case CK_ARCExtendBlockObject:
+ case CK_CopyAndAutoreleaseBlockObject:
+ case CK_BuiltinFnToFnPtr:
+ case CK_ZeroToOCLOpaqueType:
+ case CK_AddressSpaceConversion:
+ case CK_IntToOCLSampler:
+ case CK_FloatingToFixedPoint:
+ case CK_FixedPointToFloating:
+ case CK_FixedPointCast:
+ case CK_FixedPointToBoolean:
+ case CK_FixedPointToIntegral:
+ case CK_IntegralToFixedPoint:
+ case CK_MatrixCast:
+ case CK_HLSLVectorTruncation:
+ case CK_HLSLArrayRValue:
+ case CK_HLSLElementwiseCast:
+ case CK_HLSLAggregateSplatCast:
+ llvm_unreachable("invalid cast kind for complex value");
+
+ case CK_FloatingRealToComplex:
+ case CK_IntegralRealToComplex: {
+ assert(!cir::MissingFeatures::cgFPOptionsRAII());
+ return emitScalarToComplexCast(cgf.emitScalarExpr(op), op->getType(),
+ destTy, op->getExprLoc());
+ }
+
+ case CK_FloatingComplexCast:
+ case CK_FloatingComplexToIntegralComplex:
+ case CK_IntegralComplexCast:
+ case CK_IntegralComplexToFloatingComplex: {
+ assert(!cir::MissingFeatures::cgFPOptionsRAII());
+ return emitComplexToComplexCast(Visit(op), op->getType(), destTy,
+ op->getExprLoc());
+ }
+ }
+
+ llvm_unreachable("unknown cast resulting in complex value");
}
mlir::Value ComplexExprEmitter::emitConstant(
@@ -207,6 +312,49 @@ void ComplexExprEmitter::emitStoreOfComplex(mlir::Location loc, mlir::Value val,
builder.createStore(loc, val, destAddr);
}
+mlir::Value ComplexExprEmitter::emitComplexToComplexCast(mlir::Value val,
+ QualType srcType,
+ QualType destType,
+ SourceLocation loc) {
+ if (srcType == destType)
+ return val;
+
+ // Get the src/dest element type.
+ QualType srcElemTy = srcType->castAs<ComplexType>()->getElementType();
+ QualType destElemTy = destType->castAs<ComplexType>()->getElementType();
+
+ cir::CastKind castOpKind;
+ if (srcElemTy->isFloatingType() && destElemTy->isFloatingType())
+ castOpKind = cir::CastKind::float_complex;
+ else if (srcElemTy->isFloatingType() && destElemTy->isIntegerType())
+ castOpKind = cir::CastKind::float_complex_to_int_complex;
+ else if (srcElemTy->isIntegerType() && destElemTy->isFloatingType())
+ castOpKind = cir::CastKind::int_complex_to_float_complex;
+ else if (srcElemTy->isIntegerType() && destElemTy->isIntegerType())
+ castOpKind = cir::CastKind::int_complex;
+ else
+ llvm_unreachable("unexpected src type or dest type");
+
+ return builder.createCast(cgf.getLoc(loc), castOpKind, val,
+ cgf.convertType(destType));
+}
+
+mlir::Value ComplexExprEmitter::emitScalarToComplexCast(mlir::Value val,
+ QualType srcType,
+ QualType destType,
+ SourceLocation loc) {
+ cir::CastKind castOpKind;
+ if (srcType->isFloatingType())
+ castOpKind = cir::CastKind::float_to_complex;
+ else if (srcType->isIntegerType())
+ castOpKind = cir::CastKind::int_to_complex;
+ else
+ llvm_unreachable("unexpected src type");
+
+ return builder.createCast(cgf.getLoc(loc), castOpKind, val,
+ cgf.convertType(destType));
+}
+
mlir::Value ComplexExprEmitter::VisitAbstractConditionalOperator(
const AbstractConditionalOperator *e) {
mlir::Value condValue = Visit(e->getCond());
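Editorial reference (not part of the patch): C-level conversions and the cast kinds they exercise, using Clang's _Complex extensions; the names on the right are the CIR cast kinds handled above.
void complexCasts(float _Complex fc, int _Complex ic, float f, int i) {
  double _Complex dc = fc; // CK_FloatingComplexCast              -> float_complex
  int _Complex ic2 = fc;   // CK_FloatingComplexToIntegralComplex -> float_complex_to_int_complex
  float _Complex fc2 = ic; // CK_IntegralComplexToFloatingComplex -> int_complex_to_float_complex
  float _Complex fc3 = f;  // CK_FloatingRealToComplex            -> float_to_complex
  int _Complex ic3 = i;    // CK_IntegralRealToComplex            -> int_to_complex
  (void)dc; (void)ic2; (void)fc2; (void)fc3; (void)ic3;
}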
diff --git a/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp b/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp
index eba6bff..2523b0f 100644
--- a/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp
@@ -88,6 +88,10 @@ public:
// Utilities
//===--------------------------------------------------------------------===//
+ mlir::Value emitComplexToScalarConversion(mlir::Location loc,
+ mlir::Value value, CastKind kind,
+ QualType destTy);
+
mlir::Value emitPromotedValue(mlir::Value result, QualType promotionType) {
return builder.createFloatingCast(result, cgf.convertType(promotionType));
}
@@ -1125,7 +1129,7 @@ LValue ScalarExprEmitter::emitCompoundAssignLValue(
// 'An assignment expression has the value of the left operand after the
// assignment...'.
if (lhsLV.isBitField())
- cgf.cgm.errorNYI(e->getSourceRange(), "store through bitfield lvalue");
+ cgf.emitStoreThroughBitfieldLValue(RValue::get(result), lhsLV);
else
cgf.emitStoreThroughLValue(RValue::get(result), lhsLV);
@@ -1135,6 +1139,31 @@ LValue ScalarExprEmitter::emitCompoundAssignLValue(
return lhsLV;
}
+mlir::Value ScalarExprEmitter::emitComplexToScalarConversion(mlir::Location loc,
+ mlir::Value value,
+ CastKind kind,
+ QualType destTy) {
+ cir::CastKind castOpKind;
+ switch (kind) {
+ case CK_FloatingComplexToReal:
+ castOpKind = cir::CastKind::float_complex_to_real;
+ break;
+ case CK_IntegralComplexToReal:
+ castOpKind = cir::CastKind::int_complex_to_real;
+ break;
+ case CK_FloatingComplexToBoolean:
+ castOpKind = cir::CastKind::float_complex_to_bool;
+ break;
+ case CK_IntegralComplexToBoolean:
+ castOpKind = cir::CastKind::int_complex_to_bool;
+ break;
+ default:
+ llvm_unreachable("invalid complex-to-scalar cast kind");
+ }
+
+ return builder.createCast(loc, castOpKind, value, cgf.convertType(destTy));
+}
+
mlir::Value ScalarExprEmitter::emitPromoted(const Expr *e,
QualType promotionType) {
e = e->IgnoreParens();
@@ -1758,6 +1787,15 @@ mlir::Value ScalarExprEmitter::VisitCastExpr(CastExpr *ce) {
ce->getExprLoc(), opts);
}
+ case CK_FloatingComplexToReal:
+ case CK_IntegralComplexToReal:
+ case CK_FloatingComplexToBoolean:
+ case CK_IntegralComplexToBoolean: {
+ mlir::Value value = cgf.emitComplexExpr(subExpr);
+ return emitComplexToScalarConversion(cgf.getLoc(ce->getExprLoc()), value,
+ kind, destTy);
+ }
+
case CK_FloatingRealToComplex:
case CK_FloatingComplexCast:
case CK_IntegralRealToComplex:
diff --git a/clang/lib/CIR/CodeGen/CIRGenFunction.cpp b/clang/lib/CIR/CodeGen/CIRGenFunction.cpp
index 7e1a44c..b4b95d6 100644
--- a/clang/lib/CIR/CodeGen/CIRGenFunction.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenFunction.cpp
@@ -26,7 +26,11 @@ namespace clang::CIRGen {
CIRGenFunction::CIRGenFunction(CIRGenModule &cgm, CIRGenBuilderTy &builder,
bool suppressNewContext)
- : CIRGenTypeCache(cgm), cgm{cgm}, builder(builder) {}
+ : CIRGenTypeCache(cgm), cgm{cgm}, builder(builder) {
+ ehStack.setCGF(this);
+ currentCleanupStackDepth = 0;
+ assert(ehStack.getStackDepth() == 0);
+}
CIRGenFunction::~CIRGenFunction() {}
@@ -227,6 +231,14 @@ void CIRGenFunction::LexicalScope::cleanup() {
CIRGenBuilderTy &builder = cgf.builder;
LexicalScope *localScope = cgf.curLexScope;
+ auto applyCleanup = [&]() {
+ if (performCleanup) {
+ // ApplyDebugLocation
+ assert(!cir::MissingFeatures::generateDebugInfo());
+ forceCleanup();
+ }
+ };
+
if (returnBlock != nullptr) {
// Write out the return block, which loads the value from `__retval` and
// issues the `cir.return`.
@@ -235,32 +247,42 @@ void CIRGenFunction::LexicalScope::cleanup() {
(void)emitReturn(*returnLoc);
}
- mlir::Block *curBlock = builder.getBlock();
- if (isGlobalInit() && !curBlock)
- return;
- if (curBlock->mightHaveTerminator() && curBlock->getTerminator())
- return;
-
- // Get rid of any empty block at the end of the scope.
- bool entryBlock = builder.getInsertionBlock()->isEntryBlock();
- if (!entryBlock && curBlock->empty()) {
- curBlock->erase();
- if (returnBlock != nullptr && returnBlock->getUses().empty())
- returnBlock->erase();
- return;
- }
-
- // Reached the end of the scope.
- {
+ auto insertCleanupAndLeave = [&](mlir::Block *insPt) {
mlir::OpBuilder::InsertionGuard guard(builder);
- builder.setInsertionPointToEnd(curBlock);
+ builder.setInsertionPointToEnd(insPt);
+
+ // If we still don't have a cleanup block, it means that `applyCleanup`
+ // below might be able to get us one.
+ mlir::Block *cleanupBlock = localScope->getCleanupBlock(builder);
+
+ // Leverage and defer to RunCleanupsScope's dtor and scope handling.
+ applyCleanup();
+
+ // If we now have one after `applyCleanup`, hook it up properly.
+ if (!cleanupBlock && localScope->getCleanupBlock(builder)) {
+ cleanupBlock = localScope->getCleanupBlock(builder);
+ builder.create<cir::BrOp>(insPt->back().getLoc(), cleanupBlock);
+ if (!cleanupBlock->mightHaveTerminator()) {
+ mlir::OpBuilder::InsertionGuard guard(builder);
+ builder.setInsertionPointToEnd(cleanupBlock);
+ builder.create<cir::YieldOp>(localScope->endLoc);
+ }
+ }
if (localScope->depth == 0) {
// Reached the end of the function.
if (returnBlock != nullptr) {
- if (returnBlock->getUses().empty())
+ if (returnBlock->getUses().empty()) {
returnBlock->erase();
- else {
+ } else {
+ // Thread return block via cleanup block.
+ if (cleanupBlock) {
+ for (mlir::BlockOperand &blockUse : returnBlock->getUses()) {
+ cir::BrOp brOp = mlir::cast<cir::BrOp>(blockUse.getOwner());
+ brOp.setSuccessor(cleanupBlock);
+ }
+ }
+
builder.create<cir::BrOp>(*returnLoc, returnBlock);
return;
}
@@ -268,13 +290,50 @@ void CIRGenFunction::LexicalScope::cleanup() {
emitImplicitReturn();
return;
}
- // Reached the end of a non-function scope. Some scopes, such as those
- // used with the ?: operator, can return a value.
- if (!localScope->isTernary() && !curBlock->mightHaveTerminator()) {
+
+ // End of any local scope != function
+ // Ternary ops have to deal with matching arms for yielding types
+ // and do return a value, so they must do their own cir.yield insertion.
+ if (!localScope->isTernary() && !insPt->mightHaveTerminator()) {
!retVal ? builder.create<cir::YieldOp>(localScope->endLoc)
: builder.create<cir::YieldOp>(localScope->endLoc, retVal);
}
+ };
+
+ // If a cleanup block has been created at some point, branch to it
+ // and set the insertion point to continue at the cleanup block.
+ // Terminators are then inserted either in the cleanup block or
+ // inline in this current block.
+ mlir::Block *cleanupBlock = localScope->getCleanupBlock(builder);
+ if (cleanupBlock)
+ insertCleanupAndLeave(cleanupBlock);
+
+ // Now deal with any pending block wrap up like implicit end of
+ // scope.
+
+ mlir::Block *curBlock = builder.getBlock();
+ if (isGlobalInit() && !curBlock)
+ return;
+ if (curBlock->mightHaveTerminator() && curBlock->getTerminator())
+ return;
+
+ // Get rid of any empty block at the end of the scope.
+ bool entryBlock = builder.getInsertionBlock()->isEntryBlock();
+ if (!entryBlock && curBlock->empty()) {
+ curBlock->erase();
+ if (returnBlock != nullptr && returnBlock->getUses().empty())
+ returnBlock->erase();
+ return;
}
+
+ // If there's a cleanup block, branch to it, nothing else to do.
+ if (cleanupBlock) {
+ builder.create<cir::BrOp>(curBlock->back().getLoc(), cleanupBlock);
+ return;
+ }
+
+ // No pre-existing cleanup block: emit cleanup code and yield/return.
+ insertCleanupAndLeave(curBlock);
}
cir::ReturnOp CIRGenFunction::LexicalScope::emitReturn(mlir::Location loc) {
@@ -408,7 +467,19 @@ void CIRGenFunction::startFunction(GlobalDecl gd, QualType returnType,
}
}
-void CIRGenFunction::finishFunction(SourceLocation endLoc) {}
+void CIRGenFunction::finishFunction(SourceLocation endLoc) {
+ // Pop any cleanups that might have been associated with the
+ // parameters. Do this in whatever block we're currently in; it's
+ // important to do this before we enter the return block or return
+ // edges will be *really* confused.
+ // TODO(cir): Use prologueCleanupDepth here.
+ bool hasCleanups = ehStack.getStackDepth() != currentCleanupStackDepth;
+ if (hasCleanups) {
+ assert(!cir::MissingFeatures::generateDebugInfo());
+ // FIXME(cir): should we clearInsertionPoint? breaks many testcases
+ popCleanupBlocks(currentCleanupStackDepth);
+ }
+}
mlir::LogicalResult CIRGenFunction::emitFunctionBody(const clang::Stmt *body) {
auto result = mlir::LogicalResult::success();
@@ -593,11 +664,12 @@ void CIRGenFunction::emitDestructorBody(FunctionArgList &args) {
assert(!cir::MissingFeatures::dtorCleanups());
- // TODO(cir): A complete destructor is supposed to call the base destructor.
- // Since we have to emit both dtor kinds we just fall through for now and.
- // As long as we don't support virtual bases this should be functionally
- // equivalent.
- assert(!cir::MissingFeatures::completeDtors());
+ if (!isTryBody) {
+ QualType thisTy = dtor->getFunctionObjectParameterType();
+ emitCXXDestructorCall(dtor, Dtor_Base, /*forVirtualBase=*/false,
+ /*delegating=*/false, loadCXXThisAddress(), thisTy);
+ break;
+ }
// Fallthrough: act like we're in the base variant.
[[fallthrough]];
@@ -807,4 +879,48 @@ bool CIRGenFunction::shouldNullCheckClassCastValue(const CastExpr *ce) {
return true;
}
+/// Computes the length of an array in elements, as well as the base
+/// element type and a properly-typed first element pointer.
+mlir::Value
+CIRGenFunction::emitArrayLength(const clang::ArrayType *origArrayType,
+ QualType &baseType, Address &addr) {
+ const clang::ArrayType *arrayType = origArrayType;
+
+ // If it's a VLA, we have to load the stored size. Note that
+ // this is the size of the VLA in bytes, not its size in elements.
+ if (isa<VariableArrayType>(arrayType)) {
+ assert(cir::MissingFeatures::vlas());
+ cgm.errorNYI(*currSrcLoc, "VLAs");
+ return builder.getConstInt(*currSrcLoc, SizeTy, 0);
+ }
+
+ uint64_t countFromCLAs = 1;
+ QualType eltType;
+
+ auto cirArrayType = mlir::dyn_cast<cir::ArrayType>(addr.getElementType());
+
+ while (cirArrayType) {
+ assert(isa<ConstantArrayType>(arrayType));
+ countFromCLAs *= cirArrayType.getSize();
+ eltType = arrayType->getElementType();
+
+ cirArrayType =
+ mlir::dyn_cast<cir::ArrayType>(cirArrayType.getElementType());
+
+ arrayType = getContext().getAsArrayType(arrayType->getElementType());
+ assert((!cirArrayType || arrayType) &&
+ "CIR and Clang types are out-of-sync");
+ }
+
+ if (arrayType) {
+ // From this point onwards, the Clang array type has been emitted
+ // as some other type (probably a packed struct). Compute the array
+ // size, and just emit the 'begin' expression as a bitcast.
+ cgm.errorNYI(*currSrcLoc, "length for non-array underlying types");
+ }
+
+ baseType = eltType;
+ return builder.getConstInt(*currSrcLoc, SizeTy, countFromCLAs);
+}
+
} // namespace clang::CIRGen
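Editorial sketch of the countFromCLAs accumulation above: peel nested constant-array layers, multiplying their sizes, until the scalar element type remains (hypothetical types, plain C++).
#include <cstdint>
struct ArrayTypeSketch {          // stand-in for a nested cir::ArrayType
  uint64_t size;                  // element count at this level
  const ArrayTypeSketch *element; // nullptr once the element is not an array
};
static uint64_t elementCount(const ArrayTypeSketch *t) {
  uint64_t count = 1;
  for (; t; t = t->element)
    count *= t->size;
  return count;
}
// For int[2][3]: inner{3, nullptr}, outer{2, &inner}; elementCount(&outer) == 6.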
diff --git a/clang/lib/CIR/CodeGen/CIRGenFunction.h b/clang/lib/CIR/CodeGen/CIRGenFunction.h
index 12484196..4891c74 100644
--- a/clang/lib/CIR/CodeGen/CIRGenFunction.h
+++ b/clang/lib/CIR/CodeGen/CIRGenFunction.h
@@ -18,6 +18,7 @@
#include "CIRGenModule.h"
#include "CIRGenTypeCache.h"
#include "CIRGenValue.h"
+#include "EHScopeStack.h"
#include "Address.h"
@@ -61,6 +62,9 @@ public:
/// The compiler-generated variable that holds the return value.
std::optional<mlir::Value> fnRetAlloca;
+ /// Tracks function scope overall cleanup handling.
+ EHScopeStack ehStack;
+
/// CXXThisDecl - When generating code for a C++ member function,
/// this will hold the implicit 'this' declaration.
ImplicitParamDecl *cxxabiThisDecl = nullptr;
@@ -595,14 +599,65 @@ public:
FunctionArgList args, clang::SourceLocation loc,
clang::SourceLocation startLoc);
+ /// Takes the old cleanup stack size and emits the cleanup blocks
+ /// that have been added.
+ void popCleanupBlocks(size_t oldCleanupStackDepth);
+ void popCleanupBlock();
+
+ /// Enters a new scope for capturing cleanups, all of which
+ /// will be executed once the scope is exited.
+ class RunCleanupsScope {
+ size_t cleanupStackDepth, oldCleanupStackDepth;
+
+ protected:
+ bool performCleanup;
+
+ private:
+ RunCleanupsScope(const RunCleanupsScope &) = delete;
+ void operator=(const RunCleanupsScope &) = delete;
+
+ protected:
+ CIRGenFunction &cgf;
+
+ /// Enter a new cleanup scope.
+ explicit RunCleanupsScope(CIRGenFunction &cgf)
+ : performCleanup(true), cgf(cgf) {
+ cleanupStackDepth = cgf.ehStack.getStackDepth();
+ oldCleanupStackDepth = cgf.currentCleanupStackDepth;
+ cgf.currentCleanupStackDepth = cleanupStackDepth;
+ }
+
+ /// Exit this cleanup scope, emitting any accumulated cleanups.
+ ~RunCleanupsScope() {
+ if (performCleanup)
+ forceCleanup();
+ }
+
+ /// Force the emission of cleanups now, instead of waiting
+ /// until this object is destroyed.
+ void forceCleanup() {
+ assert(performCleanup && "Already forced cleanup");
+ {
+ mlir::OpBuilder::InsertionGuard guard(cgf.getBuilder());
+ cgf.popCleanupBlocks(cleanupStackDepth);
+ performCleanup = false;
+ cgf.currentCleanupStackDepth = oldCleanupStackDepth;
+ }
+ }
+ };
+
+ // Cleanup stack depth of the RunCleanupsScope that was pushed most recently.
+ size_t currentCleanupStackDepth;
+
+public:
/// Represents a scope, including function bodies, compound statements, and
/// the substatements of if/while/do/for/switch/try statements. This class
/// handles any automatic cleanup, along with the return value.
- struct LexicalScope {
+ struct LexicalScope : public RunCleanupsScope {
private:
- // TODO(CIR): This will live in the base class RunCleanupScope once that
- // class is upstreamed.
- CIRGenFunction &cgf;
+ // Block containing cleanup code for things initialized in this
+ // lexical context (scope).
+ mlir::Block *cleanupBlock = nullptr;
// Points to the scope entry block. This is useful, for instance, for
// helping to insert allocas before finalizing any recursive CodeGen from
@@ -632,8 +687,8 @@ public:
unsigned depth = 0;
LexicalScope(CIRGenFunction &cgf, mlir::Location loc, mlir::Block *eb)
- : cgf(cgf), entryBlock(eb), parentScope(cgf.curLexScope), beginLoc(loc),
- endLoc(loc) {
+ : RunCleanupsScope(cgf), entryBlock(eb), parentScope(cgf.curLexScope),
+ beginLoc(loc), endLoc(loc) {
assert(entryBlock && "LexicalScope requires an entry block");
cgf.curLexScope = this;
@@ -671,6 +726,27 @@ public:
void setAsSwitch() { scopeKind = Kind::Switch; }
void setAsTernary() { scopeKind = Kind::Ternary; }
+ // Lazily create the cleanup block, or return it if already created.
+ mlir::Block *getOrCreateCleanupBlock(mlir::OpBuilder &builder) {
+ if (cleanupBlock)
+ return cleanupBlock;
+ cleanupBlock = createCleanupBlock(builder);
+ return cleanupBlock;
+ }
+
+ mlir::Block *getCleanupBlock(mlir::OpBuilder &builder) {
+ return cleanupBlock;
+ }
+
+ mlir::Block *createCleanupBlock(mlir::OpBuilder &builder) {
+ // Create the cleanup block but don't hook it up just yet.
+ mlir::OpBuilder::InsertionGuard guard(builder);
+ mlir::Region *r = builder.getBlock() ? builder.getBlock()->getParent()
+ : &cgf.curFn->getRegion(0);
+ cleanupBlock = builder.createBlock(r);
+ return cleanupBlock;
+ }
+
// ---
// Return handling.
// ---
@@ -721,6 +797,12 @@ public:
LexicalScope *curLexScope = nullptr;
+ typedef void Destroyer(CIRGenFunction &cgf, Address addr, QualType ty);
+
+ static Destroyer destroyCXXObject;
+
+ Destroyer *getDestroyer(clang::QualType::DestructionKind kind);
+
/// ----------------------
/// CIR emit functions
/// ----------------------
@@ -766,6 +848,8 @@ public:
/// even if no aggregate location is provided.
RValue emitAnyExprToTemp(const clang::Expr *e);
+ mlir::Value emitArrayLength(const clang::ArrayType *arrayType,
+ QualType &baseType, Address &addr);
LValue emitArraySubscriptExpr(const clang::ArraySubscriptExpr *e);
Address emitArrayToPointerDecay(const Expr *array);
@@ -779,6 +863,8 @@ public:
void emitAutoVarCleanups(const AutoVarEmission &emission);
void emitAutoVarInit(const AutoVarEmission &emission);
+ void emitAutoVarTypeCleanup(const AutoVarEmission &emission,
+ clang::QualType::DestructionKind dtorKind);
void emitBaseInitializer(mlir::Location loc, const CXXRecordDecl *classDecl,
CXXCtorInitializer *baseInit);
@@ -836,6 +922,9 @@ public:
LValue emitCompoundLiteralLValue(const CompoundLiteralExpr *e);
void emitConstructorBody(FunctionArgList &args);
+
+ void emitDestroy(Address addr, QualType type, Destroyer *destroyer);
+
void emitDestructorBody(FunctionArgList &args);
mlir::LogicalResult emitContinueStmt(const clang::ContinueStmt &s);
@@ -843,6 +932,16 @@ public:
void emitCXXConstructExpr(const clang::CXXConstructExpr *e,
AggValueSlot dest);
+ void emitCXXAggrConstructorCall(const CXXConstructorDecl *ctor,
+ const clang::ArrayType *arrayType,
+ Address arrayBegin, const CXXConstructExpr *e,
+ bool newPointerIsChecked,
+ bool zeroInitialize = false);
+ void emitCXXAggrConstructorCall(const CXXConstructorDecl *ctor,
+ mlir::Value numElements, Address arrayBase,
+ const CXXConstructExpr *e,
+ bool newPointerIsChecked,
+ bool zeroInitialize);
void emitCXXConstructorCall(const clang::CXXConstructorDecl *d,
clang::CXXCtorType type, bool forVirtualBase,
bool delegating, AggValueSlot thisAVS,
@@ -853,6 +952,15 @@ public:
bool delegating, Address thisAddr,
CallArgList &args, clang::SourceLocation loc);
+ void emitCXXDestructorCall(const CXXDestructorDecl *dd, CXXDtorType type,
+ bool forVirtualBase, bool delegating,
+ Address thisAddr, QualType thisTy);
+
+ RValue emitCXXDestructorCall(GlobalDecl dtor, const CIRGenCallee &callee,
+ mlir::Value thisVal, QualType thisTy,
+ mlir::Value implicitParam,
+ QualType implicitParamTy, const CallExpr *e);
+
mlir::LogicalResult emitCXXForRangeStmt(const CXXForRangeStmt &s,
llvm::ArrayRef<const Attr *> attrs);
diff --git a/clang/lib/CIR/CodeGen/CIRGenItaniumCXXABI.cpp b/clang/lib/CIR/CodeGen/CIRGenItaniumCXXABI.cpp
index 1496d87..e5e4c68 100644
--- a/clang/lib/CIR/CodeGen/CIRGenItaniumCXXABI.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenItaniumCXXABI.cpp
@@ -46,6 +46,11 @@ public:
void emitCXXDestructors(const clang::CXXDestructorDecl *d) override;
void emitCXXStructor(clang::GlobalDecl gd) override;
+ void emitDestructorCall(CIRGenFunction &cgf, const CXXDestructorDecl *dd,
+ CXXDtorType type, bool forVirtualBase,
+ bool delegating, Address thisAddr,
+ QualType thisTy) override;
+
bool useThunkForDtorVariant(const CXXDestructorDecl *dtor,
CXXDtorType dt) const override {
// Itanium does not emit any destructor variant as an inline thunk.
@@ -108,8 +113,6 @@ static StructorCIRGen getCIRGenToUse(CIRGenModule &cgm,
GlobalDecl aliasDecl;
if (const auto *dd = dyn_cast<CXXDestructorDecl>(md)) {
- // The assignment is correct here, but other support for this is NYI.
- cgm.errorNYI(md->getSourceRange(), "getCIRGenToUse: dtor");
aliasDecl = GlobalDecl(dd, Dtor_Complete);
} else {
const auto *cd = cast<CXXConstructorDecl>(md);
@@ -240,6 +243,25 @@ bool CIRGenItaniumCXXABI::needsVTTParameter(GlobalDecl gd) {
return false;
}
+void CIRGenItaniumCXXABI::emitDestructorCall(
+ CIRGenFunction &cgf, const CXXDestructorDecl *dd, CXXDtorType type,
+ bool forVirtualBase, bool delegating, Address thisAddr, QualType thisTy) {
+ GlobalDecl gd(dd, type);
+ if (needsVTTParameter(gd)) {
+ cgm.errorNYI(dd->getSourceRange(), "emitDestructorCall: VTT");
+ }
+
+ mlir::Value vtt = nullptr;
+ ASTContext &astContext = cgm.getASTContext();
+ QualType vttTy = astContext.getPointerType(astContext.VoidPtrTy);
+ assert(!cir::MissingFeatures::appleKext());
+ CIRGenCallee callee =
+ CIRGenCallee::forDirect(cgm.getAddrOfCXXStructor(gd), gd);
+
+ cgf.emitCXXDestructorCall(gd, callee, thisAddr.getPointer(), thisTy, vtt,
+ vttTy, nullptr);
+}
+
CIRGenCXXABI *clang::CIRGen::CreateCIRGenItaniumCXXABI(CIRGenModule &cgm) {
switch (cgm.getASTContext().getCXXABIKind()) {
case TargetCXXABI::GenericItanium:
diff --git a/clang/lib/CIR/CodeGen/CIRGenStmt.cpp b/clang/lib/CIR/CodeGen/CIRGenStmt.cpp
index 9193f6f..21bee33 100644
--- a/clang/lib/CIR/CodeGen/CIRGenStmt.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenStmt.cpp
@@ -409,7 +409,10 @@ mlir::LogicalResult CIRGenFunction::emitReturnStmt(const ReturnStmt &s) {
}
auto *retBlock = curLexScope->getOrCreateRetBlock(*this, loc);
+ // This should emit a branch through the cleanup block if one exists.
builder.create<cir::BrOp>(loc, retBlock);
+ if (ehStack.getStackDepth() != currentCleanupStackDepth)
+ cgm.errorNYI(s.getSourceRange(), "return with cleanup stack");
builder.createBlock(builder.getBlock()->getParent());
return mlir::success();
diff --git a/clang/lib/CIR/CodeGen/CMakeLists.txt b/clang/lib/CIR/CodeGen/CMakeLists.txt
index 03ea60c..ca3a329 100644
--- a/clang/lib/CIR/CodeGen/CMakeLists.txt
+++ b/clang/lib/CIR/CodeGen/CMakeLists.txt
@@ -11,6 +11,7 @@ add_clang_library(clangCIR
CIRGenBuilder.cpp
CIRGenCall.cpp
CIRGenClass.cpp
+ CIRGenCleanup.cpp
CIRGenCXX.cpp
CIRGenCXXABI.cpp
CIRGenCXXExpr.cpp
diff --git a/clang/lib/CIR/CodeGen/EHScopeStack.h b/clang/lib/CIR/CodeGen/EHScopeStack.h
new file mode 100644
index 0000000..22750ac
--- /dev/null
+++ b/clang/lib/CIR/CodeGen/EHScopeStack.h
@@ -0,0 +1,99 @@
+//===-- EHScopeStack.h - Stack for cleanup CIR generation -------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// These classes should be the minimum interface required for other parts of
+// CIR CodeGen to emit cleanups. The implementation is in CIRGenCleanup.cpp and
+// other implementation details that are not widely needed are in
+// CIRGenCleanup.h.
+//
+// TODO(cir): this header should be shared between LLVM and CIR codegen.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef CLANG_LIB_CIR_CODEGEN_EHSCOPESTACK_H
+#define CLANG_LIB_CIR_CODEGEN_EHSCOPESTACK_H
+
+#include "llvm/ADT/SmallVector.h"
+
+namespace clang::CIRGen {
+
+class CIRGenFunction;
+
+enum CleanupKind : unsigned {
+ /// Denotes a cleanup that should run when a scope is exited using exceptional
+ /// control flow (e.g. a throw statement leading to stack unwinding).
+ EHCleanup = 0x1,
+
+ /// Denotes a cleanup that should run when a scope is exited using normal
+ /// control flow (falling off the end of the scope, return, goto, ...).
+ NormalCleanup = 0x2,
+
+ NormalAndEHCleanup = EHCleanup | NormalCleanup,
+
+ LifetimeMarker = 0x8,
+ NormalEHLifetimeMarker = LifetimeMarker | NormalAndEHCleanup,
+};
+
+/// A stack of scopes which respond to exceptions, including cleanups
+/// and catch blocks.
+class EHScopeStack {
+public:
+ /// Information for lazily generating a cleanup. Subclasses must be
+ /// POD-like: cleanups will not be destructed, and they will be
+ /// allocated on the cleanup stack and freely copied and moved
+ /// around.
+ ///
+ /// Cleanup implementations should generally be declared in an
+ /// anonymous namespace.
+ class Cleanup {
+ // Anchor the construction vtable.
+ virtual void anchor();
+
+ public:
+ Cleanup(const Cleanup &) = default;
+ Cleanup(Cleanup &&) {}
+ Cleanup() = default;
+
+ virtual ~Cleanup() = default;
+
+ /// Emit the cleanup. For normal cleanups, this is run in the
+ /// same EH context as when the cleanup was pushed, i.e. the
+ /// immediately-enclosing context of the cleanup scope. For
+ /// EH cleanups, this is run in a terminate context.
+ ///
+ /// \param cgf the CIRGenFunction used to emit the cleanup code.
+ virtual void emit(CIRGenFunction &cgf) = 0;
+ };
+
+ // Classic codegen has a finely tuned custom allocator and a complex stack
+ // management scheme. We'll probably eventually want to find a way to share
+ // that implementation. For now, we will use a very simplified implementation
+ // to get cleanups working.
+ llvm::SmallVector<std::unique_ptr<Cleanup>, 8> cleanupStack;
+
+private:
+ /// The CGF this stack belongs to.
+ CIRGenFunction *cgf = nullptr;
+
+public:
+ EHScopeStack() = default;
+ ~EHScopeStack() = default;
+
+ /// Push a lazily-created cleanup on the stack.
+ template <class T, class... As> void pushCleanup(CleanupKind kind, As... a) {
+ cleanupStack.push_back(std::make_unique<T>(a...));
+ }
+
+ void setCGF(CIRGenFunction *inCGF) { cgf = inCGF; }
+
+ size_t getStackDepth() const { return cleanupStack.size(); }
+};
+
+} // namespace clang::CIRGen
+
+#endif // CLANG_LIB_CIR_CODEGEN_EHSCOPESTACK_H
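Editorial note: CleanupKind composes as a bitmask, so NormalAndEHCleanup is just both bits set; the simplified pushCleanup above accepts the kind but does not consult it yet. A trivial standalone check of that composition:
#include <cassert>
int main() {
  const unsigned ehBit = 0x1;              // mirrors EHCleanup
  const unsigned normalBit = 0x2;          // mirrors NormalCleanup
  const unsigned both = ehBit | normalBit; // mirrors NormalAndEHCleanup
  assert(both & ehBit);     // would run on exceptional exits
  assert(both & normalBit); // would run on fall-through, return, goto
  return 0;
}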
diff --git a/clang/lib/CIR/Dialect/IR/CIRDialect.cpp b/clang/lib/CIR/Dialect/IR/CIRDialect.cpp
index f0416b6..cd77166 100644
--- a/clang/lib/CIR/Dialect/IR/CIRDialect.cpp
+++ b/clang/lib/CIR/Dialect/IR/CIRDialect.cpp
@@ -489,6 +489,104 @@ LogicalResult cir::CastOp::verify() {
return emitOpError() << "requires two types differ in addrspace only";
return success();
}
+ case cir::CastKind::float_to_complex: {
+ if (!mlir::isa<cir::FPTypeInterface>(srcType))
+ return emitOpError() << "requires !cir.float type for source";
+ auto resComplexTy = mlir::dyn_cast<cir::ComplexType>(resType);
+ if (!resComplexTy)
+ return emitOpError() << "requires !cir.complex type for result";
+ if (srcType != resComplexTy.getElementType())
+ return emitOpError() << "requires source type match result element type";
+ return success();
+ }
+ case cir::CastKind::int_to_complex: {
+ if (!mlir::isa<cir::IntType>(srcType))
+ return emitOpError() << "requires !cir.int type for source";
+ auto resComplexTy = mlir::dyn_cast<cir::ComplexType>(resType);
+ if (!resComplexTy)
+ return emitOpError() << "requires !cir.complex type for result";
+ if (srcType != resComplexTy.getElementType())
+ return emitOpError() << "requires source type match result element type";
+ return success();
+ }
+ case cir::CastKind::float_complex_to_real: {
+ auto srcComplexTy = mlir::dyn_cast<cir::ComplexType>(srcType);
+ if (!srcComplexTy)
+ return emitOpError() << "requires !cir.complex type for source";
+ if (!mlir::isa<cir::FPTypeInterface>(resType))
+ return emitOpError() << "requires !cir.float type for result";
+ if (srcComplexTy.getElementType() != resType)
+ return emitOpError() << "requires source element type match result type";
+ return success();
+ }
+ case cir::CastKind::int_complex_to_real: {
+ auto srcComplexTy = mlir::dyn_cast<cir::ComplexType>(srcType);
+ if (!srcComplexTy)
+ return emitOpError() << "requires !cir.complex type for source";
+ if (!mlir::isa<cir::IntType>(resType))
+ return emitOpError() << "requires !cir.int type for result";
+ if (srcComplexTy.getElementType() != resType)
+ return emitOpError() << "requires source element type match result type";
+ return success();
+ }
+ case cir::CastKind::float_complex_to_bool: {
+ auto srcComplexTy = mlir::dyn_cast<cir::ComplexType>(srcType);
+ if (!srcComplexTy || !srcComplexTy.isFloatingPointComplex())
+ return emitOpError()
+ << "requires floating point !cir.complex type for source";
+ if (!mlir::isa<cir::BoolType>(resType))
+ return emitOpError() << "requires !cir.bool type for result";
+ return success();
+ }
+ case cir::CastKind::int_complex_to_bool: {
+ auto srcComplexTy = mlir::dyn_cast<cir::ComplexType>(srcType);
+ if (!srcComplexTy || !srcComplexTy.isIntegerComplex())
+ return emitOpError()
+ << "requires floating point !cir.complex type for source";
+ if (!mlir::isa<cir::BoolType>(resType))
+ return emitOpError() << "requires !cir.bool type for result";
+ return success();
+ }
+ case cir::CastKind::float_complex: {
+ auto srcComplexTy = mlir::dyn_cast<cir::ComplexType>(srcType);
+ if (!srcComplexTy || !srcComplexTy.isFloatingPointComplex())
+ return emitOpError()
+ << "requires floating point !cir.complex type for source";
+ auto resComplexTy = mlir::dyn_cast<cir::ComplexType>(resType);
+ if (!resComplexTy || !resComplexTy.isFloatingPointComplex())
+ return emitOpError()
+ << "requires floating point !cir.complex type for result";
+ return success();
+ }
+ case cir::CastKind::float_complex_to_int_complex: {
+ auto srcComplexTy = mlir::dyn_cast<cir::ComplexType>(srcType);
+ if (!srcComplexTy || !srcComplexTy.isFloatingPointComplex())
+ return emitOpError()
+ << "requires floating point !cir.complex type for source";
+ auto resComplexTy = mlir::dyn_cast<cir::ComplexType>(resType);
+ if (!resComplexTy || !resComplexTy.isIntegerComplex())
+ return emitOpError() << "requires integer !cir.complex type for result";
+ return success();
+ }
+ case cir::CastKind::int_complex: {
+ auto srcComplexTy = mlir::dyn_cast<cir::ComplexType>(srcType);
+ if (!srcComplexTy || !srcComplexTy.isIntegerComplex())
+ return emitOpError() << "requires integer !cir.complex type for source";
+ auto resComplexTy = mlir::dyn_cast<cir::ComplexType>(resType);
+ if (!resComplexTy || !resComplexTy.isIntegerComplex())
+ return emitOpError() << "requires integer !cir.complex type for result";
+ return success();
+ }
+ case cir::CastKind::int_complex_to_float_complex: {
+ auto srcComplexTy = mlir::dyn_cast<cir::ComplexType>(srcType);
+ if (!srcComplexTy || !srcComplexTy.isIntegerComplex())
+ return emitOpError() << "requires integer !cir.complex type for source";
+ auto resComplexTy = mlir::dyn_cast<cir::ComplexType>(resType);
+ if (!resComplexTy || !resComplexTy.isFloatingPointComplex())
+ return emitOpError()
+ << "requires floating point !cir.complex type for result";
+ return success();
+ }
default:
llvm_unreachable("Unknown CastOp kind?");
}
diff --git a/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp b/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp
index 8f848c7..cef83ea 100644
--- a/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp
+++ b/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp
@@ -8,11 +8,14 @@
#include "PassDetail.h"
#include "clang/AST/ASTContext.h"
+#include "clang/AST/CharUnits.h"
#include "clang/CIR/Dialect/Builder/CIRBaseBuilder.h"
#include "clang/CIR/Dialect/IR/CIRDialect.h"
#include "clang/CIR/Dialect/IR/CIROpsEnums.h"
#include "clang/CIR/Dialect/Passes.h"
+#include "clang/CIR/MissingFeatures.h"
#include <memory>
using namespace mlir;
@@ -24,11 +27,106 @@ struct LoweringPreparePass : public LoweringPrepareBase<LoweringPreparePass> {
void runOnOperation() override;
void runOnOp(mlir::Operation *op);
+ void lowerCastOp(cir::CastOp op);
void lowerUnaryOp(cir::UnaryOp op);
+ void lowerArrayCtor(ArrayCtor op);
+
+ ///
+ /// AST related
+ /// -----------
+
+ clang::ASTContext *astCtx;
+
+ void setASTContext(clang::ASTContext *c) { astCtx = c; }
};
} // namespace
+static mlir::Value lowerScalarToComplexCast(mlir::MLIRContext &ctx,
+ cir::CastOp op) {
+ cir::CIRBaseBuilderTy builder(ctx);
+ builder.setInsertionPoint(op);
+
+ mlir::Value src = op.getSrc();
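+  // A scalar-to-complex cast keeps the scalar as the real part and uses a
+  // zero of the same type as the imaginary part: (T)v becomes (v, 0).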
+ mlir::Value imag = builder.getNullValue(src.getType(), op.getLoc());
+ return builder.createComplexCreate(op.getLoc(), src, imag);
+}
+
+static mlir::Value lowerComplexToScalarCast(mlir::MLIRContext &ctx,
+ cir::CastOp op,
+ cir::CastKind elemToBoolKind) {
+ cir::CIRBaseBuilderTy builder(ctx);
+ builder.setInsertionPoint(op);
+
+ mlir::Value src = op.getSrc();
+ if (!mlir::isa<cir::BoolType>(op.getType()))
+ return builder.createComplexReal(op.getLoc(), src);
+
+ // Complex cast to bool: (bool)(a+bi) => (bool)a || (bool)b
+ mlir::Value srcReal = builder.createComplexReal(op.getLoc(), src);
+ mlir::Value srcImag = builder.createComplexImag(op.getLoc(), src);
+
+ cir::BoolType boolTy = builder.getBoolTy();
+ mlir::Value srcRealToBool =
+ builder.createCast(op.getLoc(), elemToBoolKind, srcReal, boolTy);
+ mlir::Value srcImagToBool =
+ builder.createCast(op.getLoc(), elemToBoolKind, srcImag, boolTy);
+ return builder.createLogicalOr(op.getLoc(), srcRealToBool, srcImagToBool);
+}
+
+static mlir::Value lowerComplexToComplexCast(mlir::MLIRContext &ctx,
+ cir::CastOp op,
+ cir::CastKind scalarCastKind) {
+ CIRBaseBuilderTy builder(ctx);
+ builder.setInsertionPoint(op);
+
+ mlir::Value src = op.getSrc();
+ auto dstComplexElemTy =
+ mlir::cast<cir::ComplexType>(op.getType()).getElementType();
+
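+  // Cast the real and imaginary parts separately with the scalar cast kind,
+  // then rebuild the complex value from the converted parts.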
+ mlir::Value srcReal = builder.createComplexReal(op.getLoc(), src);
+ mlir::Value srcImag = builder.createComplexImag(op.getLoc(), src);
+
+ mlir::Value dstReal = builder.createCast(op.getLoc(), scalarCastKind, srcReal,
+ dstComplexElemTy);
+ mlir::Value dstImag = builder.createCast(op.getLoc(), scalarCastKind, srcImag,
+ dstComplexElemTy);
+ return builder.createComplexCreate(op.getLoc(), dstReal, dstImag);
+}
+
+void LoweringPreparePass::lowerCastOp(cir::CastOp op) {
+ mlir::MLIRContext &ctx = getContext();
+ mlir::Value loweredValue = [&]() -> mlir::Value {
+ switch (op.getKind()) {
+ case cir::CastKind::float_to_complex:
+ case cir::CastKind::int_to_complex:
+ return lowerScalarToComplexCast(ctx, op);
+ case cir::CastKind::float_complex_to_real:
+ case cir::CastKind::int_complex_to_real:
+ return lowerComplexToScalarCast(ctx, op, op.getKind());
+ case cir::CastKind::float_complex_to_bool:
+ return lowerComplexToScalarCast(ctx, op, cir::CastKind::float_to_bool);
+ case cir::CastKind::int_complex_to_bool:
+ return lowerComplexToScalarCast(ctx, op, cir::CastKind::int_to_bool);
+ case cir::CastKind::float_complex:
+ return lowerComplexToComplexCast(ctx, op, cir::CastKind::floating);
+ case cir::CastKind::float_complex_to_int_complex:
+ return lowerComplexToComplexCast(ctx, op, cir::CastKind::float_to_int);
+ case cir::CastKind::int_complex:
+ return lowerComplexToComplexCast(ctx, op, cir::CastKind::integral);
+ case cir::CastKind::int_complex_to_float_complex:
+ return lowerComplexToComplexCast(ctx, op, cir::CastKind::int_to_float);
+ default:
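+      // Casts that do not involve complex values are not handled here and
+      // are left in place for later lowering.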
+ return nullptr;
+ }
+ }();
+
+ if (loweredValue) {
+ op.replaceAllUsesWith(loweredValue);
+ op.erase();
+ }
+}
+
void LoweringPreparePass::lowerUnaryOp(cir::UnaryOp op) {
mlir::Type ty = op.getType();
if (!mlir::isa<cir::ComplexType>(ty))
@@ -71,8 +169,85 @@ void LoweringPreparePass::lowerUnaryOp(cir::UnaryOp op) {
op.erase();
}
+static void lowerArrayDtorCtorIntoLoop(cir::CIRBaseBuilderTy &builder,
+ clang::ASTContext *astCtx,
+ mlir::Operation *op, mlir::Type eltTy,
+ mlir::Value arrayAddr,
+ uint64_t arrayLen) {
+ // Generate loop to call into ctor/dtor for every element.
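+  // The emitted structure is roughly:
+  //   cur = decay(arrayAddr); end = cur + arrayLen;
+  //   do { ctor(cur); cur += 1; } while (cur != end);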
+ mlir::Location loc = op->getLoc();
+
+ // TODO: instead of fixed integer size, create alias for PtrDiffTy and unify
+ // with CIRGen stuff.
+ const unsigned sizeTypeSize =
+ astCtx->getTypeSize(astCtx->getSignedSizeType());
+ auto ptrDiffTy =
+ cir::IntType::get(builder.getContext(), sizeTypeSize, /*isSigned=*/false);
+ mlir::Value numArrayElementsConst = builder.getUnsignedInt(loc, arrayLen, 64);
+
+ auto begin = builder.create<cir::CastOp>(
+ loc, eltTy, cir::CastKind::array_to_ptrdecay, arrayAddr);
+ mlir::Value end = builder.create<cir::PtrStrideOp>(loc, eltTy, begin,
+ numArrayElementsConst);
+
+ mlir::Value tmpAddr = builder.createAlloca(
+ loc, /*addr type*/ builder.getPointerTo(eltTy),
+ /*var type*/ eltTy, "__array_idx", builder.getAlignmentAttr(1));
+ builder.createStore(loc, begin, tmpAddr);
+
+ cir::DoWhileOp loop = builder.createDoWhile(
+ loc,
+ /*condBuilder=*/
+ [&](mlir::OpBuilder &b, mlir::Location loc) {
+ auto currentElement = b.create<cir::LoadOp>(loc, eltTy, tmpAddr);
+ mlir::Type boolTy = cir::BoolType::get(b.getContext());
+ auto cmp = builder.create<cir::CmpOp>(loc, boolTy, cir::CmpOpKind::ne,
+ currentElement, end);
+ builder.createCondition(cmp);
+ },
+ /*bodyBuilder=*/
+ [&](mlir::OpBuilder &b, mlir::Location loc) {
+ auto currentElement = b.create<cir::LoadOp>(loc, eltTy, tmpAddr);
+
+ cir::CallOp ctorCall;
+ op->walk([&](cir::CallOp c) { ctorCall = c; });
+ assert(ctorCall && "expected ctor call");
+
+ auto one = builder.create<cir::ConstantOp>(
+ loc, ptrDiffTy, cir::IntAttr::get(ptrDiffTy, 1));
+
+ ctorCall->moveAfter(one);
+ ctorCall->setOperand(0, currentElement);
+
+        // Advance the pointer and store it back into the temporary variable.
+ auto nextElement =
+ builder.create<cir::PtrStrideOp>(loc, eltTy, currentElement, one);
+ builder.createStore(loc, nextElement, tmpAddr);
+ builder.createYield(loc);
+ });
+
+ op->replaceAllUsesWith(loop);
+ op->erase();
+}
+
+void LoweringPreparePass::lowerArrayCtor(cir::ArrayCtor op) {
+ cir::CIRBaseBuilderTy builder(getContext());
+ builder.setInsertionPointAfter(op.getOperation());
+
+ mlir::Type eltTy = op->getRegion(0).getArgument(0).getType();
+ assert(!cir::MissingFeatures::vlas());
+ auto arrayLen =
+ mlir::cast<cir::ArrayType>(op.getAddr().getType().getPointee()).getSize();
+ lowerArrayDtorCtorIntoLoop(builder, astCtx, op, eltTy, op.getAddr(),
+ arrayLen);
+}
+
void LoweringPreparePass::runOnOp(mlir::Operation *op) {
- if (auto unary = dyn_cast<cir::UnaryOp>(op))
+ if (auto arrayCtor = dyn_cast<ArrayCtor>(op))
+ lowerArrayCtor(arrayCtor);
+ else if (auto cast = mlir::dyn_cast<cir::CastOp>(op))
+ lowerCastOp(cast);
+ else if (auto unary = mlir::dyn_cast<cir::UnaryOp>(op))
lowerUnaryOp(unary);
}
@@ -82,7 +257,7 @@ void LoweringPreparePass::runOnOperation() {
llvm::SmallVector<mlir::Operation *> opsToTransform;
op->walk([&](mlir::Operation *op) {
- if (mlir::isa<cir::UnaryOp>(op))
+ if (mlir::isa<cir::ArrayCtor, cir::CastOp, cir::UnaryOp>(op))
opsToTransform.push_back(op);
});
@@ -93,3 +268,10 @@ void LoweringPreparePass::runOnOperation() {
std::unique_ptr<Pass> mlir::createLoweringPreparePass() {
return std::make_unique<LoweringPreparePass>();
}
+
+std::unique_ptr<Pass>
+mlir::createLoweringPreparePass(clang::ASTContext *astCtx) {
+ auto pass = std::make_unique<LoweringPreparePass>();
+ pass->setASTContext(astCtx);
+ return std::move(pass);
+}
diff --git a/clang/lib/CIR/Lowering/CIRPasses.cpp b/clang/lib/CIR/Lowering/CIRPasses.cpp
index 5607abc..bb9781b 100644
--- a/clang/lib/CIR/Lowering/CIRPasses.cpp
+++ b/clang/lib/CIR/Lowering/CIRPasses.cpp
@@ -31,7 +31,7 @@ mlir::LogicalResult runCIRToCIRPasses(mlir::ModuleOp theModule,
if (enableCIRSimplify)
pm.addPass(mlir::createCIRSimplifyPass());
- pm.addPass(mlir::createLoweringPreparePass());
+ pm.addPass(mlir::createLoweringPreparePass(&astContext));
pm.enableVerifier(enableVerifier);
(void)mlir::applyPassManagerCLOptions(pm);
diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
index f075be8..3cd7de0 100644
--- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
+++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
@@ -460,6 +460,17 @@ mlir::LogicalResult CIRToLLVMAssumeOpLowering::matchAndRewrite(
return mlir::success();
}
+mlir::LogicalResult CIRToLLVMAssumeSepStorageOpLowering::matchAndRewrite(
+ cir::AssumeSepStorageOp op, OpAdaptor adaptor,
+ mlir::ConversionPatternRewriter &rewriter) const {
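+  // Lower to an llvm.assume with a constant-true condition and a
+  // "separate_storage" operand bundle carrying the two pointers.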
+ auto cond = rewriter.create<mlir::LLVM::ConstantOp>(op.getLoc(),
+ rewriter.getI1Type(), 1);
+ rewriter.replaceOpWithNewOp<mlir::LLVM::AssumeOp>(
+ op, cond, mlir::LLVM::AssumeSeparateStorageTag{}, adaptor.getPtr1(),
+ adaptor.getPtr2());
+ return mlir::success();
+}
+
mlir::LogicalResult CIRToLLVMBitClrsbOpLowering::matchAndRewrite(
cir::BitClrsbOp op, OpAdaptor adaptor,
mlir::ConversionPatternRewriter &rewriter) const {
@@ -2066,6 +2077,7 @@ void ConvertCIRToLLVMPass::runOnOperation() {
patterns.add<
// clang-format off
CIRToLLVMAssumeOpLowering,
+ CIRToLLVMAssumeSepStorageOpLowering,
CIRToLLVMBaseClassAddrOpLowering,
CIRToLLVMBinOpLowering,
CIRToLLVMBitClrsbOpLowering,
diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.h b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.h
index 3faf1e9..2911ced 100644
--- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.h
+++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.h
@@ -44,6 +44,16 @@ public:
mlir::ConversionPatternRewriter &) const override;
};
+class CIRToLLVMAssumeSepStorageOpLowering
+ : public mlir::OpConversionPattern<cir::AssumeSepStorageOp> {
+public:
+ using mlir::OpConversionPattern<cir::AssumeSepStorageOp>::OpConversionPattern;
+
+ mlir::LogicalResult
+ matchAndRewrite(cir::AssumeSepStorageOp op, OpAdaptor,
+ mlir::ConversionPatternRewriter &) const override;
+};
+
class CIRToLLVMBitClrsbOpLowering
: public mlir::OpConversionPattern<cir::BitClrsbOp> {
public:
diff --git a/clang/lib/CodeGen/ABIInfo.cpp b/clang/lib/CodeGen/ABIInfo.cpp
index d981d69..3ef430e1 100644
--- a/clang/lib/CodeGen/ABIInfo.cpp
+++ b/clang/lib/CodeGen/ABIInfo.cpp
@@ -218,8 +218,8 @@ void ABIInfo::appendAttributeMangling(StringRef AttrStr,
// only have "+" prefixes here.
assert(LHS.starts_with("+") && RHS.starts_with("+") &&
"Features should always have a prefix.");
- return TI.getFMVPriority({LHS.substr(1)}) >
- TI.getFMVPriority({RHS.substr(1)});
+ return TI.getFMVPriority({LHS.substr(1)})
+ .ugt(TI.getFMVPriority({RHS.substr(1)}));
});
bool IsFirst = true;
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 5f2eb76..3f784fc 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -4108,6 +4108,22 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
return RValue::get(Result);
}
+ case Builtin::BI__builtin_elementwise_maximumnum: {
+ Value *Op0 = EmitScalarExpr(E->getArg(0));
+ Value *Op1 = EmitScalarExpr(E->getArg(1));
+ Value *Result = Builder.CreateBinaryIntrinsic(
+ Intrinsic::maximumnum, Op0, Op1, nullptr, "elt.maximumnum");
+ return RValue::get(Result);
+ }
+
+ case Builtin::BI__builtin_elementwise_minimumnum: {
+ Value *Op0 = EmitScalarExpr(E->getArg(0));
+ Value *Op1 = EmitScalarExpr(E->getArg(1));
+ Value *Result = Builder.CreateBinaryIntrinsic(
+ Intrinsic::minimumnum, Op0, Op1, nullptr, "elt.minimumnum");
+ return RValue::get(Result);
+ }
+
case Builtin::BI__builtin_reduce_max: {
auto GetIntrinsicID = [this](QualType QT) {
if (auto *VecTy = QT->getAs<VectorType>())
diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp
index 0bceece..d9bd443 100644
--- a/clang/lib/CodeGen/CGCall.cpp
+++ b/clang/lib/CodeGen/CGCall.cpp
@@ -2852,20 +2852,28 @@ void CodeGenModule::ConstructAttributeList(StringRef Name,
if (AI.getInReg())
Attrs.addAttribute(llvm::Attribute::InReg);
- // Depending on the ABI, this may be either a byval or a dead_on_return
- // argument.
- if (AI.getIndirectByVal()) {
- Attrs.addByValAttr(getTypes().ConvertTypeForMem(ParamType));
- } else {
- // Add dead_on_return when the object's lifetime ends in the callee.
- // This includes trivially-destructible objects, as well as objects
- // whose destruction / clean-up is carried out within the callee (e.g.,
- // Obj-C ARC-managed structs, MSVC callee-destroyed objects).
- if (!ParamType.isDestructedType() || !ParamType->isRecordType() ||
- ParamType->castAs<RecordType>()
- ->getDecl()
- ->isParamDestroyedInCallee())
- Attrs.addAttribute(llvm::Attribute::DeadOnReturn);
+ // HLSL out and inout parameters must not be marked with ByVal or
+ // DeadOnReturn attributes because stores to these parameters by the
+ // callee are visible to the caller.
+ if (auto ParamABI = FI.getExtParameterInfo(ArgNo).getABI();
+ ParamABI != ParameterABI::HLSLOut &&
+ ParamABI != ParameterABI::HLSLInOut) {
+
+ // Depending on the ABI, this may be either a byval or a dead_on_return
+ // argument.
+ if (AI.getIndirectByVal()) {
+ Attrs.addByValAttr(getTypes().ConvertTypeForMem(ParamType));
+ } else {
+ // Add dead_on_return when the object's lifetime ends in the callee.
+ // This includes trivially-destructible objects, as well as objects
+ // whose destruction / clean-up is carried out within the callee
+ // (e.g., Obj-C ARC-managed structs, MSVC callee-destroyed objects).
+ if (!ParamType.isDestructedType() || !ParamType->isRecordType() ||
+ ParamType->castAs<RecordType>()
+ ->getDecl()
+ ->isParamDestroyedInCallee())
+ Attrs.addAttribute(llvm::Attribute::DeadOnReturn);
+ }
}
auto *Decl = ParamType->getAsRecordDecl();
diff --git a/clang/lib/CodeGen/CGDebugInfo.cpp b/clang/lib/CodeGen/CGDebugInfo.cpp
index e24c68e..a371b67 100644
--- a/clang/lib/CodeGen/CGDebugInfo.cpp
+++ b/clang/lib/CodeGen/CGDebugInfo.cpp
@@ -6062,11 +6062,10 @@ void CGDebugInfo::EmitPseudoVariable(CGBuilderTy &Builder,
// ptr, in this case its debug info may not match the actual type of object
// being used as in the next instruction, so we will need to emit a pseudo
// variable for type-casted value.
- auto DeclareTypeMatches = [&](auto *DbgDeclare) {
+ auto DeclareTypeMatches = [&](llvm::DbgVariableRecord *DbgDeclare) {
return DbgDeclare->getVariable()->getType() == Type;
};
- if (any_of(llvm::findDbgDeclares(Var), DeclareTypeMatches) ||
- any_of(llvm::findDVRDeclares(Var), DeclareTypeMatches))
+ if (any_of(llvm::findDVRDeclares(Var), DeclareTypeMatches))
return;
}
diff --git a/clang/lib/CodeGen/CGExprCXX.cpp b/clang/lib/CodeGen/CGExprCXX.cpp
index 359e30c..b8238a4 100644
--- a/clang/lib/CodeGen/CGExprCXX.cpp
+++ b/clang/lib/CodeGen/CGExprCXX.cpp
@@ -2146,30 +2146,9 @@ void CodeGenFunction::EmitCXXDeleteExpr(const CXXDeleteExpr *E) {
return;
}
- // We might be deleting a pointer to array. If so, GEP down to the
- // first non-array element.
- // (this assumes that A(*)[3][7] is converted to [3 x [7 x %A]]*)
- if (DeleteTy->isConstantArrayType()) {
- llvm::Value *Zero = Builder.getInt32(0);
- SmallVector<llvm::Value*,8> GEP;
-
- GEP.push_back(Zero); // point at the outermost array
-
- // For each layer of array type we're pointing at:
- while (const ConstantArrayType *Arr
- = getContext().getAsConstantArrayType(DeleteTy)) {
- // 1. Unpeel the array type.
- DeleteTy = Arr->getElementType();
-
- // 2. GEP to the first element of the array.
- GEP.push_back(Zero);
- }
-
- Ptr = Builder.CreateInBoundsGEP(Ptr, GEP, ConvertTypeForMem(DeleteTy),
- Ptr.getAlignment(), "del.first");
- }
-
- assert(ConvertTypeForMem(DeleteTy) == Ptr.getElementType());
+ // We might be deleting a pointer to array.
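+  // If so, strip all array levels down to the base element type and retype
+  // the pointer to match.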
+ DeleteTy = getContext().getBaseElementType(DeleteTy);
+ Ptr = Ptr.withElementType(ConvertTypeForMem(DeleteTy));
if (E->isArrayForm()) {
EmitArrayDelete(*this, E, Ptr, DeleteTy);
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index ce2dd4d..91237cf 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -7080,6 +7080,110 @@ private:
return ConstLength.getSExtValue() != 1;
}
+ /// A helper class to copy structures with overlapped elements, i.e. those
+ /// which have mappings of both "s" and "s.mem". Consecutive elements that
+ /// are not explicitly copied have mapping nodes synthesized for them,
+ /// taking care to avoid generating zero-sized copies.
+ class CopyOverlappedEntryGaps {
+ CodeGenFunction &CGF;
+ MapCombinedInfoTy &CombinedInfo;
+ OpenMPOffloadMappingFlags Flags = OpenMPOffloadMappingFlags::OMP_MAP_NONE;
+ const ValueDecl *MapDecl = nullptr;
+ const Expr *MapExpr = nullptr;
+ Address BP = Address::invalid();
+ bool IsNonContiguous = false;
+ uint64_t DimSize = 0;
+ // These elements track the position as the struct is iterated over
+ // (in order of increasing element address).
+ const RecordDecl *LastParent = nullptr;
+ uint64_t Cursor = 0;
+ unsigned LastIndex = -1u;
+ Address LB = Address::invalid();
+
+ public:
+ CopyOverlappedEntryGaps(CodeGenFunction &CGF,
+ MapCombinedInfoTy &CombinedInfo,
+ OpenMPOffloadMappingFlags Flags,
+ const ValueDecl *MapDecl, const Expr *MapExpr,
+ Address BP, Address LB, bool IsNonContiguous,
+ uint64_t DimSize)
+ : CGF(CGF), CombinedInfo(CombinedInfo), Flags(Flags), MapDecl(MapDecl),
+ MapExpr(MapExpr), BP(BP), IsNonContiguous(IsNonContiguous),
+ DimSize(DimSize), LB(LB) {}
+
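+    /// Record the next explicitly mapped field, emitting a copy for any gap
+    /// between the previously visited field and this one.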
+ void processField(
+ const OMPClauseMappableExprCommon::MappableComponent &MC,
+ const FieldDecl *FD,
+ llvm::function_ref<LValue(CodeGenFunction &, const MemberExpr *)>
+ EmitMemberExprBase) {
+ const RecordDecl *RD = FD->getParent();
+ const ASTRecordLayout &RL = CGF.getContext().getASTRecordLayout(RD);
+ uint64_t FieldOffset = RL.getFieldOffset(FD->getFieldIndex());
+ uint64_t FieldSize =
+ CGF.getContext().getTypeSize(FD->getType().getCanonicalType());
+ Address ComponentLB = Address::invalid();
+
+ if (FD->getType()->isLValueReferenceType()) {
+ const auto *ME = cast<MemberExpr>(MC.getAssociatedExpression());
+ LValue BaseLVal = EmitMemberExprBase(CGF, ME);
+ ComponentLB =
+ CGF.EmitLValueForFieldInitialization(BaseLVal, FD).getAddress();
+ } else {
+ ComponentLB =
+ CGF.EmitOMPSharedLValue(MC.getAssociatedExpression()).getAddress();
+ }
+
+ if (!LastParent)
+ LastParent = RD;
+ if (FD->getParent() == LastParent) {
+ if (FD->getFieldIndex() != LastIndex + 1)
+ copyUntilField(FD, ComponentLB);
+ } else {
+ LastParent = FD->getParent();
+ if (((int64_t)FieldOffset - (int64_t)Cursor) > 0)
+ copyUntilField(FD, ComponentLB);
+ }
+ Cursor = FieldOffset + FieldSize;
+ LastIndex = FD->getFieldIndex();
+ LB = CGF.Builder.CreateConstGEP(ComponentLB, 1);
+ }
+
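+    /// Emit a copy of the bytes between the current lower bound and the
+    /// start of the next explicitly mapped component.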
+ void copyUntilField(const FieldDecl *FD, Address ComponentLB) {
+ llvm::Value *ComponentLBPtr = ComponentLB.emitRawPointer(CGF);
+ llvm::Value *LBPtr = LB.emitRawPointer(CGF);
+ llvm::Value *Size =
+ CGF.Builder.CreatePtrDiff(CGF.Int8Ty, ComponentLBPtr, LBPtr);
+ copySizedChunk(LBPtr, Size);
+ }
+
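+    /// Emit a copy of any bytes remaining between the current lower bound
+    /// and the end of the mapped region (HB), unless the cursor already
+    /// reached the end of the record.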
+ void copyUntilEnd(Address HB) {
+ if (LastParent) {
+ const ASTRecordLayout &RL =
+ CGF.getContext().getASTRecordLayout(LastParent);
+ if ((uint64_t)CGF.getContext().toBits(RL.getSize()) <= Cursor)
+ return;
+ }
+ llvm::Value *LBPtr = LB.emitRawPointer(CGF);
+ llvm::Value *Size = CGF.Builder.CreatePtrDiff(
+ CGF.Int8Ty, CGF.Builder.CreateConstGEP(HB, 1).emitRawPointer(CGF),
+ LBPtr);
+ copySizedChunk(LBPtr, Size);
+ }
+
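+    /// Append a single mapping entry describing a bitwise copy of Size bytes
+    /// starting at Base.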
+ void copySizedChunk(llvm::Value *Base, llvm::Value *Size) {
+ CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
+ CombinedInfo.BasePointers.push_back(BP.emitRawPointer(CGF));
+ CombinedInfo.DevicePtrDecls.push_back(nullptr);
+ CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
+ CombinedInfo.Pointers.push_back(Base);
+ CombinedInfo.Sizes.push_back(
+ CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
+ CombinedInfo.Types.push_back(Flags);
+ CombinedInfo.Mappers.push_back(nullptr);
+ CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize : 1);
+ }
+ };
+
/// Generate the base pointers, section pointers, sizes, map type bits, and
/// user-defined mappers (all included in \a CombinedInfo) for the provided
/// map type, map or motion modifiers, and expression components.
@@ -7570,63 +7674,22 @@ private:
getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit,
/*AddPtrFlag=*/false,
/*AddIsTargetParamFlag=*/false, IsNonContiguous);
- llvm::Value *Size = nullptr;
+ CopyOverlappedEntryGaps CopyGaps(CGF, CombinedInfo, Flags, MapDecl,
+ MapExpr, BP, LB, IsNonContiguous,
+ DimSize);
// Do bitcopy of all non-overlapped structure elements.
for (OMPClauseMappableExprCommon::MappableExprComponentListRef
Component : OverlappedElements) {
- Address ComponentLB = Address::invalid();
for (const OMPClauseMappableExprCommon::MappableComponent &MC :
Component) {
if (const ValueDecl *VD = MC.getAssociatedDeclaration()) {
- const auto *FD = dyn_cast<FieldDecl>(VD);
- if (FD && FD->getType()->isLValueReferenceType()) {
- const auto *ME =
- cast<MemberExpr>(MC.getAssociatedExpression());
- LValue BaseLVal = EmitMemberExprBase(CGF, ME);
- ComponentLB =
- CGF.EmitLValueForFieldInitialization(BaseLVal, FD)
- .getAddress();
- } else {
- ComponentLB =
- CGF.EmitOMPSharedLValue(MC.getAssociatedExpression())
- .getAddress();
+ if (const auto *FD = dyn_cast<FieldDecl>(VD)) {
+ CopyGaps.processField(MC, FD, EmitMemberExprBase);
}
- llvm::Value *ComponentLBPtr = ComponentLB.emitRawPointer(CGF);
- llvm::Value *LBPtr = LB.emitRawPointer(CGF);
- Size = CGF.Builder.CreatePtrDiff(CGF.Int8Ty, ComponentLBPtr,
- LBPtr);
- break;
}
}
- assert(Size && "Failed to determine structure size");
- CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
- CombinedInfo.BasePointers.push_back(BP.emitRawPointer(CGF));
- CombinedInfo.DevicePtrDecls.push_back(nullptr);
- CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
- CombinedInfo.Pointers.push_back(LB.emitRawPointer(CGF));
- CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
- Size, CGF.Int64Ty, /*isSigned=*/true));
- CombinedInfo.Types.push_back(Flags);
- CombinedInfo.Mappers.push_back(nullptr);
- CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
- : 1);
- LB = CGF.Builder.CreateConstGEP(ComponentLB, 1);
}
- CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
- CombinedInfo.BasePointers.push_back(BP.emitRawPointer(CGF));
- CombinedInfo.DevicePtrDecls.push_back(nullptr);
- CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
- CombinedInfo.Pointers.push_back(LB.emitRawPointer(CGF));
- llvm::Value *LBPtr = LB.emitRawPointer(CGF);
- Size = CGF.Builder.CreatePtrDiff(
- CGF.Int8Ty, CGF.Builder.CreateConstGEP(HB, 1).emitRawPointer(CGF),
- LBPtr);
- CombinedInfo.Sizes.push_back(
- CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
- CombinedInfo.Types.push_back(Flags);
- CombinedInfo.Mappers.push_back(nullptr);
- CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
- : 1);
+ CopyGaps.copyUntilEnd(HB);
break;
}
llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
diff --git a/clang/lib/CodeGen/CGStmt.cpp b/clang/lib/CodeGen/CGStmt.cpp
index e065006..1a8c6f0 100644
--- a/clang/lib/CodeGen/CGStmt.cpp
+++ b/clang/lib/CodeGen/CGStmt.cpp
@@ -846,11 +846,13 @@ void CodeGenFunction::EmitGotoStmt(const GotoStmt &S) {
if (HaveInsertPoint())
EmitStopPoint(&S);
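+  // Make the goto branch Key for the debug info feature Key Instructions.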
+ ApplyAtomGroup Grp(getDebugInfo());
EmitBranchThroughCleanup(getJumpDestForLabel(S.getLabel()));
}
void CodeGenFunction::EmitIndirectGotoStmt(const IndirectGotoStmt &S) {
+ ApplyAtomGroup Grp(getDebugInfo());
if (const LabelDecl *Target = S.getConstantTarget()) {
EmitBranchThroughCleanup(getJumpDestForLabel(Target));
return;
@@ -869,6 +871,8 @@ void CodeGenFunction::EmitIndirectGotoStmt(const IndirectGotoStmt &S) {
cast<llvm::PHINode>(IndGotoBB->begin())->addIncoming(V, CurBB);
EmitBranch(IndGotoBB);
+ if (CurBB && CurBB->getTerminator())
+ addInstToCurrentSourceAtom(CurBB->getTerminator(), nullptr);
}
void CodeGenFunction::EmitIfStmt(const IfStmt &S) {
@@ -2672,6 +2676,9 @@ static void UpdateAsmCallInst(llvm::CallBase &Result, bool HasSideEffect,
llvm::ConstantAsMetadata::get(Loc)));
}
+ // Make inline-asm calls Key for the debug info feature Key Instructions.
+ CGF.addInstToNewSourceAtom(&Result, nullptr);
+
if (!NoConvergent && CGF.getLangOpts().assumeFunctionsAreConvergent())
// Conservatively, mark all inline asm blocks in CUDA or OpenCL as
// convergent (meaning, they may call an intrinsically convergent op, such
@@ -2750,6 +2757,7 @@ EmitAsmStores(CodeGenFunction &CGF, const AsmStmt &S,
}
}
+ ApplyAtomGroup Grp(CGF.getDebugInfo());
LValue Dest = ResultRegDests[i];
// ResultTypeRequiresCast elements correspond to the first
// ResultTypeRequiresCast.size() elements of RegResults.
@@ -2757,7 +2765,8 @@ EmitAsmStores(CodeGenFunction &CGF, const AsmStmt &S,
unsigned Size = CGF.getContext().getTypeSize(ResultRegQualTys[i]);
Address A = Dest.getAddress().withElementType(ResultRegTypes[i]);
if (CGF.getTargetHooks().isScalarizableAsmOperand(CGF, TruncTy)) {
- Builder.CreateStore(Tmp, A);
+ llvm::StoreInst *S = Builder.CreateStore(Tmp, A);
+ CGF.addInstToCurrentSourceAtom(S, S->getValueOperand());
continue;
}
diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp
index 236cc3d..834b1c0 100644
--- a/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -4418,8 +4418,9 @@ void CodeGenModule::EmitGlobalDefinition(GlobalDecl GD, llvm::GlobalValue *GV) {
static void ReplaceUsesOfNonProtoTypeWithRealFunction(llvm::GlobalValue *Old,
llvm::Function *NewFn);
-static uint64_t getFMVPriority(const TargetInfo &TI,
- const CodeGenFunction::FMVResolverOption &RO) {
+static llvm::APInt
+getFMVPriority(const TargetInfo &TI,
+ const CodeGenFunction::FMVResolverOption &RO) {
llvm::SmallVector<StringRef, 8> Features{RO.Features};
if (RO.Architecture)
Features.push_back(*RO.Architecture);
@@ -4544,7 +4545,7 @@ void CodeGenModule::emitMultiVersionFunctions() {
llvm::stable_sort(
Options, [&TI](const CodeGenFunction::FMVResolverOption &LHS,
const CodeGenFunction::FMVResolverOption &RHS) {
- return getFMVPriority(TI, LHS) > getFMVPriority(TI, RHS);
+ return getFMVPriority(TI, LHS).ugt(getFMVPriority(TI, RHS));
});
CodeGenFunction CGF(*this);
CGF.EmitMultiVersionResolver(ResolverFunc, Options);
diff --git a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
index ee736a2..70f510a 100644
--- a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
@@ -274,7 +274,7 @@ llvm::Value *CodeGenFunction::EmitScalarOrConstFoldImmArg(unsigned ICEArguments,
void CodeGenFunction::AddAMDGPUFenceAddressSpaceMMRA(llvm::Instruction *Inst,
const CallExpr *E) {
- constexpr const char *Tag = "amdgpu-as";
+ constexpr const char *Tag = "amdgpu-synchronize-as";
LLVMContext &Ctx = Inst->getContext();
SmallVector<MMRAMetadata::TagT, 3> MMRAs;
@@ -633,6 +633,41 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
llvm::Function *F = CGM.getIntrinsic(IID, {LoadTy});
return Builder.CreateCall(F, {Addr});
}
+ case AMDGPU::BI__builtin_amdgcn_global_load_monitor_b32:
+ case AMDGPU::BI__builtin_amdgcn_global_load_monitor_b64:
+ case AMDGPU::BI__builtin_amdgcn_global_load_monitor_b128:
+ case AMDGPU::BI__builtin_amdgcn_flat_load_monitor_b32:
+ case AMDGPU::BI__builtin_amdgcn_flat_load_monitor_b64:
+ case AMDGPU::BI__builtin_amdgcn_flat_load_monitor_b128: {
+
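+    // Select the intrinsic variant matching the builtin's address space and
+    // width; both operands are forwarded to the call unchanged.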
+ Intrinsic::ID IID;
+ switch (BuiltinID) {
+ case AMDGPU::BI__builtin_amdgcn_global_load_monitor_b32:
+ IID = Intrinsic::amdgcn_global_load_monitor_b32;
+ break;
+ case AMDGPU::BI__builtin_amdgcn_global_load_monitor_b64:
+ IID = Intrinsic::amdgcn_global_load_monitor_b64;
+ break;
+ case AMDGPU::BI__builtin_amdgcn_global_load_monitor_b128:
+ IID = Intrinsic::amdgcn_global_load_monitor_b128;
+ break;
+ case AMDGPU::BI__builtin_amdgcn_flat_load_monitor_b32:
+ IID = Intrinsic::amdgcn_flat_load_monitor_b32;
+ break;
+ case AMDGPU::BI__builtin_amdgcn_flat_load_monitor_b64:
+ IID = Intrinsic::amdgcn_flat_load_monitor_b64;
+ break;
+ case AMDGPU::BI__builtin_amdgcn_flat_load_monitor_b128:
+ IID = Intrinsic::amdgcn_flat_load_monitor_b128;
+ break;
+ }
+
+ llvm::Type *LoadTy = ConvertType(E->getType());
+ llvm::Value *Addr = EmitScalarExpr(E->getArg(0));
+ llvm::Value *Val = EmitScalarExpr(E->getArg(1));
+ llvm::Function *F = CGM.getIntrinsic(IID, {LoadTy});
+ return Builder.CreateCall(F, {Addr, Val});
+ }
case AMDGPU::BI__builtin_amdgcn_load_to_lds: {
// Should this have asan instrumentation?
return emitBuiltinWithOneOverloadedType<5>(*this, E,
@@ -855,6 +890,7 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x128_bf8_fp8:
case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x128_bf8_bf8:
case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x64_iu8:
+ case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x128_f8f6f4:
case AMDGPU::BI__builtin_amdgcn_wmma_f32_32x16x128_f4:
case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x64_f16:
case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x64_bf16:
@@ -1118,6 +1154,10 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
ArgsForMatchingMatrixTypes = {4, 1};
BuiltinWMMAOp = Intrinsic::amdgcn_wmma_i32_16x16x64_iu8;
break;
+ case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x128_f8f6f4:
+ ArgsForMatchingMatrixTypes = {5, 1, 3};
+ BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x128_f8f6f4;
+ break;
case AMDGPU::BI__builtin_amdgcn_wmma_f32_32x16x128_f4:
ArgsForMatchingMatrixTypes = {3, 0, 1};
BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_32x16x128_f4;
diff --git a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
index 7e6a47f..2e6b4b3 100644
--- a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
@@ -8112,7 +8112,7 @@ Value *CodeGenFunction::EmitAArch64CpuSupports(const CallExpr *E) {
llvm::Value *
CodeGenFunction::EmitAArch64CpuSupports(ArrayRef<StringRef> FeaturesStrs) {
- uint64_t FeaturesMask = llvm::AArch64::getCpuSupportsMask(FeaturesStrs);
+ llvm::APInt FeaturesMask = llvm::AArch64::getCpuSupportsMask(FeaturesStrs);
Value *Result = Builder.getTrue();
if (FeaturesMask != 0) {
// Get features from structure in runtime library
@@ -8128,7 +8128,7 @@ CodeGenFunction::EmitAArch64CpuSupports(ArrayRef<StringRef> FeaturesStrs) {
{ConstantInt::get(Int32Ty, 0), ConstantInt::get(Int32Ty, 0)});
Value *Features = Builder.CreateAlignedLoad(Int64Ty, CpuFeatures,
CharUnits::fromQuantity(8));
- Value *Mask = Builder.getInt64(FeaturesMask);
+ Value *Mask = Builder.getInt(FeaturesMask.trunc(64));
Value *Bitset = Builder.CreateAnd(Features, Mask);
Value *Cmp = Builder.CreateICmpEQ(Bitset, Mask);
Result = Builder.CreateAnd(Result, Cmp);
diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp
index ec1135e..853f694 100644
--- a/clang/lib/Driver/Driver.cpp
+++ b/clang/lib/Driver/Driver.cpp
@@ -68,6 +68,7 @@
#include "clang/Driver/Types.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSet.h"
@@ -83,6 +84,7 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/ExitCodes.h"
#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/FileUtilities.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/MD5.h"
#include "llvm/Support/Path.h"
@@ -109,65 +111,6 @@ using namespace clang::driver;
using namespace clang;
using namespace llvm::opt;
-static std::optional<llvm::Triple> getOffloadTargetTriple(const Driver &D,
- const ArgList &Args) {
- auto OffloadTargets = Args.getAllArgValues(options::OPT_offload_EQ);
- // Offload compilation flow does not support multiple targets for now. We
- // need the HIPActionBuilder (and possibly the CudaActionBuilder{,Base}too)
- // to support multiple tool chains first.
- switch (OffloadTargets.size()) {
- default:
- D.Diag(diag::err_drv_only_one_offload_target_supported);
- return std::nullopt;
- case 0:
- D.Diag(diag::err_drv_invalid_or_unsupported_offload_target) << "";
- return std::nullopt;
- case 1:
- break;
- }
- return llvm::Triple(OffloadTargets[0]);
-}
-
-static std::optional<llvm::Triple>
-getNVIDIAOffloadTargetTriple(const Driver &D, const ArgList &Args,
- const llvm::Triple &HostTriple) {
- if (!Args.hasArg(options::OPT_offload_EQ)) {
- return llvm::Triple(HostTriple.isArch64Bit() ? "nvptx64-nvidia-cuda"
- : "nvptx-nvidia-cuda");
- }
- auto TT = getOffloadTargetTriple(D, Args);
- if (TT && (TT->getArch() == llvm::Triple::spirv32 ||
- TT->getArch() == llvm::Triple::spirv64)) {
- if (Args.hasArg(options::OPT_emit_llvm))
- return TT;
- D.Diag(diag::err_drv_cuda_offload_only_emit_bc);
- return std::nullopt;
- }
- D.Diag(diag::err_drv_invalid_or_unsupported_offload_target) << TT->str();
- return std::nullopt;
-}
-
-static std::optional<llvm::Triple>
-getHIPOffloadTargetTriple(const Driver &D, const ArgList &Args) {
- if (!Args.hasArg(options::OPT_offload_EQ)) {
- auto OffloadArchs = Args.getAllArgValues(options::OPT_offload_arch_EQ);
- if (llvm::is_contained(OffloadArchs, "amdgcnspirv") &&
- OffloadArchs.size() == 1)
- return llvm::Triple("spirv64-amd-amdhsa");
- return llvm::Triple("amdgcn-amd-amdhsa"); // Default HIP triple.
- }
- auto TT = getOffloadTargetTriple(D, Args);
- if (!TT)
- return std::nullopt;
- if (TT->isAMDGCN() && TT->getVendor() == llvm::Triple::AMD &&
- TT->getOS() == llvm::Triple::AMDHSA)
- return TT;
- if (TT->getArch() == llvm::Triple::spirv64)
- return TT;
- D.Diag(diag::err_drv_invalid_or_unsupported_offload_target) << TT->str();
- return std::nullopt;
-}
-
template <typename F> static bool usesInput(const ArgList &Args, F &&Fn) {
return llvm::any_of(Args, [&](Arg *A) {
return (A->getOption().matches(options::OPT_x) &&
@@ -266,8 +209,8 @@ Driver::Driver(StringRef ClangExecutable, StringRef TargetTriple,
CCLogDiagnostics(false), CCGenDiagnostics(false),
CCPrintProcessStats(false), CCPrintInternalStats(false),
TargetTriple(TargetTriple), Saver(Alloc), PrependArg(nullptr),
- CheckInputsExist(true), ProbePrecompiled(true),
- SuppressMissingInputWarning(false) {
+ PreferredLinker(CLANG_DEFAULT_LINKER), CheckInputsExist(true),
+ ProbePrecompiled(true), SuppressMissingInputWarning(false) {
// Provide a sane fallback if no VFS is specified.
if (!this->VFS)
this->VFS = llvm::vfs::getRealFileSystem();
@@ -458,6 +401,44 @@ phases::ID Driver::getFinalPhase(const DerivedArgList &DAL,
return FinalPhase;
}
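+// Run the given command line and return its captured standard output. The
+// default 60-second timeout can be overridden with the
+// CLANG_TOOLCHAIN_PROGRAM_TIMEOUT environment variable (0 means no timeout).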
+llvm::Expected<std::unique_ptr<llvm::MemoryBuffer>>
+Driver::executeProgram(llvm::ArrayRef<llvm::StringRef> Args) const {
+ llvm::SmallString<64> OutputFile;
+ llvm::sys::fs::createTemporaryFile("driver-program", "txt", OutputFile,
+ llvm::sys::fs::OF_Text);
+ llvm::FileRemover OutputRemover(OutputFile.c_str());
+ std::optional<llvm::StringRef> Redirects[] = {
+ {""},
+ OutputFile.str(),
+ {""},
+ };
+
+ std::string ErrorMessage;
+ int SecondsToWait = 60;
+ if (std::optional<std::string> Str =
+ llvm::sys::Process::GetEnv("CLANG_TOOLCHAIN_PROGRAM_TIMEOUT")) {
+ if (!llvm::to_integer(*Str, SecondsToWait))
+ return llvm::createStringError(std::error_code(),
+ "CLANG_TOOLCHAIN_PROGRAM_TIMEOUT expected "
+ "an integer, got '" +
+ *Str + "'");
+    SecondsToWait = std::max(SecondsToWait, 0); // 0 means no timeout
+ }
+ StringRef Executable = Args[0];
+ if (llvm::sys::ExecuteAndWait(Executable, Args, {}, Redirects, SecondsToWait,
+ /*MemoryLimit=*/0, &ErrorMessage))
+ return llvm::createStringError(std::error_code(),
+ Executable + ": " + ErrorMessage);
+
+ llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> OutputBuf =
+ llvm::MemoryBuffer::getFile(OutputFile.c_str());
+ if (!OutputBuf)
+ return llvm::createStringError(OutputBuf.getError(),
+ "Failed to read stdout of " + Executable +
+ ": " + OutputBuf.getError().message());
+ return std::move(*OutputBuf);
+}
+
static Arg *MakeInputArg(DerivedArgList &Args, const OptTable &Opts,
StringRef Value, bool Claim = true) {
Arg *A = new Arg(Opts.getOption(options::OPT_INPUT), Value,
@@ -921,250 +902,266 @@ Driver::OpenMPRuntimeKind Driver::getOpenMPRuntime(const ArgList &Args) const {
return RT;
}
-static llvm::Triple getSYCLDeviceTriple(StringRef TargetArch) {
- SmallVector<StringRef, 5> SYCLAlias = {"spir", "spir64", "spirv", "spirv32",
- "spirv64"};
- if (llvm::is_contained(SYCLAlias, TargetArch)) {
- llvm::Triple TargetTriple;
- TargetTriple.setArchName(TargetArch);
- TargetTriple.setVendor(llvm::Triple::UnknownVendor);
- TargetTriple.setOS(llvm::Triple::UnknownOS);
- return TargetTriple;
- }
- return llvm::Triple(TargetArch);
+// Handles `native` offload architectures by using the 'offload-arch' utility.
+static llvm::SmallVector<std::string>
+getSystemOffloadArchs(Compilation &C, Action::OffloadKind Kind) {
+ StringRef Program = C.getArgs().getLastArgValue(
+ options::OPT_offload_arch_tool_EQ, "offload-arch");
+
+ SmallVector<std::string> GPUArchs;
+ if (llvm::ErrorOr<std::string> Executable =
+ llvm::sys::findProgramByName(Program)) {
+ llvm::SmallVector<StringRef> Args{*Executable};
+ if (Kind == Action::OFK_HIP)
+ Args.push_back("--only=amdgpu");
+ else if (Kind == Action::OFK_Cuda)
+ Args.push_back("--only=nvptx");
+ auto StdoutOrErr = C.getDriver().executeProgram(Args);
+
+ if (!StdoutOrErr) {
+ C.getDriver().Diag(diag::err_drv_undetermined_gpu_arch)
+ << Action::GetOffloadKindName(Kind) << StdoutOrErr.takeError()
+ << "--offload-arch";
+ return GPUArchs;
+ }
+ if ((*StdoutOrErr)->getBuffer().empty()) {
+ C.getDriver().Diag(diag::err_drv_undetermined_gpu_arch)
+ << Action::GetOffloadKindName(Kind) << "No GPU detected in the system"
+ << "--offload-arch";
+ return GPUArchs;
+ }
+
+ for (StringRef Arch : llvm::split((*StdoutOrErr)->getBuffer(), "\n"))
+ if (!Arch.empty())
+ GPUArchs.push_back(Arch.str());
+ } else {
+ C.getDriver().Diag(diag::err_drv_command_failure) << "offload-arch";
+ }
+ return GPUArchs;
}
-static bool addSYCLDefaultTriple(Compilation &C,
- SmallVectorImpl<llvm::Triple> &SYCLTriples) {
- // Check current set of triples to see if the default has already been set.
- for (const auto &SYCLTriple : SYCLTriples) {
- if (SYCLTriple.getSubArch() == llvm::Triple::NoSubArch &&
- SYCLTriple.isSPIROrSPIRV())
- return false;
+// Attempts to infer the correct offloading toolchain triple by looking at the
+// requested offloading kind and architectures.
+static llvm::DenseSet<llvm::StringRef>
+inferOffloadToolchains(Compilation &C, Action::OffloadKind Kind) {
+ std::set<std::string> Archs;
+ for (Arg *A : C.getInputArgs()) {
+ for (StringRef Arch : A->getValues()) {
+ if (A->getOption().matches(options::OPT_offload_arch_EQ)) {
+ if (Arch == "native") {
+ for (StringRef Str : getSystemOffloadArchs(C, Kind))
+ Archs.insert(Str.str());
+ } else {
+ Archs.insert(Arch.str());
+ }
+ } else if (A->getOption().matches(options::OPT_no_offload_arch_EQ)) {
+ if (Arch == "all")
+ Archs.clear();
+ else
+ Archs.erase(Arch.str());
+ }
+ }
}
- // Add the default triple as it was not found.
- llvm::Triple DefaultTriple = getSYCLDeviceTriple(
- C.getDefaultToolChain().getTriple().isArch32Bit() ? "spirv32"
- : "spirv64");
- SYCLTriples.insert(SYCLTriples.begin(), DefaultTriple);
- return true;
+
+ llvm::DenseSet<llvm::StringRef> Triples;
+ for (llvm::StringRef Arch : Archs) {
+ OffloadArch ID = StringToOffloadArch(Arch);
+ if (ID == OffloadArch::UNKNOWN)
+ ID = StringToOffloadArch(
+ getProcessorFromTargetID(llvm::Triple("amdgcn-amd-amdhsa"), Arch));
+
+ if (Kind == Action::OFK_HIP && !IsAMDOffloadArch(ID)) {
+ C.getDriver().Diag(clang::diag::err_drv_offload_bad_gpu_arch)
+ << "HIP" << Arch;
+ return llvm::DenseSet<llvm::StringRef>();
+ }
+ if (Kind == Action::OFK_Cuda && !IsNVIDIAOffloadArch(ID)) {
+ C.getDriver().Diag(clang::diag::err_drv_offload_bad_gpu_arch)
+ << "CUDA" << Arch;
+ return llvm::DenseSet<llvm::StringRef>();
+ }
+ if (Kind == Action::OFK_OpenMP &&
+ (ID == OffloadArch::UNKNOWN || ID == OffloadArch::UNUSED)) {
+ C.getDriver().Diag(clang::diag::err_drv_failed_to_deduce_target_from_arch)
+ << Arch;
+ return llvm::DenseSet<llvm::StringRef>();
+ }
+ if (ID == OffloadArch::UNKNOWN || ID == OffloadArch::UNUSED) {
+ C.getDriver().Diag(clang::diag::err_drv_offload_bad_gpu_arch)
+ << "offload" << Arch;
+ return llvm::DenseSet<llvm::StringRef>();
+ }
+
+ StringRef Triple;
+ if (ID == OffloadArch::AMDGCNSPIRV)
+ Triple = "spirv64-amd-amdhsa";
+ else if (IsNVIDIAOffloadArch(ID))
+ Triple = C.getDefaultToolChain().getTriple().isArch64Bit()
+ ? "nvptx64-nvidia-cuda"
+ : "nvptx-nvidia-cuda";
+ else if (IsAMDOffloadArch(ID))
+ Triple = "amdgcn-amd-amdhsa";
+ else
+ continue;
+
+ // Make a new argument that dispatches this argument to the appropriate
+ // toolchain. This is required when we infer it and create potentially
+ // incompatible toolchains from the global option.
+ Option Opt = C.getDriver().getOpts().getOption(options::OPT_Xarch__);
+ unsigned Index = C.getArgs().getBaseArgs().MakeIndex("-Xarch_");
+ Arg *A = new Arg(Opt, C.getArgs().getArgString(Index), Index,
+ C.getArgs().MakeArgString(Triple.split("-").first),
+ C.getArgs().MakeArgString("--offload-arch=" + Arch));
+ C.getArgs().append(A);
+ C.getArgs().AddSynthesizedArg(A);
+ Triples.insert(Triple);
+ }
+
+ // Infer the default target triple if no specific architectures are given.
+ if (Archs.empty() && Kind == Action::OFK_HIP)
+ Triples.insert("amdgcn-amd-amdhsa");
+ else if (Archs.empty() && Kind == Action::OFK_Cuda)
+ Triples.insert(C.getDefaultToolChain().getTriple().isArch64Bit()
+ ? "nvptx64-nvidia-cuda"
+ : "nvptx-nvidia-cuda");
+ else if (Archs.empty() && Kind == Action::OFK_SYCL)
+ Triples.insert(C.getDefaultToolChain().getTriple().isArch64Bit()
+ ? "spirv64-unknown-unknown"
+ : "spirv32-unknown-unknown");
+
+ // We need to dispatch these to the appropriate toolchain now.
+ C.getArgs().eraseArg(options::OPT_offload_arch_EQ);
+ C.getArgs().eraseArg(options::OPT_no_offload_arch_EQ);
+
+ return Triples;
}
void Driver::CreateOffloadingDeviceToolChains(Compilation &C,
InputList &Inputs) {
-
- //
- // CUDA/HIP
- //
- // We need to generate a CUDA/HIP toolchain if any of the inputs has a CUDA
- // or HIP type. However, mixed CUDA/HIP compilation is not supported.
+ bool UseLLVMOffload = C.getInputArgs().hasArg(
+ options::OPT_foffload_via_llvm, options::OPT_fno_offload_via_llvm, false);
bool IsCuda =
- llvm::any_of(Inputs, [](std::pair<types::ID, const llvm::opt::Arg *> &I) {
- return types::isCuda(I.first);
- });
- bool IsHIP =
llvm::any_of(Inputs,
[](std::pair<types::ID, const llvm::opt::Arg *> &I) {
- return types::isHIP(I.first);
- }) ||
- C.getInputArgs().hasArg(options::OPT_hip_link) ||
- C.getInputArgs().hasArg(options::OPT_hipstdpar);
- bool UseLLVMOffload = C.getInputArgs().hasArg(
- options::OPT_foffload_via_llvm, options::OPT_fno_offload_via_llvm, false);
- if (IsCuda && IsHIP) {
- Diag(clang::diag::err_drv_mix_cuda_hip);
+ return types::isCuda(I.first);
+ }) &&
+ !UseLLVMOffload;
+ bool IsHIP =
+ (llvm::any_of(Inputs,
+ [](std::pair<types::ID, const llvm::opt::Arg *> &I) {
+ return types::isHIP(I.first);
+ }) ||
+ C.getInputArgs().hasArg(options::OPT_hip_link) ||
+ C.getInputArgs().hasArg(options::OPT_hipstdpar)) &&
+ !UseLLVMOffload;
+ bool IsSYCL = C.getInputArgs().hasFlag(options::OPT_fsycl,
+ options::OPT_fno_sycl, false);
+ bool IsOpenMPOffloading =
+ UseLLVMOffload ||
+ (C.getInputArgs().hasFlag(options::OPT_fopenmp, options::OPT_fopenmp_EQ,
+ options::OPT_fno_openmp, false) &&
+ (C.getInputArgs().hasArg(options::OPT_offload_targets_EQ) ||
+ (C.getInputArgs().hasArg(options::OPT_offload_arch_EQ) &&
+ !(IsCuda || IsHIP))));
+
+ llvm::SmallSet<Action::OffloadKind, 4> Kinds;
+ const std::pair<bool, Action::OffloadKind> ActiveKinds[] = {
+ {IsCuda, Action::OFK_Cuda},
+ {IsHIP, Action::OFK_HIP},
+ {IsOpenMPOffloading, Action::OFK_OpenMP},
+ {IsSYCL, Action::OFK_SYCL}};
+ for (const auto &[Active, Kind] : ActiveKinds)
+ if (Active)
+ Kinds.insert(Kind);
+
+ // We currently don't support any kind of mixed offloading.
+ if (Kinds.size() > 1) {
+ Diag(clang::diag::err_drv_mix_offload)
+ << Action::GetOffloadKindName(*Kinds.begin()).upper()
+ << Action::GetOffloadKindName(*(++Kinds.begin())).upper();
return;
}
- if (IsCuda && !UseLLVMOffload) {
- auto CudaTriple = getNVIDIAOffloadTargetTriple(
- *this, C.getInputArgs(), C.getDefaultToolChain().getTriple());
- if (!CudaTriple)
- return;
-
- auto &TC =
- getOffloadToolChain(C.getInputArgs(), Action::OFK_Cuda, *CudaTriple,
- C.getDefaultToolChain().getTriple());
-
- // Emit a warning if the detected CUDA version is too new.
- const CudaInstallationDetector &CudaInstallation =
- static_cast<const toolchains::CudaToolChain &>(TC).CudaInstallation;
- if (CudaInstallation.isValid())
- CudaInstallation.WarnIfUnsupportedVersion();
- C.addOffloadDeviceToolChain(&TC, Action::OFK_Cuda);
- OffloadArchs[&TC] = getOffloadArchs(C, C.getArgs(), Action::OFK_Cuda, &TC,
- /*SpecificToolchain=*/true);
- } else if (IsHIP && !UseLLVMOffload) {
- if (auto *OMPTargetArg =
- C.getInputArgs().getLastArg(options::OPT_offload_targets_EQ)) {
- Diag(clang::diag::err_drv_unsupported_opt_for_language_mode)
- << OMPTargetArg->getSpelling() << "HIP";
- return;
- }
-
- auto HIPTriple = getHIPOffloadTargetTriple(*this, C.getInputArgs());
- if (!HIPTriple)
- return;
-
- auto &TC =
- getOffloadToolChain(C.getInputArgs(), Action::OFK_HIP, *HIPTriple,
- C.getDefaultToolChain().getTriple());
- C.addOffloadDeviceToolChain(&TC, Action::OFK_HIP);
-
- // TODO: Fix 'amdgcnspirv' handling with the new driver.
- if (C.getInputArgs().hasFlag(options::OPT_offload_new_driver,
- options::OPT_no_offload_new_driver, false))
- OffloadArchs[&TC] = getOffloadArchs(C, C.getArgs(), Action::OFK_HIP, &TC,
- /*SpecificToolchain=*/true);
- }
+ // Initialize the compilation identifier used for unique CUDA / HIP names.
if (IsCuda || IsHIP)
CUIDOpts = CUIDOptions(C.getArgs(), *this);
- //
- // OpenMP
- //
- // We need to generate an OpenMP toolchain if the user specified targets with
- // the -fopenmp-targets option or used --offload-arch with OpenMP enabled.
- bool IsOpenMPOffloading =
- ((IsCuda || IsHIP) && UseLLVMOffload) ||
- (C.getInputArgs().hasFlag(options::OPT_fopenmp, options::OPT_fopenmp_EQ,
- options::OPT_fno_openmp, false) &&
- (C.getInputArgs().hasArg(options::OPT_offload_targets_EQ) ||
- C.getInputArgs().hasArg(options::OPT_offload_arch_EQ)));
- if (IsOpenMPOffloading) {
- // We expect that -fopenmp-targets is always used in conjunction with the
- // option -fopenmp specifying a valid runtime with offloading support, i.e.
- // libomp or libiomp.
- OpenMPRuntimeKind RuntimeKind = getOpenMPRuntime(C.getInputArgs());
- if (RuntimeKind != OMPRT_OMP && RuntimeKind != OMPRT_IOMP5) {
- Diag(clang::diag::err_drv_expecting_fopenmp_with_fopenmp_targets);
- return;
- }
-
- // If the user specified -fopenmp-targets= we create a toolchain for each
- // valid triple. Otherwise, if only --offload-arch= was specified we instead
- // attempt to derive the appropriate toolchains from the arguments.
- if (Arg *OpenMPTargets =
- C.getInputArgs().getLastArg(options::OPT_offload_targets_EQ)) {
- if (OpenMPTargets && !OpenMPTargets->getNumValues()) {
- Diag(clang::diag::warn_drv_empty_joined_argument)
- << OpenMPTargets->getAsString(C.getInputArgs());
+ // Get the list of requested offloading toolchains. If they were not
+ // explicitly specified we will infer them based on the offloading language
+ // and requested architectures.
+ std::multiset<llvm::StringRef> Triples;
+ if (C.getInputArgs().hasArg(options::OPT_offload_targets_EQ)) {
+ std::vector<std::string> ArgValues =
+ C.getInputArgs().getAllArgValues(options::OPT_offload_targets_EQ);
+ for (llvm::StringRef Target : ArgValues)
+ Triples.insert(C.getInputArgs().MakeArgString(Target));
+
+ if (ArgValues.empty())
+ Diag(clang::diag::warn_drv_empty_joined_argument)
+ << C.getInputArgs()
+ .getLastArg(options::OPT_offload_targets_EQ)
+ ->getAsString(C.getInputArgs());
+ } else if (Kinds.size() > 0) {
+ for (Action::OffloadKind Kind : Kinds) {
+ llvm::DenseSet<llvm::StringRef> Derived = inferOffloadToolchains(C, Kind);
+ Triples.insert(Derived.begin(), Derived.end());
+ }
+ }
+
+ // Build an offloading toolchain for every requested target and kind.
+ llvm::StringMap<StringRef> FoundNormalizedTriples;
+ for (StringRef Target : Triples) {
+ // OpenMP offloading requires a compatible libomp.
+ if (Kinds.contains(Action::OFK_OpenMP)) {
+ OpenMPRuntimeKind RuntimeKind = getOpenMPRuntime(C.getInputArgs());
+ if (RuntimeKind != OMPRT_OMP && RuntimeKind != OMPRT_IOMP5) {
+ Diag(clang::diag::err_drv_expecting_fopenmp_with_fopenmp_targets);
return;
}
+ }
- // Make sure these show up in a deterministic order.
- std::multiset<StringRef> OpenMPTriples;
- for (StringRef T : OpenMPTargets->getValues())
- OpenMPTriples.insert(T);
-
- llvm::StringMap<StringRef> FoundNormalizedTriples;
- for (StringRef T : OpenMPTriples) {
- llvm::Triple TT(ToolChain::getOpenMPTriple(T));
- std::string NormalizedName = TT.normalize();
-
- // Make sure we don't have a duplicate triple.
- auto [TripleIt, Inserted] =
- FoundNormalizedTriples.try_emplace(NormalizedName, T);
- if (!Inserted) {
- Diag(clang::diag::warn_drv_omp_offload_target_duplicate)
- << T << TripleIt->second;
- continue;
- }
-
- // If the specified target is invalid, emit a diagnostic.
- if (TT.getArch() == llvm::Triple::UnknownArch) {
- Diag(clang::diag::err_drv_invalid_omp_target) << T;
- continue;
- }
+ // Certain options are not allowed when combined with SYCL compilation.
+ if (Kinds.contains(Action::OFK_SYCL)) {
+ for (auto ID :
+ {options::OPT_static_libstdcxx, options::OPT_ffreestanding})
+ if (Arg *IncompatArg = C.getInputArgs().getLastArg(ID))
+ Diag(clang::diag::err_drv_argument_not_allowed_with)
+ << IncompatArg->getSpelling() << "-fsycl";
+ }
- auto &TC = getOffloadToolChain(C.getInputArgs(), Action::OFK_OpenMP, TT,
- C.getDefaultToolChain().getTriple());
- C.addOffloadDeviceToolChain(&TC, Action::OFK_OpenMP);
- OffloadArchs[&TC] =
- getOffloadArchs(C, C.getArgs(), Action::OFK_OpenMP, &TC,
- /*SpecificToolchain=*/true);
- }
- } else if (C.getInputArgs().hasArg(options::OPT_offload_arch_EQ) &&
- ((!IsHIP && !IsCuda) || UseLLVMOffload)) {
- llvm::Triple AMDTriple("amdgcn-amd-amdhsa");
- llvm::Triple NVPTXTriple("nvptx64-nvidia-cuda");
-
- for (StringRef Arch :
- C.getInputArgs().getAllArgValues(options::OPT_offload_arch_EQ)) {
- bool IsNVPTX = IsNVIDIAOffloadArch(
- StringToOffloadArch(getProcessorFromTargetID(NVPTXTriple, Arch)));
- bool IsAMDGPU = IsAMDOffloadArch(
- StringToOffloadArch(getProcessorFromTargetID(AMDTriple, Arch)));
- if (!IsNVPTX && !IsAMDGPU && !Arch.empty() &&
- !Arch.equals_insensitive("native")) {
- Diag(clang::diag::err_drv_failed_to_deduce_target_from_arch) << Arch;
- return;
- }
+ // Create a device toolchain for every specified kind and triple.
+ for (Action::OffloadKind Kind : Kinds) {
+ llvm::Triple TT = Kind == Action::OFK_OpenMP
+ ? ToolChain::getOpenMPTriple(Target)
+ : llvm::Triple(Target);
+ if (TT.getArch() == llvm::Triple::ArchType::UnknownArch) {
+ Diag(diag::err_drv_invalid_or_unsupported_offload_target) << TT.str();
+ continue;
}
- // Attempt to deduce the offloading triple from the set of architectures.
- // We can only correctly deduce NVPTX / AMDGPU triples currently.
- for (const llvm::Triple &TT : {AMDTriple, NVPTXTriple}) {
- auto &TC = getOffloadToolChain(C.getInputArgs(), Action::OFK_OpenMP, TT,
- C.getDefaultToolChain().getTriple());
-
- llvm::SmallVector<StringRef> Archs =
- getOffloadArchs(C, C.getArgs(), Action::OFK_OpenMP, &TC,
- /*SpecificToolchain=*/false);
- if (!Archs.empty()) {
- C.addOffloadDeviceToolChain(&TC, Action::OFK_OpenMP);
- OffloadArchs[&TC] = Archs;
- }
+ std::string NormalizedName = TT.normalize();
+ auto [TripleIt, Inserted] =
+ FoundNormalizedTriples.try_emplace(NormalizedName, Target);
+ if (!Inserted) {
+ Diag(clang::diag::warn_drv_omp_offload_target_duplicate)
+ << Target << TripleIt->second;
+ continue;
}
- // If the set is empty then we failed to find a native architecture.
- auto TCRange = C.getOffloadToolChains(Action::OFK_OpenMP);
- if (TCRange.first == TCRange.second)
- Diag(clang::diag::err_drv_failed_to_deduce_target_from_arch)
- << "native";
- }
- } else if (C.getInputArgs().hasArg(options::OPT_offload_targets_EQ)) {
- Diag(clang::diag::err_drv_expecting_fopenmp_with_fopenmp_targets);
- return;
- }
+ auto &TC = getOffloadToolChain(C.getInputArgs(), Kind, TT,
+ C.getDefaultToolChain().getTriple());
- // We need to generate a SYCL toolchain if the user specified -fsycl.
- bool IsSYCL = C.getInputArgs().hasFlag(options::OPT_fsycl,
- options::OPT_fno_sycl, false);
-
- auto argSYCLIncompatible = [&](OptSpecifier OptId) {
- if (!IsSYCL)
- return;
- if (Arg *IncompatArg = C.getInputArgs().getLastArg(OptId))
- Diag(clang::diag::err_drv_argument_not_allowed_with)
- << IncompatArg->getSpelling() << "-fsycl";
- };
- // -static-libstdc++ is not compatible with -fsycl.
- argSYCLIncompatible(options::OPT_static_libstdcxx);
- // -ffreestanding cannot be used with -fsycl
- argSYCLIncompatible(options::OPT_ffreestanding);
-
- llvm::SmallVector<llvm::Triple, 4> UniqueSYCLTriplesVec;
-
- if (IsSYCL) {
- addSYCLDefaultTriple(C, UniqueSYCLTriplesVec);
+ // Emit a warning if the detected CUDA version is too new.
+ if (Kind == Action::OFK_Cuda) {
+ auto &CudaInstallation =
+ static_cast<const toolchains::CudaToolChain &>(TC).CudaInstallation;
+ if (CudaInstallation.isValid())
+ CudaInstallation.WarnIfUnsupportedVersion();
+ }
- // We'll need to use the SYCL and host triples as the key into
- // getOffloadingDeviceToolChain, because the device toolchains we're
- // going to create will depend on both.
- const ToolChain *HostTC = C.getSingleOffloadToolChain<Action::OFK_Host>();
- for (const auto &TT : UniqueSYCLTriplesVec) {
- auto &TC = getOffloadToolChain(C.getInputArgs(), Action::OFK_SYCL, TT,
- HostTC->getTriple());
- C.addOffloadDeviceToolChain(&TC, Action::OFK_SYCL);
- OffloadArchs[&TC] = getOffloadArchs(C, C.getArgs(), Action::OFK_SYCL, &TC,
- /*SpecificToolchain=*/true);
+ C.addOffloadDeviceToolChain(&TC, Kind);
}
}
-
- //
- // TODO: Add support for other offloading programming models here.
- //
}
bool Driver::loadZOSCustomizationFile(llvm::cl::ExpansionContext &ExpCtx) {
@@ -3306,9 +3303,6 @@ class OffloadingActionBuilder final {
// architecture. If we are in host-only mode we return 'success' so that
// the host uses the CUDA offload kind.
if (auto *IA = dyn_cast<InputAction>(HostAction)) {
- assert(!GpuArchList.empty() &&
- "We should have at least one GPU architecture.");
-
// If the host input is not CUDA or HIP, we don't need to bother about
// this input.
if (!(IA->getType() == types::TY_CUDA ||
@@ -3408,10 +3402,6 @@ class OffloadingActionBuilder final {
CudaDeviceActions.clear();
}
- /// Get canonicalized offload arch option. \returns empty StringRef if the
- /// option is invalid.
- virtual StringRef getCanonicalOffloadArch(StringRef Arch) = 0;
-
virtual std::optional<std::pair<llvm::StringRef, llvm::StringRef>>
getConflictOffloadArchCombination(const std::set<StringRef> &GpuArchs) = 0;
@@ -3440,91 +3430,25 @@ class OffloadingActionBuilder final {
return true;
}
- ToolChains.push_back(
- AssociatedOffloadKind == Action::OFK_Cuda
- ? C.getSingleOffloadToolChain<Action::OFK_Cuda>()
- : C.getSingleOffloadToolChain<Action::OFK_HIP>());
-
- CompileHostOnly = C.getDriver().offloadHostOnly();
- EmitLLVM = Args.getLastArg(options::OPT_emit_llvm);
- EmitAsm = Args.getLastArg(options::OPT_S);
-
- // --offload and --offload-arch options are mutually exclusive.
- if (Args.hasArgNoClaim(options::OPT_offload_EQ) &&
- Args.hasArgNoClaim(options::OPT_offload_arch_EQ,
- options::OPT_no_offload_arch_EQ)) {
- C.getDriver().Diag(diag::err_opt_not_valid_with_opt) << "--offload-arch"
- << "--offload";
- }
-
- // Collect all offload arch parameters, removing duplicates.
std::set<StringRef> GpuArchs;
- bool Error = false;
- const ToolChain &TC = *ToolChains.front();
- for (Arg *A : C.getArgsForToolChain(&TC, /*BoundArch=*/"",
- AssociatedOffloadKind)) {
- if (!(A->getOption().matches(options::OPT_offload_arch_EQ) ||
- A->getOption().matches(options::OPT_no_offload_arch_EQ)))
- continue;
- A->claim();
-
- for (StringRef ArchStr : llvm::split(A->getValue(), ",")) {
- if (A->getOption().matches(options::OPT_no_offload_arch_EQ) &&
- ArchStr == "all") {
- GpuArchs.clear();
- } else if (ArchStr == "native") {
- auto GPUsOrErr = ToolChains.front()->getSystemGPUArchs(Args);
- if (!GPUsOrErr) {
- TC.getDriver().Diag(diag::err_drv_undetermined_gpu_arch)
- << llvm::Triple::getArchTypeName(TC.getArch())
- << llvm::toString(GPUsOrErr.takeError()) << "--offload-arch";
- continue;
- }
+ for (Action::OffloadKind Kind : {Action::OFK_Cuda, Action::OFK_HIP}) {
+ for (auto &I : llvm::make_range(C.getOffloadToolChains(Kind))) {
+ ToolChains.push_back(I.second);
- for (auto GPU : *GPUsOrErr) {
- GpuArchs.insert(Args.MakeArgString(GPU));
- }
- } else {
- ArchStr = getCanonicalOffloadArch(ArchStr);
- if (ArchStr.empty()) {
- Error = true;
- } else if (A->getOption().matches(options::OPT_offload_arch_EQ))
- GpuArchs.insert(ArchStr);
- else if (A->getOption().matches(options::OPT_no_offload_arch_EQ))
- GpuArchs.erase(ArchStr);
- else
- llvm_unreachable("Unexpected option.");
- }
+ for (auto Arch :
+ C.getDriver().getOffloadArchs(C, C.getArgs(), Kind, *I.second))
+ GpuArchs.insert(Arch);
}
}
- auto &&ConflictingArchs = getConflictOffloadArchCombination(GpuArchs);
- if (ConflictingArchs) {
- C.getDriver().Diag(clang::diag::err_drv_bad_offload_arch_combo)
- << ConflictingArchs->first << ConflictingArchs->second;
- C.setContainsError();
- return true;
- }
-
- // Collect list of GPUs remaining in the set.
for (auto Arch : GpuArchs)
GpuArchList.push_back(Arch.data());
- // Default to sm_20 which is the lowest common denominator for
- // supported GPUs. sm_20 code should work correctly, if
- // suboptimally, on all newer GPUs.
- if (GpuArchList.empty()) {
- if (ToolChains.front()->getTriple().isSPIRV()) {
- if (ToolChains.front()->getTriple().getVendor() == llvm::Triple::AMD)
- GpuArchList.push_back(OffloadArch::AMDGCNSPIRV);
- else
- GpuArchList.push_back(OffloadArch::Generic);
- } else {
- GpuArchList.push_back(DefaultOffloadArch);
- }
- }
+ CompileHostOnly = C.getDriver().offloadHostOnly();
+ EmitLLVM = Args.getLastArg(options::OPT_emit_llvm);
+ EmitAsm = Args.getLastArg(options::OPT_S);
- return Error;
+ return false;
}
};
@@ -3538,15 +3462,6 @@ class OffloadingActionBuilder final {
DefaultOffloadArch = OffloadArch::CudaDefault;
}
- StringRef getCanonicalOffloadArch(StringRef ArchStr) override {
- OffloadArch Arch = StringToOffloadArch(ArchStr);
- if (Arch == OffloadArch::UNKNOWN || !IsNVIDIAOffloadArch(Arch)) {
- C.getDriver().Diag(clang::diag::err_drv_cuda_bad_gpu_arch) << ArchStr;
- return StringRef();
- }
- return OffloadArchToString(Arch);
- }
-
std::optional<std::pair<llvm::StringRef, llvm::StringRef>>
getConflictOffloadArchCombination(
const std::set<StringRef> &GpuArchs) override {
@@ -3705,24 +3620,6 @@ class OffloadingActionBuilder final {
bool canUseBundlerUnbundler() const override { return true; }
- StringRef getCanonicalOffloadArch(StringRef IdStr) override {
- llvm::StringMap<bool> Features;
- // getHIPOffloadTargetTriple() is known to return valid value as it has
- // been called successfully in the CreateOffloadingDeviceToolChains().
- auto T =
- (IdStr == "amdgcnspirv")
- ? llvm::Triple("spirv64-amd-amdhsa")
- : *getHIPOffloadTargetTriple(C.getDriver(), C.getInputArgs());
- auto ArchStr = parseTargetID(T, IdStr, &Features);
- if (!ArchStr) {
- C.getDriver().Diag(clang::diag::err_drv_bad_target_id) << IdStr;
- C.setContainsError();
- return StringRef();
- }
- auto CanId = getCanonicalTargetID(*ArchStr, Features);
- return Args.MakeArgStringRef(CanId);
- };
-
std::optional<std::pair<llvm::StringRef, llvm::StringRef>>
getConflictOffloadArchCombination(
const std::set<StringRef> &GpuArchs) override {
@@ -4715,23 +4612,20 @@ void Driver::BuildActions(Compilation &C, DerivedArgList &Args,
static StringRef getCanonicalArchString(Compilation &C,
const llvm::opt::DerivedArgList &Args,
StringRef ArchStr,
- const llvm::Triple &Triple,
- bool SpecificToolchain) {
+ const llvm::Triple &Triple) {
// Lookup the CUDA / HIP architecture string. Only report an error if we were
// expecting the triple to be only NVPTX / AMDGPU.
OffloadArch Arch =
StringToOffloadArch(getProcessorFromTargetID(Triple, ArchStr));
if (Triple.isNVPTX() &&
(Arch == OffloadArch::UNKNOWN || !IsNVIDIAOffloadArch(Arch))) {
- if (SpecificToolchain)
- C.getDriver().Diag(clang::diag::err_drv_offload_bad_gpu_arch)
- << "CUDA" << ArchStr;
+ C.getDriver().Diag(clang::diag::err_drv_offload_bad_gpu_arch)
+ << "CUDA" << ArchStr;
return StringRef();
} else if (Triple.isAMDGPU() &&
(Arch == OffloadArch::UNKNOWN || !IsAMDOffloadArch(Arch))) {
- if (SpecificToolchain)
- C.getDriver().Diag(clang::diag::err_drv_offload_bad_gpu_arch)
- << "HIP" << ArchStr;
+ C.getDriver().Diag(clang::diag::err_drv_offload_bad_gpu_arch)
+ << "HIP" << ArchStr;
return StringRef();
}
@@ -4767,11 +4661,7 @@ getConflictOffloadArchCombination(const llvm::DenseSet<StringRef> &Archs,
llvm::SmallVector<StringRef>
Driver::getOffloadArchs(Compilation &C, const llvm::opt::DerivedArgList &Args,
- Action::OffloadKind Kind, const ToolChain *TC,
- bool SpecificToolchain) const {
- if (!TC)
- TC = &C.getDefaultToolChain();
-
+ Action::OffloadKind Kind, const ToolChain &TC) const {
// --offload and --offload-arch options are mutually exclusive.
if (Args.hasArgNoClaim(options::OPT_offload_EQ) &&
Args.hasArgNoClaim(options::OPT_offload_arch_EQ,
@@ -4784,48 +4674,44 @@ Driver::getOffloadArchs(Compilation &C, const llvm::opt::DerivedArgList &Args,
}
llvm::DenseSet<StringRef> Archs;
- for (auto *Arg : C.getArgsForToolChain(TC, /*BoundArch=*/"", Kind)) {
+ for (auto *Arg : C.getArgsForToolChain(&TC, /*BoundArch=*/"", Kind)) {
// Add or remove the seen architectures in order of appearance. If an
// invalid architecture is given we simply exit.
if (Arg->getOption().matches(options::OPT_offload_arch_EQ)) {
for (StringRef Arch : Arg->getValues()) {
if (Arch == "native" || Arch.empty()) {
- auto GPUsOrErr = TC->getSystemGPUArchs(Args);
+ auto GPUsOrErr = TC.getSystemGPUArchs(Args);
if (!GPUsOrErr) {
- if (!SpecificToolchain)
- llvm::consumeError(GPUsOrErr.takeError());
- else
- TC->getDriver().Diag(diag::err_drv_undetermined_gpu_arch)
- << llvm::Triple::getArchTypeName(TC->getArch())
- << llvm::toString(GPUsOrErr.takeError()) << "--offload-arch";
+ TC.getDriver().Diag(diag::err_drv_undetermined_gpu_arch)
+ << llvm::Triple::getArchTypeName(TC.getArch())
+ << llvm::toString(GPUsOrErr.takeError()) << "--offload-arch";
continue;
}
for (auto ArchStr : *GPUsOrErr) {
- StringRef CanonicalStr =
- getCanonicalArchString(C, Args, Args.MakeArgString(ArchStr),
- TC->getTriple(), SpecificToolchain);
+ StringRef CanonicalStr = getCanonicalArchString(
+ C, Args, Args.MakeArgString(ArchStr), TC.getTriple());
if (!CanonicalStr.empty())
Archs.insert(CanonicalStr);
- else if (SpecificToolchain)
+ else
return llvm::SmallVector<StringRef>();
}
} else {
- StringRef CanonicalStr = getCanonicalArchString(
- C, Args, Arch, TC->getTriple(), SpecificToolchain);
+ StringRef CanonicalStr =
+ getCanonicalArchString(C, Args, Arch, TC.getTriple());
if (!CanonicalStr.empty())
Archs.insert(CanonicalStr);
- else if (SpecificToolchain)
+ else
return llvm::SmallVector<StringRef>();
}
}
} else if (Arg->getOption().matches(options::OPT_no_offload_arch_EQ)) {
- for (StringRef Arch : llvm::split(Arg->getValue(), ",")) {
+ for (StringRef Arch : Arg->getValues()) {
if (Arch == "all") {
Archs.clear();
} else {
- StringRef ArchStr = getCanonicalArchString(
- C, Args, Arch, TC->getTriple(), SpecificToolchain);
+ StringRef ArchStr =
+ getCanonicalArchString(C, Args, Arch, TC.getTriple());
Archs.erase(ArchStr);
}
}
@@ -4833,28 +4719,30 @@ Driver::getOffloadArchs(Compilation &C, const llvm::opt::DerivedArgList &Args,
}
if (auto ConflictingArchs =
- getConflictOffloadArchCombination(Archs, TC->getTriple()))
+ getConflictOffloadArchCombination(Archs, TC.getTriple()))
C.getDriver().Diag(clang::diag::err_drv_bad_offload_arch_combo)
<< ConflictingArchs->first << ConflictingArchs->second;
- // Skip filling defaults if we're just querying what is availible.
- if (SpecificToolchain && Archs.empty()) {
+ // Fill in the default architectures if not provided explicitly.
+ if (Archs.empty()) {
if (Kind == Action::OFK_Cuda) {
Archs.insert(OffloadArchToString(OffloadArch::CudaDefault));
} else if (Kind == Action::OFK_HIP) {
- Archs.insert(OffloadArchToString(OffloadArch::HIPDefault));
+ Archs.insert(OffloadArchToString(TC.getTriple().isSPIRV()
+ ? OffloadArch::Generic
+ : OffloadArch::HIPDefault));
} else if (Kind == Action::OFK_SYCL) {
Archs.insert(StringRef());
} else if (Kind == Action::OFK_OpenMP) {
// Accept legacy `-march` device arguments for OpenMP.
- if (auto *Arg = C.getArgsForToolChain(TC, /*BoundArch=*/"", Kind)
+ if (auto *Arg = C.getArgsForToolChain(&TC, /*BoundArch=*/"", Kind)
.getLastArg(options::OPT_march_EQ)) {
Archs.insert(Arg->getValue());
} else {
- auto ArchsOrErr = TC->getSystemGPUArchs(Args);
+ auto ArchsOrErr = TC.getSystemGPUArchs(Args);
if (!ArchsOrErr) {
- TC->getDriver().Diag(diag::err_drv_undetermined_gpu_arch)
- << llvm::Triple::getArchTypeName(TC->getArch())
+ TC.getDriver().Diag(diag::err_drv_undetermined_gpu_arch)
+ << llvm::Triple::getArchTypeName(TC.getArch())
<< llvm::toString(ArchsOrErr.takeError()) << "--offload-arch";
} else if (!ArchsOrErr->empty()) {
for (auto Arch : *ArchsOrErr)
@@ -4934,7 +4822,7 @@ Action *Driver::BuildOffloadingActions(Compilation &C,
// Get the product of all bound architectures and toolchains.
SmallVector<std::pair<const ToolChain *, StringRef>> TCAndArchs;
for (const ToolChain *TC : ToolChains) {
- for (StringRef Arch : OffloadArchs.lookup(TC)) {
+ for (StringRef Arch : getOffloadArchs(C, C.getArgs(), Kind, *TC)) {
TCAndArchs.push_back(std::make_pair(TC, Arch));
DeviceActions.push_back(
C.MakeAction<InputAction>(*InputArg, InputType, CUID));
@@ -4966,7 +4854,7 @@ Action *Driver::BuildOffloadingActions(Compilation &C,
if (Kind == Action::OFK_SYCL && Phase == phases::Assemble)
continue;
- auto TCAndArch = TCAndArchs.begin();
+ auto *TCAndArch = TCAndArchs.begin();
for (Action *&A : DeviceActions) {
if (A->getType() == types::TY_Nothing)
continue;
@@ -4998,7 +4886,13 @@ Action *Driver::BuildOffloadingActions(Compilation &C,
// Compiling HIP in device-only non-RDC mode requires linking each action
// individually.
for (Action *&A : DeviceActions) {
- if ((A->getType() != types::TY_Object &&
+ // Special handling for the HIP SPIR-V toolchain because it does not yet use
+ // the SPIR-V backend and thus does not report the output as an object.
+ bool IsAMDGCNSPIRV = A->getOffloadingToolChain() &&
+ A->getOffloadingToolChain()->getTriple().getOS() ==
+ llvm::Triple::OSType::AMDHSA &&
+ A->getOffloadingToolChain()->getTriple().isSPIRV();
+ if ((A->getType() != types::TY_Object && !IsAMDGCNSPIRV &&
A->getType() != types::TY_LTO_BC) ||
!HIPNoRDC || !offloadDeviceOnly())
continue;
@@ -5006,7 +4900,7 @@ Action *Driver::BuildOffloadingActions(Compilation &C,
A = C.MakeAction<LinkJobAction>(LinkerInput, types::TY_Image);
}
- auto TCAndArch = TCAndArchs.begin();
+ auto *TCAndArch = TCAndArchs.begin();
for (Action *A : DeviceActions) {
DDeps.add(*A, *TCAndArch->first, TCAndArch->second.data(), Kind);
OffloadAction::DeviceDependences DDep;
@@ -5054,8 +4948,9 @@ Action *Driver::BuildOffloadingActions(Compilation &C,
// fatbinary for each translation unit, linking each input individually.
Action *FatbinAction =
C.MakeAction<LinkJobAction>(OffloadActions, types::TY_HIP_FATBIN);
- DDep.add(*FatbinAction, *C.getSingleOffloadToolChain<Action::OFK_HIP>(),
- nullptr, Action::OFK_HIP);
+ DDep.add(*FatbinAction,
+ *C.getOffloadToolChains<Action::OFK_HIP>().first->second, nullptr,
+ Action::OFK_HIP);
} else {
// Package all the offloading actions into a single output that can be
// embedded in the host and linked.
@@ -5131,11 +5026,13 @@ Action *Driver::ConstructPhaseAction(
if (Args.hasArg(options::OPT_extract_api))
return C.MakeAction<ExtractAPIJobAction>(Input, types::TY_API_INFO);
- // With 'fexperimental-modules-reduced-bmi', we don't want to run the
+ // With 'fmodules-reduced-bmi', we don't want to run the
// precompile phase unless the user specified '--precompile'. In the case
// the '--precompile' flag is enabled, we will try to emit the reduced BMI
// as a by product in GenerateModuleInterfaceAction.
- if (Args.hasArg(options::OPT_modules_reduced_bmi) &&
+ if (!Args.hasArg(options::OPT_fno_modules_reduced_bmi) &&
+ (Input->getType() == driver::types::TY_CXXModule ||
+ Input->getType() == driver::types::TY_PP_CXXModule) &&
!Args.getLastArg(options::OPT__precompile))
return Input;
@@ -5208,7 +5105,10 @@ Action *Driver::ConstructPhaseAction(
false) ||
(Args.hasFlag(options::OPT_offload_new_driver,
options::OPT_no_offload_new_driver, false) &&
- !offloadDeviceOnly())) ||
+ (!offloadDeviceOnly() ||
+ (Input->getOffloadingToolChain() &&
+ TargetDeviceOffloadKind == Action::OFK_HIP &&
+ Input->getOffloadingToolChain()->getTriple().isSPIRV())))) ||
TargetDeviceOffloadKind == Action::OFK_OpenMP))) {
types::ID Output =
Args.hasArg(options::OPT_S) &&
@@ -6323,7 +6223,7 @@ const char *Driver::GetNamedOutputPath(Compilation &C, const JobAction &JA,
// `-fmodule-output`.
if (!AtTopLevel && isa<PrecompileJobAction>(JA) &&
JA.getType() == types::TY_ModuleFile && SpecifiedModuleOutput) {
- assert(!C.getArgs().hasArg(options::OPT_modules_reduced_bmi));
+ assert(C.getArgs().hasArg(options::OPT_fno_modules_reduced_bmi));
return GetModuleOutputPath(C, JA, BaseInput);
}
diff --git a/clang/lib/Driver/ToolChain.cpp b/clang/lib/Driver/ToolChain.cpp
index 481f575..1d7dad0 100644
--- a/clang/lib/Driver/ToolChain.cpp
+++ b/clang/lib/Driver/ToolChain.cpp
@@ -104,44 +104,6 @@ ToolChain::ToolChain(const Driver &D, const llvm::Triple &T,
addIfExists(getFilePaths(), Path);
}
-llvm::Expected<std::unique_ptr<llvm::MemoryBuffer>>
-ToolChain::executeToolChainProgram(StringRef Executable) const {
- llvm::SmallString<64> OutputFile;
- llvm::sys::fs::createTemporaryFile("toolchain-program", "txt", OutputFile,
- llvm::sys::fs::OF_Text);
- llvm::FileRemover OutputRemover(OutputFile.c_str());
- std::optional<llvm::StringRef> Redirects[] = {
- {""},
- OutputFile.str(),
- {""},
- };
-
- std::string ErrorMessage;
- int SecondsToWait = 60;
- if (std::optional<std::string> Str =
- llvm::sys::Process::GetEnv("CLANG_TOOLCHAIN_PROGRAM_TIMEOUT")) {
- if (!llvm::to_integer(*Str, SecondsToWait))
- return llvm::createStringError(std::error_code(),
- "CLANG_TOOLCHAIN_PROGRAM_TIMEOUT expected "
- "an integer, got '" +
- *Str + "'");
- SecondsToWait = std::max(SecondsToWait, 0); // infinite
- }
- if (llvm::sys::ExecuteAndWait(Executable, {Executable}, {}, Redirects,
- SecondsToWait,
- /*MemoryLimit=*/0, &ErrorMessage))
- return llvm::createStringError(std::error_code(),
- Executable + ": " + ErrorMessage);
-
- llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> OutputBuf =
- llvm::MemoryBuffer::getFile(OutputFile.c_str());
- if (!OutputBuf)
- return llvm::createStringError(OutputBuf.getError(),
- "Failed to read stdout of " + Executable +
- ": " + OutputBuf.getError().message());
- return std::move(*OutputBuf);
-}
-
void ToolChain::setTripleEnvironment(llvm::Triple::EnvironmentType Env) {
Triple.setEnvironment(Env);
if (EffectiveTriple != llvm::Triple())
@@ -255,6 +217,18 @@ static void getAArch64MultilibFlags(const Driver &D,
Result.push_back(ABIArg->getAsString(Args));
}
+ if (const Arg *A = Args.getLastArg(options::OPT_O_Group);
+ A && A->getOption().matches(options::OPT_O)) {
+ switch (A->getValue()[0]) {
+ case 's':
+ Result.push_back("-Os");
+ break;
+ case 'z':
+ Result.push_back("-Oz");
+ break;
+ }
+ }
+
processMultilibCustomFlags(Result, Args);
}
@@ -332,6 +306,19 @@ static void getARMMultilibFlags(const Driver &D, const llvm::Triple &Triple,
if (Endian->getOption().matches(options::OPT_mbig_endian))
Result.push_back(Endian->getAsString(Args));
}
+
+ if (const Arg *A = Args.getLastArg(options::OPT_O_Group);
+ A && A->getOption().matches(options::OPT_O)) {
+ switch (A->getValue()[0]) {
+ case 's':
+ Result.push_back("-Os");
+ break;
+ case 'z':
+ Result.push_back("-Oz");
+ break;
+ }
+ }
+
processMultilibCustomFlags(Result, Args);
}
@@ -1100,7 +1087,7 @@ std::string ToolChain::GetLinkerPath(bool *LinkerIsLLD) const {
// Get -fuse-ld= first to prevent -Wunused-command-line-argument. -fuse-ld= is
// considered as the linker flavor, e.g. "bfd", "gold", or "lld".
const Arg* A = Args.getLastArg(options::OPT_fuse_ld_EQ);
- StringRef UseLinker = A ? A->getValue() : CLANG_DEFAULT_LINKER;
+ StringRef UseLinker = A ? A->getValue() : getDriver().getPreferredLinker();
// --ld-path= takes precedence over -fuse-ld= and specifies the executable
// name. -B, COMPILER_PATH and PATH and consulted if the value does not
@@ -1644,7 +1631,8 @@ void ToolChain::addSYCLIncludeArgs(const ArgList &DriverArgs,
ArgStringList &CC1Args) const {}
llvm::SmallVector<ToolChain::BitCodeLibraryInfo, 12>
-ToolChain::getDeviceLibs(const ArgList &DriverArgs) const {
+ToolChain::getDeviceLibs(const ArgList &DriverArgs,
+ const Action::OffloadKind DeviceOffloadingKind) const {
return {};
}
diff --git a/clang/lib/Driver/ToolChains/AMDGPU.cpp b/clang/lib/Driver/ToolChains/AMDGPU.cpp
index 7fc34f4..0781683 100644
--- a/clang/lib/Driver/ToolChains/AMDGPU.cpp
+++ b/clang/lib/Driver/ToolChains/AMDGPU.cpp
@@ -31,6 +31,68 @@ using namespace clang::driver::toolchains;
using namespace clang;
using namespace llvm::opt;
+RocmInstallationDetector::CommonBitcodeLibsPreferences::
+ CommonBitcodeLibsPreferences(const Driver &D,
+ const llvm::opt::ArgList &DriverArgs,
+ StringRef GPUArch,
+ const Action::OffloadKind DeviceOffloadingKind,
+ const bool NeedsASanRT)
+ : ABIVer(DeviceLibABIVersion::fromCodeObjectVersion(
+ tools::getAMDGPUCodeObjectVersion(D, DriverArgs))) {
+ const auto Kind = llvm::AMDGPU::parseArchAMDGCN(GPUArch);
+ const unsigned ArchAttr = llvm::AMDGPU::getArchAttrAMDGCN(Kind);
+
+ IsOpenMP = DeviceOffloadingKind == Action::OFK_OpenMP;
+
+ const bool HasWave32 = (ArchAttr & llvm::AMDGPU::FEATURE_WAVE32);
+ Wave64 =
+ !HasWave32 || DriverArgs.hasFlag(options::OPT_mwavefrontsize64,
+ options::OPT_mno_wavefrontsize64, false);
+
+ const bool IsKnownOffloading = DeviceOffloadingKind == Action::OFK_OpenMP ||
+ DeviceOffloadingKind == Action::OFK_HIP;
+
+ // Default to enabling f32 denormals on subtargets where fma is fast with
+ // denormals
+ const bool DefaultDAZ =
+ (Kind == llvm::AMDGPU::GK_NONE)
+ ? false
+ : !((ArchAttr & llvm::AMDGPU::FEATURE_FAST_FMA_F32) &&
+ (ArchAttr & llvm::AMDGPU::FEATURE_FAST_DENORMAL_F32));
+ // TODO: There are way too many flags that change this. Do we need to
+ // check them all?
+ DAZ = IsKnownOffloading
+ ? DriverArgs.hasFlag(options::OPT_fgpu_flush_denormals_to_zero,
+ options::OPT_fno_gpu_flush_denormals_to_zero,
+ DefaultDAZ)
+ : DriverArgs.hasArg(options::OPT_cl_denorms_are_zero) || DefaultDAZ;
+
+ FiniteOnly = DriverArgs.hasArg(options::OPT_cl_finite_math_only) ||
+ DriverArgs.hasFlag(options::OPT_ffinite_math_only,
+ options::OPT_fno_finite_math_only, false);
+
+ UnsafeMathOpt =
+ DriverArgs.hasArg(options::OPT_cl_unsafe_math_optimizations) ||
+ DriverArgs.hasFlag(options::OPT_funsafe_math_optimizations,
+ options::OPT_fno_unsafe_math_optimizations, false);
+
+ FastRelaxedMath = DriverArgs.hasArg(options::OPT_cl_fast_relaxed_math) ||
+ DriverArgs.hasFlag(options::OPT_ffast_math,
+ options::OPT_fno_fast_math, false);
+
+ const bool DefaultSqrt = IsKnownOffloading;
+ CorrectSqrt =
+ DriverArgs.hasArg(options::OPT_cl_fp32_correctly_rounded_divide_sqrt) ||
+ DriverArgs.hasFlag(
+ options::OPT_fhip_fp32_correctly_rounded_divide_sqrt,
+ options::OPT_fno_hip_fp32_correctly_rounded_divide_sqrt, DefaultSqrt);
+ // GPU Sanitizer currently only supports ASan and is enabled through host
+ // ASan.
+ GPUSan = (DriverArgs.hasFlag(options::OPT_fgpu_sanitize,
+ options::OPT_fno_gpu_sanitize, true) &&
+ NeedsASanRT);
+}
+
void RocmInstallationDetector::scanLibDevicePath(llvm::StringRef Path) {
assert(!Path.empty());
@@ -841,7 +903,7 @@ AMDGPUToolChain::getSystemGPUArchs(const ArgList &Args) const {
else
Program = GetProgramPath("amdgpu-arch");
- auto StdoutOrErr = executeToolChainProgram(Program);
+ auto StdoutOrErr = getDriver().executeProgram({Program});
if (!StdoutOrErr)
return StdoutOrErr.takeError();
@@ -884,33 +946,14 @@ void ROCMToolChain::addClangTargetOptions(
ABIVer))
return;
- bool Wave64 = isWave64(DriverArgs, Kind);
- // TODO: There are way too many flags that change this. Do we need to check
- // them all?
- bool DAZ = DriverArgs.hasArg(options::OPT_cl_denorms_are_zero) ||
- getDefaultDenormsAreZeroForTarget(Kind);
- bool FiniteOnly = DriverArgs.hasArg(options::OPT_cl_finite_math_only);
-
- bool UnsafeMathOpt =
- DriverArgs.hasArg(options::OPT_cl_unsafe_math_optimizations);
- bool FastRelaxedMath = DriverArgs.hasArg(options::OPT_cl_fast_relaxed_math);
- bool CorrectSqrt =
- DriverArgs.hasArg(options::OPT_cl_fp32_correctly_rounded_divide_sqrt);
-
- // GPU Sanitizer currently only supports ASan and is enabled through host
- // ASan.
- bool GPUSan = DriverArgs.hasFlag(options::OPT_fgpu_sanitize,
- options::OPT_fno_gpu_sanitize, true) &&
- getSanitizerArgs(DriverArgs).needsAsanRt();
-
// Add the OpenCL specific bitcode library.
llvm::SmallVector<BitCodeLibraryInfo, 12> BCLibs;
BCLibs.emplace_back(RocmInstallation->getOpenCLPath().str());
// Add the generic set of libraries.
BCLibs.append(RocmInstallation->getCommonBitcodeLibs(
- DriverArgs, LibDeviceFile, Wave64, DAZ, FiniteOnly, UnsafeMathOpt,
- FastRelaxedMath, CorrectSqrt, ABIVer, GPUSan, false));
+ DriverArgs, LibDeviceFile, GpuArch, DeviceOffloadingKind,
+ getSanitizerArgs(DriverArgs).needsAsanRt()));
for (auto [BCFile, Internalize] : BCLibs) {
if (Internalize)
@@ -947,35 +990,37 @@ bool RocmInstallationDetector::checkCommonBitcodeLibs(
llvm::SmallVector<ToolChain::BitCodeLibraryInfo, 12>
RocmInstallationDetector::getCommonBitcodeLibs(
- const llvm::opt::ArgList &DriverArgs, StringRef LibDeviceFile, bool Wave64,
- bool DAZ, bool FiniteOnly, bool UnsafeMathOpt, bool FastRelaxedMath,
- bool CorrectSqrt, DeviceLibABIVersion ABIVer, bool GPUSan,
- bool isOpenMP) const {
+ const llvm::opt::ArgList &DriverArgs, StringRef LibDeviceFile,
+ StringRef GPUArch, const Action::OffloadKind DeviceOffloadingKind,
+ const bool NeedsASanRT) const {
llvm::SmallVector<ToolChain::BitCodeLibraryInfo, 12> BCLibs;
+ CommonBitcodeLibsPreferences Pref{D, DriverArgs, GPUArch,
+ DeviceOffloadingKind, NeedsASanRT};
+
auto AddBCLib = [&](ToolChain::BitCodeLibraryInfo BCLib,
bool Internalize = true) {
BCLib.ShouldInternalize = Internalize;
BCLibs.emplace_back(BCLib);
};
auto AddSanBCLibs = [&]() {
- if (GPUSan)
+ if (Pref.GPUSan)
AddBCLib(getAsanRTLPath(), false);
};
AddSanBCLibs();
AddBCLib(getOCMLPath());
- if (!isOpenMP)
+ if (!Pref.IsOpenMP)
AddBCLib(getOCKLPath());
- else if (GPUSan && isOpenMP)
+ else if (Pref.GPUSan && Pref.IsOpenMP)
AddBCLib(getOCKLPath(), false);
- AddBCLib(getDenormalsAreZeroPath(DAZ));
- AddBCLib(getUnsafeMathPath(UnsafeMathOpt || FastRelaxedMath));
- AddBCLib(getFiniteOnlyPath(FiniteOnly || FastRelaxedMath));
- AddBCLib(getCorrectlyRoundedSqrtPath(CorrectSqrt));
- AddBCLib(getWavefrontSize64Path(Wave64));
+ AddBCLib(getDenormalsAreZeroPath(Pref.DAZ));
+ AddBCLib(getUnsafeMathPath(Pref.UnsafeMathOpt || Pref.FastRelaxedMath));
+ AddBCLib(getFiniteOnlyPath(Pref.FiniteOnly || Pref.FastRelaxedMath));
+ AddBCLib(getCorrectlyRoundedSqrtPath(Pref.CorrectSqrt));
+ AddBCLib(getWavefrontSize64Path(Pref.Wave64));
AddBCLib(LibDeviceFile);
- auto ABIVerPath = getABIVersionPath(ABIVer);
+ auto ABIVerPath = getABIVersionPath(Pref.ABIVer);
if (!ABIVerPath.empty())
AddBCLib(ABIVerPath);
@@ -983,9 +1028,9 @@ RocmInstallationDetector::getCommonBitcodeLibs(
}
llvm::SmallVector<ToolChain::BitCodeLibraryInfo, 12>
-ROCMToolChain::getCommonDeviceLibNames(const llvm::opt::ArgList &DriverArgs,
- const std::string &GPUArch,
- bool isOpenMP) const {
+ROCMToolChain::getCommonDeviceLibNames(
+ const llvm::opt::ArgList &DriverArgs, const std::string &GPUArch,
+ Action::OffloadKind DeviceOffloadingKind) const {
auto Kind = llvm::AMDGPU::parseArchAMDGCN(GPUArch);
const StringRef CanonArch = llvm::AMDGPU::getArchNameAMDGCN(Kind);
@@ -996,33 +1041,9 @@ ROCMToolChain::getCommonDeviceLibNames(const llvm::opt::ArgList &DriverArgs,
ABIVer))
return {};
- // If --hip-device-lib is not set, add the default bitcode libraries.
- // TODO: There are way too many flags that change this. Do we need to check
- // them all?
- bool DAZ = DriverArgs.hasFlag(options::OPT_fgpu_flush_denormals_to_zero,
- options::OPT_fno_gpu_flush_denormals_to_zero,
- getDefaultDenormsAreZeroForTarget(Kind));
- bool FiniteOnly = DriverArgs.hasFlag(
- options::OPT_ffinite_math_only, options::OPT_fno_finite_math_only, false);
- bool UnsafeMathOpt =
- DriverArgs.hasFlag(options::OPT_funsafe_math_optimizations,
- options::OPT_fno_unsafe_math_optimizations, false);
- bool FastRelaxedMath = DriverArgs.hasFlag(options::OPT_ffast_math,
- options::OPT_fno_fast_math, false);
- bool CorrectSqrt = DriverArgs.hasFlag(
- options::OPT_fhip_fp32_correctly_rounded_divide_sqrt,
- options::OPT_fno_hip_fp32_correctly_rounded_divide_sqrt, true);
- bool Wave64 = isWave64(DriverArgs, Kind);
-
- // GPU Sanitizer currently only supports ASan and is enabled through host
- // ASan.
- bool GPUSan = DriverArgs.hasFlag(options::OPT_fgpu_sanitize,
- options::OPT_fno_gpu_sanitize, true) &&
- getSanitizerArgs(DriverArgs).needsAsanRt();
-
return RocmInstallation->getCommonBitcodeLibs(
- DriverArgs, LibDeviceFile, Wave64, DAZ, FiniteOnly, UnsafeMathOpt,
- FastRelaxedMath, CorrectSqrt, ABIVer, GPUSan, isOpenMP);
+ DriverArgs, LibDeviceFile, GPUArch, DeviceOffloadingKind,
+ getSanitizerArgs(DriverArgs).needsAsanRt());
}
bool AMDGPUToolChain::shouldSkipSanitizeOption(
diff --git a/clang/lib/Driver/ToolChains/AMDGPU.h b/clang/lib/Driver/ToolChains/AMDGPU.h
index 08bd4fa..513c77d 100644
--- a/clang/lib/Driver/ToolChains/AMDGPU.h
+++ b/clang/lib/Driver/ToolChains/AMDGPU.h
@@ -147,7 +147,7 @@ public:
llvm::SmallVector<BitCodeLibraryInfo, 12>
getCommonDeviceLibNames(const llvm::opt::ArgList &DriverArgs,
const std::string &GPUArch,
- bool isOpenMP = false) const;
+ Action::OffloadKind DeviceOffloadingKind) const;
SanitizerMask getSupportedSanitizers() const override {
return SanitizerKind::Address;
diff --git a/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp b/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
index 7ffa3f0..2b41d54 100644
--- a/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
+++ b/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
@@ -44,7 +44,7 @@ void AMDGPUOpenMPToolChain::addClangTargetOptions(
true))
return;
- for (auto BCFile : getDeviceLibs(DriverArgs)) {
+ for (auto BCFile : getDeviceLibs(DriverArgs, DeviceOffloadingKind)) {
CC1Args.push_back(BCFile.ShouldInternalize ? "-mlink-builtin-bitcode"
: "-mlink-bitcode-file");
CC1Args.push_back(DriverArgs.MakeArgString(BCFile.Path));
@@ -132,7 +132,9 @@ AMDGPUOpenMPToolChain::computeMSVCVersion(const Driver *D,
}
llvm::SmallVector<ToolChain::BitCodeLibraryInfo, 12>
-AMDGPUOpenMPToolChain::getDeviceLibs(const llvm::opt::ArgList &Args) const {
+AMDGPUOpenMPToolChain::getDeviceLibs(
+ const llvm::opt::ArgList &Args,
+ const Action::OffloadKind DeviceOffloadingKind) const {
if (!Args.hasFlag(options::OPT_offloadlib, options::OPT_no_offloadlib, true))
return {};
@@ -140,8 +142,8 @@ AMDGPUOpenMPToolChain::getDeviceLibs(const llvm::opt::ArgList &Args) const {
getTriple(), Args.getLastArgValue(options::OPT_march_EQ));
SmallVector<BitCodeLibraryInfo, 12> BCLibs;
- for (auto BCLib : getCommonDeviceLibNames(Args, GpuArch.str(),
- /*IsOpenMP=*/true))
+ for (auto BCLib :
+ getCommonDeviceLibNames(Args, GpuArch.str(), DeviceOffloadingKind))
BCLibs.emplace_back(BCLib);
return BCLibs;
diff --git a/clang/lib/Driver/ToolChains/AMDGPUOpenMP.h b/clang/lib/Driver/ToolChains/AMDGPUOpenMP.h
index 0536c9f..cbafdf5 100644
--- a/clang/lib/Driver/ToolChains/AMDGPUOpenMP.h
+++ b/clang/lib/Driver/ToolChains/AMDGPUOpenMP.h
@@ -58,7 +58,8 @@ public:
const llvm::opt::ArgList &Args) const override;
llvm::SmallVector<BitCodeLibraryInfo, 12>
- getDeviceLibs(const llvm::opt::ArgList &Args) const override;
+ getDeviceLibs(const llvm::opt::ArgList &Args,
+ const Action::OffloadKind DeviceOffloadKind) const override;
const ToolChain &HostTC;
};
diff --git a/clang/lib/Driver/ToolChains/Arch/Sparc.cpp b/clang/lib/Driver/ToolChains/Arch/Sparc.cpp
index 9595ee8..94a94f1 100644
--- a/clang/lib/Driver/ToolChains/Arch/Sparc.cpp
+++ b/clang/lib/Driver/ToolChains/Arch/Sparc.cpp
@@ -23,7 +23,9 @@ const char *sparc::getSparcAsmModeForCPU(StringRef Name,
if (Triple.getArch() == llvm::Triple::sparcv9) {
const char *DefV9CPU;
- if (Triple.isOSLinux() || Triple.isOSFreeBSD() || Triple.isOSOpenBSD())
+ if (Triple.isOSSolaris())
+ DefV9CPU = "-Av9b";
+ else if (Triple.isOSLinux() || Triple.isOSFreeBSD() || Triple.isOSOpenBSD())
DefV9CPU = "-Av9a";
else
DefV9CPU = "-Av9";
@@ -35,6 +37,13 @@ const char *sparc::getSparcAsmModeForCPU(StringRef Name,
.Case("niagara4", "-Av9d")
.Default(DefV9CPU);
} else {
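+ // Solaris defaults 32-bit SPARC to V8+; other targets keep plain V8.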
+ const char *DefV8CPU;
+
+ if (Triple.isOSSolaris())
+ DefV8CPU = "-Av8plus";
+ else
+ DefV8CPU = "-Av8";
+
return llvm::StringSwitch<const char *>(Name)
.Case("v8", "-Av8")
.Case("supersparc", "-Av8")
@@ -70,7 +79,7 @@ const char *sparc::getSparcAsmModeForCPU(StringRef Name,
.Case("gr712rc", "-Aleon")
.Case("leon4", "-Aleon")
.Case("gr740", "-Aleon")
- .Default("-Av8");
+ .Default(DefV8CPU);
}
}
@@ -130,7 +139,8 @@ std::string sparc::getSparcTargetCPU(const Driver &D, const ArgList &Args,
return "";
}
-void sparc::getSparcTargetFeatures(const Driver &D, const ArgList &Args,
+void sparc::getSparcTargetFeatures(const Driver &D, const llvm::Triple &Triple,
+ const ArgList &Args,
std::vector<StringRef> &Features) {
sparc::FloatABI FloatABI = sparc::getSparcFloatABI(D, Args);
if (FloatABI == sparc::FloatABI::Soft)
@@ -150,11 +160,22 @@ void sparc::getSparcTargetFeatures(const Driver &D, const ArgList &Args,
Features.push_back("-popc");
}
+ // Those OSes default to enabling VIS on 64-bit SPARC.
+ // See also the corresponding code for external assemblers in
+ // sparc::getSparcAsmModeForCPU().
+ bool IsSparcV9ATarget =
+ (Triple.getArch() == llvm::Triple::sparcv9) &&
+ (Triple.isOSLinux() || Triple.isOSFreeBSD() || Triple.isOSOpenBSD());
+ bool IsSparcV9BTarget = Triple.isOSSolaris();
+ bool IsSparcV8PlusTarget =
+ Triple.getArch() == llvm::Triple::sparc && Triple.isOSSolaris();
if (Arg *A = Args.getLastArg(options::OPT_mvis, options::OPT_mno_vis)) {
if (A->getOption().matches(options::OPT_mvis))
Features.push_back("+vis");
else
Features.push_back("-vis");
+ } else if (IsSparcV9ATarget) {
+ Features.push_back("+vis");
}
if (Arg *A = Args.getLastArg(options::OPT_mvis2, options::OPT_mno_vis2)) {
@@ -162,6 +183,8 @@ void sparc::getSparcTargetFeatures(const Driver &D, const ArgList &Args,
Features.push_back("+vis2");
else
Features.push_back("-vis2");
+ } else if (IsSparcV9BTarget) {
+ Features.push_back("+vis2");
}
if (Arg *A = Args.getLastArg(options::OPT_mvis3, options::OPT_mno_vis3)) {
@@ -182,6 +205,8 @@ void sparc::getSparcTargetFeatures(const Driver &D, const ArgList &Args,
if (Arg *A = Args.getLastArg(options::OPT_mv8plus, options::OPT_mno_v8plus)) {
if (A->getOption().matches(options::OPT_mv8plus))
Features.push_back("+v8plus");
+ } else if (IsSparcV8PlusTarget) {
+ Features.push_back("+v8plus");
}
if (Args.hasArg(options::OPT_ffixed_g1))
diff --git a/clang/lib/Driver/ToolChains/Arch/Sparc.h b/clang/lib/Driver/ToolChains/Arch/Sparc.h
index 2b178d9..fa25b49 100644
--- a/clang/lib/Driver/ToolChains/Arch/Sparc.h
+++ b/clang/lib/Driver/ToolChains/Arch/Sparc.h
@@ -31,7 +31,8 @@ FloatABI getSparcFloatABI(const Driver &D, const llvm::opt::ArgList &Args);
std::string getSparcTargetCPU(const Driver &D, const llvm::opt::ArgList &Args,
const llvm::Triple &Triple);
-void getSparcTargetFeatures(const Driver &D, const llvm::opt::ArgList &Args,
+void getSparcTargetFeatures(const Driver &D, const llvm::Triple &Triple,
+ const llvm::opt::ArgList &Args,
std::vector<llvm::StringRef> &Features);
const char *getSparcAsmModeForCPU(llvm::StringRef Name,
const llvm::Triple &Triple);
diff --git a/clang/lib/Driver/ToolChains/BareMetal.cpp b/clang/lib/Driver/ToolChains/BareMetal.cpp
index e670696..497f333 100644
--- a/clang/lib/Driver/ToolChains/BareMetal.cpp
+++ b/clang/lib/Driver/ToolChains/BareMetal.cpp
@@ -694,9 +694,6 @@ void baremetal::Linker::ConstructJob(Compilation &C, const JobAction &JA,
NeedCRTs)
CmdArgs.push_back(Args.MakeArgString(TC.GetFilePath(CRTEnd)));
- if (TC.getTriple().isRISCV())
- CmdArgs.push_back("-X");
-
// The R_ARM_TARGET2 relocation must be treated as R_ARM_REL32 on arm*-*-elf
// and arm*-*-eabi (the default is R_ARM_GOT_PREL, used on arm*-*-linux and
// arm*-*-*bsd).
diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
index 8880c93..7d0c142 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -97,32 +97,15 @@ forAllAssociatedToolChains(Compilation &C, const JobAction &JA,
// Apply Work on all the offloading tool chains associated with the current
// action.
- if (JA.isHostOffloading(Action::OFK_Cuda))
- Work(*C.getSingleOffloadToolChain<Action::OFK_Cuda>());
- else if (JA.isDeviceOffloading(Action::OFK_Cuda))
- Work(*C.getSingleOffloadToolChain<Action::OFK_Host>());
- else if (JA.isHostOffloading(Action::OFK_HIP))
- Work(*C.getSingleOffloadToolChain<Action::OFK_HIP>());
- else if (JA.isDeviceOffloading(Action::OFK_HIP))
- Work(*C.getSingleOffloadToolChain<Action::OFK_Host>());
-
- if (JA.isHostOffloading(Action::OFK_OpenMP)) {
- auto TCs = C.getOffloadToolChains<Action::OFK_OpenMP>();
- for (auto II = TCs.first, IE = TCs.second; II != IE; ++II)
- Work(*II->second);
- } else if (JA.isDeviceOffloading(Action::OFK_OpenMP))
- Work(*C.getSingleOffloadToolChain<Action::OFK_Host>());
-
- if (JA.isHostOffloading(Action::OFK_SYCL)) {
- auto TCs = C.getOffloadToolChains<Action::OFK_SYCL>();
- for (auto II = TCs.first, IE = TCs.second; II != IE; ++II)
- Work(*II->second);
- } else if (JA.isDeviceOffloading(Action::OFK_SYCL))
- Work(*C.getSingleOffloadToolChain<Action::OFK_Host>());
-
- //
- // TODO: Add support for other offloading programming models here.
- //
+ for (Action::OffloadKind Kind : {Action::OFK_Cuda, Action::OFK_OpenMP,
+ Action::OFK_HIP, Action::OFK_SYCL}) {
+ if (JA.isHostOffloading(Kind)) {
+ auto TCs = C.getOffloadToolChains(Kind);
+ for (auto II = TCs.first, IE = TCs.second; II != IE; ++II)
+ Work(*II->second);
+ } else if (JA.isDeviceOffloading(Kind))
+ Work(*C.getSingleOffloadToolChain<Action::OFK_Host>());
+ }
}
static bool
@@ -2731,16 +2714,6 @@ static void CollectArgsForIntegratedAssembler(Compilation &C,
CmdArgs.push_back(MipsTargetFeature);
}
- // Those OSes default to enabling VIS on 64-bit SPARC.
- // See also the corresponding code for external assemblers in
- // sparc::getSparcAsmModeForCPU().
- bool IsSparcV9ATarget =
- (C.getDefaultToolChain().getArch() == llvm::Triple::sparcv9) &&
- (Triple.isOSLinux() || Triple.isOSFreeBSD() || Triple.isOSOpenBSD());
- if (IsSparcV9ATarget && SparcTargetFeatures.empty()) {
- CmdArgs.push_back("-target-feature");
- CmdArgs.push_back("+vis");
- }
for (const char *Feature : SparcTargetFeatures) {
CmdArgs.push_back("-target-feature");
CmdArgs.push_back(Feature);
@@ -4095,31 +4068,34 @@ static bool RenderModulesOptions(Compilation &C, const Driver &D,
// module fragment.
CmdArgs.push_back("-fskip-odr-check-in-gmf");
- if (Args.hasArg(options::OPT_modules_reduced_bmi) &&
+ if (!Args.hasArg(options::OPT_fno_modules_reduced_bmi) &&
(Input.getType() == driver::types::TY_CXXModule ||
- Input.getType() == driver::types::TY_PP_CXXModule)) {
+ Input.getType() == driver::types::TY_PP_CXXModule) &&
+ !Args.hasArg(options::OPT__precompile)) {
CmdArgs.push_back("-fmodules-reduced-bmi");
if (Args.hasArg(options::OPT_fmodule_output_EQ))
Args.AddLastArg(CmdArgs, options::OPT_fmodule_output_EQ);
- else {
- if (Args.hasArg(options::OPT__precompile) &&
- (!Args.hasArg(options::OPT_o) ||
- Args.getLastArg(options::OPT_o)->getValue() ==
- getCXX20NamedModuleOutputPath(Args, Input.getBaseInput()))) {
- D.Diag(diag::err_drv_reduced_module_output_overrided);
- }
-
+ else
CmdArgs.push_back(Args.MakeArgString(
"-fmodule-output=" +
getCXX20NamedModuleOutputPath(Args, Input.getBaseInput())));
- }
}
- // Noop if we see '-fmodules-reduced-bmi' with other translation
- // units than module units. This is more user friendly to allow end uers to
- // enable this feature without asking for help from build systems.
- Args.ClaimAllArgs(options::OPT_modules_reduced_bmi);
+ if (Args.hasArg(options::OPT_fmodules_reduced_bmi) &&
+ Args.hasArg(options::OPT__precompile) &&
+ (!Args.hasArg(options::OPT_o) ||
+ Args.getLastArg(options::OPT_o)->getValue() ==
+ getCXX20NamedModuleOutputPath(Args, Input.getBaseInput()))) {
+ D.Diag(diag::err_drv_reduced_module_output_overrided);
+ }
+
+ // No-op if we see '-fmodules-reduced-bmi' or '-fno-modules-reduced-bmi' with
+ // translation units other than module units. This is more user-friendly,
+ // allowing end users to enable this feature without asking for help from
+ // build systems.
+ Args.ClaimAllArgs(options::OPT_fmodules_reduced_bmi);
+ Args.ClaimAllArgs(options::OPT_fno_modules_reduced_bmi);
// We need to include the case the input file is a module file here.
// Since the default compilation model for C++ module interface unit will
@@ -4992,8 +4968,8 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
else {
// Host-side compilation.
NormalizedTriple =
- (IsCuda ? C.getSingleOffloadToolChain<Action::OFK_Cuda>()
- : C.getSingleOffloadToolChain<Action::OFK_HIP>())
+ (IsCuda ? C.getOffloadToolChains(Action::OFK_Cuda).first->second
+ : C.getOffloadToolChains(Action::OFK_HIP).first->second)
->getTriple()
.normalize();
if (IsCuda) {
diff --git a/clang/lib/Driver/ToolChains/CommonArgs.cpp b/clang/lib/Driver/ToolChains/CommonArgs.cpp
index 651a39c..826e2ea 100644
--- a/clang/lib/Driver/ToolChains/CommonArgs.cpp
+++ b/clang/lib/Driver/ToolChains/CommonArgs.cpp
@@ -856,7 +856,7 @@ void tools::getTargetFeatures(const Driver &D, const llvm::Triple &Triple,
case llvm::Triple::sparc:
case llvm::Triple::sparcel:
case llvm::Triple::sparcv9:
- sparc::getSparcTargetFeatures(D, Args, Features);
+ sparc::getSparcTargetFeatures(D, Triple, Args, Features);
break;
case llvm::Triple::r600:
case llvm::Triple::amdgcn:
diff --git a/clang/lib/Driver/ToolChains/Cuda.cpp b/clang/lib/Driver/ToolChains/Cuda.cpp
index 2373d94..7d803be 100644
--- a/clang/lib/Driver/ToolChains/Cuda.cpp
+++ b/clang/lib/Driver/ToolChains/Cuda.cpp
@@ -815,7 +815,7 @@ NVPTXToolChain::getSystemGPUArchs(const ArgList &Args) const {
else
Program = GetProgramPath("nvptx-arch");
- auto StdoutOrErr = executeToolChainProgram(Program);
+ auto StdoutOrErr = getDriver().executeProgram({Program});
if (!StdoutOrErr)
return StdoutOrErr.takeError();
diff --git a/clang/lib/Driver/ToolChains/Flang.cpp b/clang/lib/Driver/ToolChains/Flang.cpp
index 1edb83f..7ab41e9 100644
--- a/clang/lib/Driver/ToolChains/Flang.cpp
+++ b/clang/lib/Driver/ToolChains/Flang.cpp
@@ -447,6 +447,7 @@ void Flang::addTargetOptions(const ArgList &Args,
// Add the target features.
switch (TC.getArch()) {
default:
+ getTargetFeatures(D, Triple, Args, CmdArgs, /*ForAs*/ false);
break;
case llvm::Triple::aarch64:
getTargetFeatures(D, Triple, Args, CmdArgs, /*ForAs*/ false);
diff --git a/clang/lib/Driver/ToolChains/HIPAMD.cpp b/clang/lib/Driver/ToolChains/HIPAMD.cpp
index 5fe0f85..b4c6da0 100644
--- a/clang/lib/Driver/ToolChains/HIPAMD.cpp
+++ b/clang/lib/Driver/ToolChains/HIPAMD.cpp
@@ -264,7 +264,7 @@ void HIPAMDToolChain::addClangTargetOptions(
return; // No DeviceLibs for SPIR-V.
}
- for (auto BCFile : getDeviceLibs(DriverArgs)) {
+ for (auto BCFile : getDeviceLibs(DriverArgs, DeviceOffloadingKind)) {
CC1Args.push_back(BCFile.ShouldInternalize ? "-mlink-builtin-bitcode"
: "-mlink-bitcode-file");
CC1Args.push_back(DriverArgs.MakeArgString(BCFile.Path));
@@ -355,7 +355,8 @@ VersionTuple HIPAMDToolChain::computeMSVCVersion(const Driver *D,
}
llvm::SmallVector<ToolChain::BitCodeLibraryInfo, 12>
-HIPAMDToolChain::getDeviceLibs(const llvm::opt::ArgList &DriverArgs) const {
+HIPAMDToolChain::getDeviceLibs(const llvm::opt::ArgList &DriverArgs,
+ Action::OffloadKind DeviceOffloadingKind) const {
llvm::SmallVector<BitCodeLibraryInfo, 12> BCLibs;
if (!DriverArgs.hasFlag(options::OPT_offloadlib, options::OPT_no_offloadlib,
true) ||
@@ -397,7 +398,8 @@ HIPAMDToolChain::getDeviceLibs(const llvm::opt::ArgList &DriverArgs) const {
assert(!GpuArch.empty() && "Must have an explicit GPU arch.");
// Add common device libraries like ocml etc.
- for (auto N : getCommonDeviceLibNames(DriverArgs, GpuArch.str()))
+ for (auto N : getCommonDeviceLibNames(DriverArgs, GpuArch.str(),
+ DeviceOffloadingKind))
BCLibs.emplace_back(N);
// Add instrument lib.
diff --git a/clang/lib/Driver/ToolChains/HIPAMD.h b/clang/lib/Driver/ToolChains/HIPAMD.h
index 3630b11..bcc3ebb 100644
--- a/clang/lib/Driver/ToolChains/HIPAMD.h
+++ b/clang/lib/Driver/ToolChains/HIPAMD.h
@@ -80,7 +80,8 @@ public:
void AddHIPIncludeArgs(const llvm::opt::ArgList &DriverArgs,
llvm::opt::ArgStringList &CC1Args) const override;
llvm::SmallVector<BitCodeLibraryInfo, 12>
- getDeviceLibs(const llvm::opt::ArgList &Args) const override;
+ getDeviceLibs(const llvm::opt::ArgList &Args,
+ Action::OffloadKind DeviceOffloadKind) const override;
SanitizerMask getSupportedSanitizers() const override;
diff --git a/clang/lib/Driver/ToolChains/HIPSPV.cpp b/clang/lib/Driver/ToolChains/HIPSPV.cpp
index 53649ca..643a67f 100644
--- a/clang/lib/Driver/ToolChains/HIPSPV.cpp
+++ b/clang/lib/Driver/ToolChains/HIPSPV.cpp
@@ -149,7 +149,8 @@ void HIPSPVToolChain::addClangTargetOptions(
CC1Args.append(
{"-fvisibility=hidden", "-fapply-global-visibility-to-externs"});
- for (const BitCodeLibraryInfo &BCFile : getDeviceLibs(DriverArgs))
+ for (const BitCodeLibraryInfo &BCFile :
+ getDeviceLibs(DriverArgs, DeviceOffloadingKind))
CC1Args.append(
{"-mlink-builtin-bitcode", DriverArgs.MakeArgString(BCFile.Path)});
}
@@ -200,7 +201,9 @@ void HIPSPVToolChain::AddHIPIncludeArgs(const ArgList &DriverArgs,
}
llvm::SmallVector<ToolChain::BitCodeLibraryInfo, 12>
-HIPSPVToolChain::getDeviceLibs(const llvm::opt::ArgList &DriverArgs) const {
+HIPSPVToolChain::getDeviceLibs(
+ const llvm::opt::ArgList &DriverArgs,
+ const Action::OffloadKind DeviceOffloadingKind) const {
llvm::SmallVector<ToolChain::BitCodeLibraryInfo, 12> BCLibs;
if (!DriverArgs.hasFlag(options::OPT_offloadlib, options::OPT_no_offloadlib,
true))
diff --git a/clang/lib/Driver/ToolChains/HIPSPV.h b/clang/lib/Driver/ToolChains/HIPSPV.h
index ecd82e7..caf6924 100644
--- a/clang/lib/Driver/ToolChains/HIPSPV.h
+++ b/clang/lib/Driver/ToolChains/HIPSPV.h
@@ -69,7 +69,8 @@ public:
void AddHIPIncludeArgs(const llvm::opt::ArgList &DriverArgs,
llvm::opt::ArgStringList &CC1Args) const override;
llvm::SmallVector<BitCodeLibraryInfo, 12>
- getDeviceLibs(const llvm::opt::ArgList &Args) const override;
+ getDeviceLibs(const llvm::opt::ArgList &Args,
+ const Action::OffloadKind DeviceOffloadKind) const override;
SanitizerMask getSupportedSanitizers() const override;
diff --git a/clang/lib/Driver/ToolChains/MSVC.cpp b/clang/lib/Driver/ToolChains/MSVC.cpp
index 7d31eea..bb469ff 100644
--- a/clang/lib/Driver/ToolChains/MSVC.cpp
+++ b/clang/lib/Driver/ToolChains/MSVC.cpp
@@ -279,8 +279,8 @@ void visualstudio::Linker::ConstructJob(Compilation &C, const JobAction &JA,
AddRunTimeLibs(TC, TC.getDriver(), CmdArgs, Args);
}
- StringRef Linker =
- Args.getLastArgValue(options::OPT_fuse_ld_EQ, CLANG_DEFAULT_LINKER);
+ StringRef Linker = Args.getLastArgValue(options::OPT_fuse_ld_EQ,
+ TC.getDriver().getPreferredLinker());
if (Linker.empty())
Linker = "link";
// We need to translate 'lld' into 'lld-link'.
diff --git a/clang/lib/Driver/ToolChains/MinGW.cpp b/clang/lib/Driver/ToolChains/MinGW.cpp
index b2e36ae..6abd0c0 100644
--- a/clang/lib/Driver/ToolChains/MinGW.cpp
+++ b/clang/lib/Driver/ToolChains/MinGW.cpp
@@ -548,7 +548,7 @@ toolchains::MinGW::MinGW(const Driver &D, const llvm::Triple &Triple,
getFilePaths().push_back(Base + "lib");
NativeLLVMSupport =
- Args.getLastArgValue(options::OPT_fuse_ld_EQ, CLANG_DEFAULT_LINKER)
+ Args.getLastArgValue(options::OPT_fuse_ld_EQ, D.getPreferredLinker())
.equals_insensitive("lld");
}
diff --git a/clang/lib/Driver/ToolChains/OpenBSD.cpp b/clang/lib/Driver/ToolChains/OpenBSD.cpp
index 79b1b69..8f58918 100644
--- a/clang/lib/Driver/ToolChains/OpenBSD.cpp
+++ b/clang/lib/Driver/ToolChains/OpenBSD.cpp
@@ -161,7 +161,7 @@ void openbsd::Linker::ConstructJob(Compilation &C, const JobAction &JA,
if (Nopie || Profiling)
CmdArgs.push_back("-nopie");
- if (Triple.isRISCV64()) {
+ if (Triple.isLoongArch64() || Triple.isRISCV64()) {
CmdArgs.push_back("-X");
if (Args.hasArg(options::OPT_mno_relax))
CmdArgs.push_back("--no-relax");
diff --git a/clang/lib/Driver/ToolChains/ROCm.h b/clang/lib/Driver/ToolChains/ROCm.h
index 2a09da01..ebd5443 100644
--- a/clang/lib/Driver/ToolChains/ROCm.h
+++ b/clang/lib/Driver/ToolChains/ROCm.h
@@ -11,6 +11,7 @@
#include "clang/Basic/Cuda.h"
#include "clang/Basic/LLVM.h"
+#include "clang/Driver/CommonArgs.h"
#include "clang/Driver/Driver.h"
#include "clang/Driver/Options.h"
#include "clang/Driver/SanitizerArgs.h"
@@ -18,6 +19,7 @@
#include "llvm/ADT/StringMap.h"
#include "llvm/Option/ArgList.h"
#include "llvm/Support/VersionTuple.h"
+#include "llvm/TargetParser/TargetParser.h"
#include "llvm/TargetParser/Triple.h"
namespace clang {
@@ -77,6 +79,24 @@ private:
SPACKReleaseStr(SPACKReleaseStr.str()) {}
};
+ struct CommonBitcodeLibsPreferences {
+ CommonBitcodeLibsPreferences(const Driver &D,
+ const llvm::opt::ArgList &DriverArgs,
+ StringRef GPUArch,
+ const Action::OffloadKind DeviceOffloadingKind,
+ const bool NeedsASanRT);
+
+ DeviceLibABIVersion ABIVer;
+ bool IsOpenMP;
+ bool Wave64;
+ bool DAZ;
+ bool FiniteOnly;
+ bool UnsafeMathOpt;
+ bool FastRelaxedMath;
+ bool CorrectSqrt;
+ bool GPUSan;
+ };
+
const Driver &D;
bool HasHIPRuntime = false;
bool HasDeviceLibrary = false;
@@ -175,11 +195,11 @@ public:
/// Get file paths of default bitcode libraries common to AMDGPU based
/// toolchains.
- llvm::SmallVector<ToolChain::BitCodeLibraryInfo, 12> getCommonBitcodeLibs(
- const llvm::opt::ArgList &DriverArgs, StringRef LibDeviceFile,
- bool Wave64, bool DAZ, bool FiniteOnly, bool UnsafeMathOpt,
- bool FastRelaxedMath, bool CorrectSqrt, DeviceLibABIVersion ABIVer,
- bool GPUSan, bool isOpenMP) const;
+ llvm::SmallVector<ToolChain::BitCodeLibraryInfo, 12>
+ getCommonBitcodeLibs(const llvm::opt::ArgList &DriverArgs,
+ StringRef LibDeviceFile, StringRef GPUArch,
+ const Action::OffloadKind DeviceOffloadingKind,
+ const bool NeedsASanRT) const;
/// Check file paths of default bitcode libraries common to AMDGPU based
/// toolchains. \returns false if there are invalid or missing files.
bool checkCommonBitcodeLibs(StringRef GPUArch, StringRef LibDeviceFile,
diff --git a/clang/lib/Driver/ToolChains/Solaris.cpp b/clang/lib/Driver/ToolChains/Solaris.cpp
index a3574e1..02aa598 100644
--- a/clang/lib/Driver/ToolChains/Solaris.cpp
+++ b/clang/lib/Driver/ToolChains/Solaris.cpp
@@ -39,7 +39,7 @@ void solaris::Assembler::ConstructJob(Compilation &C, const JobAction &JA,
bool solaris::isLinkerGnuLd(const ToolChain &TC, const ArgList &Args) {
// Only used if targetting Solaris.
const Arg *A = Args.getLastArg(options::OPT_fuse_ld_EQ);
- StringRef UseLinker = A ? A->getValue() : CLANG_DEFAULT_LINKER;
+ StringRef UseLinker = A ? A->getValue() : TC.getDriver().getPreferredLinker();
return UseLinker == "bfd" || UseLinker == "gld";
}
@@ -52,7 +52,7 @@ static bool getPIE(const ArgList &Args, const ToolChain &TC) {
TC.isPIEDefault(Args));
}
-// FIXME: Need to handle CLANG_DEFAULT_LINKER here?
+// FIXME: Need to handle PreferredLinker here?
std::string solaris::Linker::getLinkerPath(const ArgList &Args) const {
const ToolChain &ToolChain = getToolChain();
if (const Arg *A = Args.getLastArg(options::OPT_fuse_ld_EQ)) {
@@ -345,7 +345,7 @@ SanitizerMask Solaris::getSupportedSanitizers() const {
const char *Solaris::getDefaultLinker() const {
// FIXME: Only handle Solaris ld and GNU ld here.
- return llvm::StringSwitch<const char *>(CLANG_DEFAULT_LINKER)
+ return llvm::StringSwitch<const char *>(getDriver().getPreferredLinker())
.Cases("bfd", "gld", "/usr/gnu/bin/ld")
.Default("/usr/bin/ld");
}
diff --git a/clang/lib/Driver/ToolChains/UEFI.cpp b/clang/lib/Driver/ToolChains/UEFI.cpp
index ac6668e..2b41173 100644
--- a/clang/lib/Driver/ToolChains/UEFI.cpp
+++ b/clang/lib/Driver/ToolChains/UEFI.cpp
@@ -83,8 +83,8 @@ void tools::uefi::Linker::ConstructJob(Compilation &C, const JobAction &JA,
// This should ideally be handled by ToolChain::GetLinkerPath but we need
// to special case some linker paths. In the case of lld, we need to
// translate 'lld' into 'lld-link'.
- StringRef Linker =
- Args.getLastArgValue(options::OPT_fuse_ld_EQ, CLANG_DEFAULT_LINKER);
+ StringRef Linker = Args.getLastArgValue(options::OPT_fuse_ld_EQ,
+ TC.getDriver().getPreferredLinker());
if (Linker.empty() || Linker == "lld")
Linker = "lld-link";
diff --git a/clang/lib/Format/BreakableToken.cpp b/clang/lib/Format/BreakableToken.cpp
index c36cb74..29db200 100644
--- a/clang/lib/Format/BreakableToken.cpp
+++ b/clang/lib/Format/BreakableToken.cpp
@@ -25,7 +25,7 @@
namespace clang {
namespace format {
-static constexpr StringRef Blanks = " \t\v\f\r";
+static constexpr StringRef Blanks(" \t\v\f\r");
static StringRef getLineCommentIndentPrefix(StringRef Comment,
const FormatStyle &Style) {
@@ -513,7 +513,7 @@ BreakableBlockComment::BreakableBlockComment(
Decoration = "";
}
for (size_t i = 1, e = Content.size(); i < e && !Decoration.empty(); ++i) {
- const StringRef &Text = Content[i];
+ const StringRef Text(Content[i]);
if (i + 1 == e) {
// If the last line is empty, the closing "*/" will have a star.
if (Text.empty())
diff --git a/clang/lib/Format/ContinuationIndenter.cpp b/clang/lib/Format/ContinuationIndenter.cpp
index 4010f7f..9a10403 100644
--- a/clang/lib/Format/ContinuationIndenter.cpp
+++ b/clang/lib/Format/ContinuationIndenter.cpp
@@ -560,6 +560,7 @@ bool ContinuationIndenter::mustBreak(const LineState &State) {
return true;
}
} else if (Current.is(TT_BinaryOperator) && Current.CanBreakBefore &&
+ Current.getPrecedence() != prec::Assignment &&
CurrentState.BreakBeforeParameter) {
return true;
}
@@ -1724,7 +1725,8 @@ unsigned ContinuationIndenter::moveStateToNextToken(LineState &State,
}
if (Previous && (Previous->isOneOf(TT_BinaryOperator, TT_ConditionalExpr) ||
(Previous->isOneOf(tok::l_paren, tok::comma, tok::colon) &&
- !Previous->isOneOf(TT_DictLiteral, TT_ObjCMethodExpr)))) {
+ !Previous->isOneOf(TT_DictLiteral, TT_ObjCMethodExpr,
+ TT_CtorInitializerColon)))) {
CurrentState.NestedBlockInlined =
!Newline && hasNestedBlockInlined(Previous, Current, Style);
}
diff --git a/clang/lib/Format/Format.cpp b/clang/lib/Format/Format.cpp
index 62feb3d..1cfa3d1 100644
--- a/clang/lib/Format/Format.cpp
+++ b/clang/lib/Format/Format.cpp
@@ -731,6 +731,7 @@ template <> struct MappingTraits<FormatStyle::SpaceBeforeParensCustom> {
IO.mapOptional("AfterFunctionDeclarationName",
Spacing.AfterFunctionDeclarationName);
IO.mapOptional("AfterIfMacros", Spacing.AfterIfMacros);
+ IO.mapOptional("AfterNot", Spacing.AfterNot);
IO.mapOptional("AfterOverloadedOperator", Spacing.AfterOverloadedOperator);
IO.mapOptional("AfterPlacementOperator", Spacing.AfterPlacementOperator);
IO.mapOptional("AfterRequiresInClause", Spacing.AfterRequiresInClause);
@@ -2643,13 +2644,14 @@ private:
for (FormatToken *Tok = Line->First; Tok && Tok->Next; Tok = Tok->Next) {
if (Tok->isNot(TT_PointerOrReference))
continue;
- // Don't treat space in `void foo() &&` as evidence.
+ // Don't treat space in `void foo() &&` or `void() &&` as evidence.
if (const auto *Prev = Tok->getPreviousNonComment()) {
if (Prev->is(tok::r_paren) && Prev->MatchingParen) {
if (const auto *Func =
Prev->MatchingParen->getPreviousNonComment()) {
if (Func->isOneOf(TT_FunctionDeclarationName, TT_StartOfName,
- TT_OverloadedOperator)) {
+ TT_OverloadedOperator) ||
+ Func->isTypeName(LangOpts)) {
continue;
}
}
diff --git a/clang/lib/Format/FormatTokenLexer.cpp b/clang/lib/Format/FormatTokenLexer.cpp
index 49da316..3f4aa52 100644
--- a/clang/lib/Format/FormatTokenLexer.cpp
+++ b/clang/lib/Format/FormatTokenLexer.cpp
@@ -1198,7 +1198,7 @@ void FormatTokenLexer::truncateToken(size_t NewLen) {
/// Count the length of leading whitespace in a token.
static size_t countLeadingWhitespace(StringRef Text) {
// Basically counting the length matched by this regex.
- // "^([\n\r\f\v \t]|(\\\\|\\?\\?/)[\n\r])+"
+ // "^([\n\r\f\v \t]|\\\\[\n\r])+"
// Directly using the regex turned out to be slow. With the regex
// version formatting all files in this directory took about 1.25
// seconds. This version took about 0.5 seconds.
@@ -1222,13 +1222,6 @@ static size_t countLeadingWhitespace(StringRef Text) {
break;
// Splice found, consume it.
Cur = Lookahead + 1;
- } else if (Cur[0] == '?' && Cur[1] == '?' && Cur[2] == '/' &&
- (Cur[3] == '\n' || Cur[3] == '\r')) {
- // Newlines can also be escaped by a '?' '?' '/' trigraph. By the way, the
- // characters are quoted individually in this comment because if we write
- // them together some compilers warn that we have a trigraph in the code.
- assert(End - Cur >= 4);
- Cur += 4;
} else {
break;
}
@@ -1300,22 +1293,16 @@ FormatToken *FormatTokenLexer::getNextToken() {
Style.TabWidth - (Style.TabWidth ? Column % Style.TabWidth : 0);
break;
case '\\':
- case '?':
- case '/':
- // The text was entirely whitespace when this loop was entered. Thus
- // this has to be an escape sequence.
- assert(Text.substr(i, 4) == "\?\?/\r" ||
- Text.substr(i, 4) == "\?\?/\n" ||
- (i >= 1 && (Text.substr(i - 1, 4) == "\?\?/\r" ||
- Text.substr(i - 1, 4) == "\?\?/\n")) ||
- (i >= 2 && (Text.substr(i - 2, 4) == "\?\?/\r" ||
- Text.substr(i - 2, 4) == "\?\?/\n")) ||
- (Text[i] == '\\' && [&]() -> bool {
- size_t j = i + 1;
- while (j < Text.size() && isHorizontalWhitespace(Text[j]))
- ++j;
- return j < Text.size() && (Text[j] == '\n' || Text[j] == '\r');
- }()));
+ // The code preceding the loop and in the countLeadingWhitespace
+ // function guarantees that Text is entirely whitespace, not including
+ // comments but including escaped newlines. So if this character shows up,
+ // it has to be part of an escape sequence.
+ assert([&]() -> bool {
+ size_t j = i + 1;
+ while (j < Text.size() && isHorizontalWhitespace(Text[j]))
+ ++j;
+ return j < Text.size() && (Text[j] == '\n' || Text[j] == '\r');
+ }());
InEscape = true;
break;
default:
diff --git a/clang/lib/Format/IntegerLiteralSeparatorFixer.cpp b/clang/lib/Format/IntegerLiteralSeparatorFixer.cpp
index 87823ae..80487fa 100644
--- a/clang/lib/Format/IntegerLiteralSeparatorFixer.cpp
+++ b/clang/lib/Format/IntegerLiteralSeparatorFixer.cpp
@@ -19,7 +19,7 @@ namespace format {
enum class Base { Binary, Decimal, Hex, Other };
-static Base getBase(const StringRef IntegerLiteral) {
+static Base getBase(StringRef IntegerLiteral) {
assert(IntegerLiteral.size() > 1);
if (IntegerLiteral[0] > '0') {
@@ -164,8 +164,8 @@ IntegerLiteralSeparatorFixer::process(const Environment &Env,
return {Result, 0};
}
-bool IntegerLiteralSeparatorFixer::checkSeparator(
- const StringRef IntegerLiteral, int DigitsPerGroup) const {
+bool IntegerLiteralSeparatorFixer::checkSeparator(StringRef IntegerLiteral,
+ int DigitsPerGroup) const {
assert(DigitsPerGroup > 0);
int I = 0;
@@ -184,7 +184,7 @@ bool IntegerLiteralSeparatorFixer::checkSeparator(
return true;
}
-std::string IntegerLiteralSeparatorFixer::format(const StringRef IntegerLiteral,
+std::string IntegerLiteralSeparatorFixer::format(StringRef IntegerLiteral,
int DigitsPerGroup,
int DigitCount,
bool RemoveSeparator) const {
diff --git a/clang/lib/Format/IntegerLiteralSeparatorFixer.h b/clang/lib/Format/IntegerLiteralSeparatorFixer.h
index 2c158e4..e24af18 100644
--- a/clang/lib/Format/IntegerLiteralSeparatorFixer.h
+++ b/clang/lib/Format/IntegerLiteralSeparatorFixer.h
@@ -26,8 +26,8 @@ public:
const FormatStyle &Style);
private:
- bool checkSeparator(const StringRef IntegerLiteral, int DigitsPerGroup) const;
- std::string format(const StringRef IntegerLiteral, int DigitsPerGroup,
+ bool checkSeparator(StringRef IntegerLiteral, int DigitsPerGroup) const;
+ std::string format(StringRef IntegerLiteral, int DigitsPerGroup,
int DigitCount, bool RemoveSeparator) const;
char Separator;
diff --git a/clang/lib/Format/ObjCPropertyAttributeOrderFixer.cpp b/clang/lib/Format/ObjCPropertyAttributeOrderFixer.cpp
index 37a1807..b885942 100644
--- a/clang/lib/Format/ObjCPropertyAttributeOrderFixer.cpp
+++ b/clang/lib/Format/ObjCPropertyAttributeOrderFixer.cpp
@@ -66,7 +66,7 @@ void ObjCPropertyAttributeOrderFixer::sortPropertyAttributes(
return;
}
- const StringRef Attribute{Tok->TokenText};
+ const StringRef Attribute(Tok->TokenText);
StringRef Value;
// Also handle `getter=getFoo` attributes.
diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp
index 581bfba..d28d2fd 100644
--- a/clang/lib/Format/TokenAnnotator.cpp
+++ b/clang/lib/Format/TokenAnnotator.cpp
@@ -5478,7 +5478,8 @@ bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line,
if (Left.TokenText == "!")
return Style.SpaceAfterLogicalNot;
assert(Left.TokenText == "not");
- return Right.isOneOf(tok::coloncolon, TT_UnaryOperator);
+ return Right.isOneOf(tok::coloncolon, TT_UnaryOperator) ||
+ (Right.is(tok::l_paren) && Style.SpaceBeforeParensOptions.AfterNot);
}
// If the next token is a binary operator or a selector name, we have
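A sketch of the formatting this enables, assuming a .clang-format file with SpaceBeforeParens: Custom and AfterNot: true (the option the new condition consults); the identifiers are placeholders:

    // clang-format now keeps a space between the alternative operator
    // spelling and its parenthesized operand:
    if (not (flags & kEnabled))
      return;
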
diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp
index 3a36250..ab4384a 100644
--- a/clang/lib/Frontend/CompilerInvocation.cpp
+++ b/clang/lib/Frontend/CompilerInvocation.cpp
@@ -2013,8 +2013,8 @@ bool CompilerInvocation::ParseCodeGenArgs(CodeGenOptions &Opts, ArgList &Args,
: llvm::codegenoptions::DebugTemplateNamesKind::Mangled);
}
- if (const Arg *A = Args.getLastArg(OPT_ftime_report, OPT_ftime_report_EQ,
- OPT_ftime_report_json)) {
+ if (Args.hasArg(OPT_ftime_report, OPT_ftime_report_EQ, OPT_ftime_report_json,
+ OPT_stats_file_timers)) {
Opts.TimePasses = true;
// -ftime-report= is only for new pass manager.
@@ -2026,7 +2026,7 @@ bool CompilerInvocation::ParseCodeGenArgs(CodeGenOptions &Opts, ArgList &Args,
Opts.TimePassesPerRun = true;
else
Diags.Report(diag::err_drv_invalid_value)
- << A->getAsString(Args) << A->getValue();
+ << EQ->getAsString(Args) << EQ->getValue();
}
if (Args.getLastArg(OPT_ftime_report_json))
diff --git a/clang/lib/Interpreter/Interpreter.cpp b/clang/lib/Interpreter/Interpreter.cpp
index db6a2bb..9b71486 100644
--- a/clang/lib/Interpreter/Interpreter.cpp
+++ b/clang/lib/Interpreter/Interpreter.cpp
@@ -761,10 +761,18 @@ Interpreter::getSymbolAddressFromLinkerName(llvm::StringRef Name) const {
llvm::Error Interpreter::Undo(unsigned N) {
- if (N > getEffectivePTUSize())
+ if (getEffectivePTUSize() == 0) {
return llvm::make_error<llvm::StringError>("Operation failed. "
- "Too many undos",
+ "No input left to undo",
std::error_code());
+ } else if (N > getEffectivePTUSize()) {
+ return llvm::make_error<llvm::StringError>(
+ llvm::formatv(
+ "Operation failed. Wanted to undo {0} inputs, only have {1}.", N,
+ getEffectivePTUSize()),
+ std::error_code());
+ }
+
for (unsigned I = 0; I < N; I++) {
if (IncrExecutor) {
if (llvm::Error Err = IncrExecutor->removeModule(PTUs.back()))
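A sketch of what a caller sees after this change, assuming an existing clang::Interpreter instance named Interp that has parsed only two inputs:

    #include "clang/Interpreter/Interpreter.h"
    #include "llvm/Support/Error.h"

    if (llvm::Error Err = Interp->Undo(3)) {
      // Reports: "Operation failed. Wanted to undo 3 inputs, only have 2."
      // (or "No input left to undo" when nothing has been parsed yet).
      llvm::logAllUnhandledErrors(std::move(Err), llvm::errs(), "undo: ");
    }
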
diff --git a/clang/lib/Interpreter/InterpreterValuePrinter.cpp b/clang/lib/Interpreter/InterpreterValuePrinter.cpp
index 34ffd62..0ea6274 100644
--- a/clang/lib/Interpreter/InterpreterValuePrinter.cpp
+++ b/clang/lib/Interpreter/InterpreterValuePrinter.cpp
@@ -106,7 +106,7 @@ static std::string EnumToString(const Value &V) {
assert(EnumTy && "Fail to cast to enum type");
EnumDecl *ED = EnumTy->getDecl();
- uint64_t Data = V.getULongLong();
+ uint64_t Data = V.convertTo<uint64_t>();
bool IsFirst = true;
llvm::APSInt AP = Ctx.MakeIntValue(Data, DesugaredTy);
diff --git a/clang/lib/Lex/LiteralSupport.cpp b/clang/lib/Lex/LiteralSupport.cpp
index a62508e..5b08d7f 100644
--- a/clang/lib/Lex/LiteralSupport.cpp
+++ b/clang/lib/Lex/LiteralSupport.cpp
@@ -1467,7 +1467,7 @@ void NumericLiteralParser::ParseNumberStartingWithZero(SourceLocation TokLoc) {
if (s != PossibleNewDigitStart)
DigitsBegin = PossibleNewDigitStart;
else
- IsSingleZero = (s == ThisTokEnd); // Is the only thing we've seen a 0?
+ IsSingleZero = (s == ThisTokBegin + 1);
if (s == ThisTokEnd)
return; // Done, simple octal number like 01234
diff --git a/clang/lib/Lex/Pragma.cpp b/clang/lib/Lex/Pragma.cpp
index 01c85e6..bba3c89 100644
--- a/clang/lib/Lex/Pragma.cpp
+++ b/clang/lib/Lex/Pragma.cpp
@@ -591,7 +591,8 @@ IdentifierInfo *Preprocessor::ParsePragmaPushOrPopMacro(Token &Tok) {
}
// Remember the macro string.
- std::string StrVal = getSpelling(Tok);
+ Token StrTok = Tok;
+ std::string StrVal = getSpelling(StrTok);
// Read the ')'.
Lex(Tok);
@@ -604,6 +605,15 @@ IdentifierInfo *Preprocessor::ParsePragmaPushOrPopMacro(Token &Tok) {
assert(StrVal[0] == '"' && StrVal[StrVal.size()-1] == '"' &&
"Invalid string token!");
+ if (StrVal.size() <= 2) {
+ Diag(StrTok.getLocation(), diag::warn_pargma_push_pop_macro_empty_string)
+ << SourceRange(
+ StrTok.getLocation(),
+ StrTok.getLocation().getLocWithOffset(StrTok.getLength()))
+ << PragmaTok.getIdentifierInfo()->isStr("pop_macro");
+ return nullptr;
+ }
+
// Create a Token from the string.
Token MacroTok;
MacroTok.startToken();
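The new check fires when the string literal holds nothing but its quotes; a sketch of the source it now diagnoses (the exact warning text lives in the accompanying DiagnosticLexKinds.td change):

    #pragma push_macro("")   // warns: empty macro name; the pragma is ignored
    #pragma pop_macro("")    // likewise; the early return skips macro lookup
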
diff --git a/clang/lib/Parse/ParseDecl.cpp b/clang/lib/Parse/ParseDecl.cpp
index 893ef02..e47caeb 100644
--- a/clang/lib/Parse/ParseDecl.cpp
+++ b/clang/lib/Parse/ParseDecl.cpp
@@ -5695,11 +5695,10 @@ Parser::DeclGroupPtrTy Parser::ParseTopLevelStmtDecl() {
Scope::CompoundStmtScope);
TopLevelStmtDecl *TLSD = Actions.ActOnStartTopLevelStmtDecl(getCurScope());
StmtResult R = ParseStatementOrDeclaration(Stmts, SubStmtCtx);
+ Actions.ActOnFinishTopLevelStmtDecl(TLSD, R.get());
if (!R.isUsable())
R = Actions.ActOnNullStmt(Tok.getLocation());
- Actions.ActOnFinishTopLevelStmtDecl(TLSD, R.get());
-
if (Tok.is(tok::annot_repl_input_end) &&
Tok.getAnnotationValue() != nullptr) {
ConsumeAnnotationToken();
diff --git a/clang/lib/Parse/ParseDeclCXX.cpp b/clang/lib/Parse/ParseDeclCXX.cpp
index 31392d1d..bc8841c 100644
--- a/clang/lib/Parse/ParseDeclCXX.cpp
+++ b/clang/lib/Parse/ParseDeclCXX.cpp
@@ -4940,9 +4940,8 @@ void Parser::ParseHLSLRootSignatureAttributeArgs(ParsedAttributes &Attrs) {
// signature string and construct the in-memory elements
if (!Found) {
// Invoke the root signature parser to construct the in-memory constructs
- SmallVector<hlsl::RootSignatureElement> RootElements;
- hlsl::RootSignatureParser Parser(getLangOpts().HLSLRootSigVer, RootElements,
- Signature, PP);
+ hlsl::RootSignatureParser Parser(getLangOpts().HLSLRootSigVer, Signature,
+ PP);
if (Parser.parse()) {
T.consumeClose();
return;
@@ -4950,7 +4949,7 @@ void Parser::ParseHLSLRootSignatureAttributeArgs(ParsedAttributes &Attrs) {
// Construct the declaration.
Actions.HLSL().ActOnFinishRootSignatureDecl(RootSignatureLoc, DeclIdent,
- RootElements);
+ Parser.getElements());
}
// Create the arg for the ParsedAttr
diff --git a/clang/lib/Parse/ParseHLSLRootSignature.cpp b/clang/lib/Parse/ParseHLSLRootSignature.cpp
index db9ed83..98dc458 100644
--- a/clang/lib/Parse/ParseHLSLRootSignature.cpp
+++ b/clang/lib/Parse/ParseHLSLRootSignature.cpp
@@ -27,11 +27,10 @@ static const TokenKind RootElementKeywords[] = {
};
RootSignatureParser::RootSignatureParser(
- llvm::dxbc::RootSignatureVersion Version,
- SmallVector<RootSignatureElement> &Elements, StringLiteral *Signature,
+ llvm::dxbc::RootSignatureVersion Version, StringLiteral *Signature,
Preprocessor &PP)
- : Version(Version), Elements(Elements), Signature(Signature),
- Lexer(Signature->getString()), PP(PP), CurToken(0) {}
+ : Version(Version), Signature(Signature), Lexer(Signature->getString()),
+ PP(PP), CurToken(0) {}
bool RootSignatureParser::parse() {
// Iterate as many RootSignatureElements as possible, until we hit the
diff --git a/clang/lib/Sema/AnalysisBasedWarnings.cpp b/clang/lib/Sema/AnalysisBasedWarnings.cpp
index d1400cb..829c81b 100644
--- a/clang/lib/Sema/AnalysisBasedWarnings.cpp
+++ b/clang/lib/Sema/AnalysisBasedWarnings.cpp
@@ -2901,8 +2901,7 @@ void clang::sema::AnalysisBasedWarnings::IssueWarnings(
.setAlwaysAdd(Stmt::UnaryOperatorClass);
}
- bool EnableLifetimeSafetyAnalysis = !Diags.isIgnored(
- diag::warn_experimental_lifetime_safety_dummy_warning, D->getBeginLoc());
+ bool EnableLifetimeSafetyAnalysis = S.getLangOpts().EnableLifetimeSafety;
// Install the logical handler.
std::optional<LogicalErrorHandler> LEH;
if (LogicalErrorHandler::hasActiveDiagnostics(Diags, D->getBeginLoc())) {
@@ -3029,8 +3028,8 @@ void clang::sema::AnalysisBasedWarnings::IssueWarnings(
// TODO: Enable lifetime safety analysis for other languages once it is
// stable.
if (EnableLifetimeSafetyAnalysis && S.getLangOpts().CPlusPlus) {
- if (CFG *cfg = AC.getCFG())
- runLifetimeSafetyAnalysis(*cast<DeclContext>(D), *cfg, AC);
+ if (AC.getCFG())
+ lifetimes::runLifetimeSafetyAnalysis(AC);
}
// Check for violations of "called once" parameter properties.
if (S.getLangOpts().ObjC && !S.getLangOpts().CPlusPlus &&
diff --git a/clang/lib/Sema/SemaARM.cpp b/clang/lib/Sema/SemaARM.cpp
index bd603a9..8e27fab 100644
--- a/clang/lib/Sema/SemaARM.cpp
+++ b/clang/lib/Sema/SemaARM.cpp
@@ -1535,4 +1535,95 @@ bool SemaARM::areLaxCompatibleSveTypes(QualType FirstType,
IsLaxCompatible(SecondType, FirstType);
}
+bool SemaARM::checkTargetVersionAttr(const StringRef Param,
+ const SourceLocation Loc) {
+ using namespace DiagAttrParams;
+
+ llvm::SmallVector<StringRef, 8> Features;
+ Param.split(Features, '+');
+ for (StringRef Feat : Features) {
+ Feat = Feat.trim();
+ if (Feat == "default")
+ continue;
+ if (!getASTContext().getTargetInfo().validateCpuSupports(Feat))
+ return Diag(Loc, diag::warn_unsupported_target_attribute)
+ << Unsupported << None << Feat << TargetVersion;
+ }
+ return false;
+}
+
+bool SemaARM::checkTargetClonesAttr(
+ SmallVectorImpl<StringRef> &Params, SmallVectorImpl<SourceLocation> &Locs,
+ SmallVectorImpl<SmallString<64>> &NewParams) {
+ using namespace DiagAttrParams;
+
+ if (!getASTContext().getTargetInfo().hasFeature("fmv"))
+ return true;
+
+ assert(Params.size() == Locs.size() &&
+ "Mismatch between number of string parameters and locations");
+
+ bool HasDefault = false;
+ bool HasNonDefault = false;
+ for (unsigned I = 0, E = Params.size(); I < E; ++I) {
+ const StringRef Param = Params[I].trim();
+ const SourceLocation &Loc = Locs[I];
+
+ if (Param.empty())
+ return Diag(Loc, diag::warn_unsupported_target_attribute)
+ << Unsupported << None << "" << TargetClones;
+
+ if (Param == "default") {
+ if (HasDefault)
+ Diag(Loc, diag::warn_target_clone_duplicate_options);
+ else {
+ NewParams.push_back(Param);
+ HasDefault = true;
+ }
+ continue;
+ }
+
+ bool HasCodeGenImpact = false;
+ llvm::SmallVector<StringRef, 8> Features;
+ llvm::SmallVector<StringRef, 8> ValidFeatures;
+ Param.split(Features, '+');
+ for (StringRef Feat : Features) {
+ Feat = Feat.trim();
+ if (!getASTContext().getTargetInfo().validateCpuSupports(Feat)) {
+ Diag(Loc, diag::warn_unsupported_target_attribute)
+ << Unsupported << None << Feat << TargetClones;
+ continue;
+ }
+ if (getASTContext().getTargetInfo().doesFeatureAffectCodeGen(Feat))
+ HasCodeGenImpact = true;
+ ValidFeatures.push_back(Feat);
+ }
+
+ // Ignore features that don't impact code generation.
+ if (!HasCodeGenImpact) {
+ Diag(Loc, diag::warn_target_clone_no_impact_options);
+ continue;
+ }
+
+ if (ValidFeatures.empty())
+ continue;
+
+ // Canonicalize attribute parameter.
+ llvm::sort(ValidFeatures);
+ SmallString<64> NewParam(llvm::join(ValidFeatures, "+"));
+ if (llvm::is_contained(NewParams, NewParam)) {
+ Diag(Loc, diag::warn_target_clone_duplicate_options);
+ continue;
+ }
+
+ // Valid non-default argument.
+ NewParams.push_back(NewParam);
+ HasNonDefault = true;
+ }
+ if (!HasNonDefault)
+ return true;
+
+ return false;
+}
+
} // namespace clang
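A sketch of AArch64 attribute spellings these new helpers accept, assuming a target with the fmv feature (without it, checkTargetClonesAttr drops the attribute). Feature names are '+'-separated, "default" is passed through, and unknown target_clones features are warned about and dropped from the canonicalized (sorted, '+'-joined) list:

    __attribute__((target_version("sve2+bf16"))) int fmv_fn(void);
    __attribute__((target_version("default")))   int fmv_fn(void);

    __attribute__((target_clones("sve2+bf16", "default"))) int cloned(void);
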
diff --git a/clang/lib/Sema/SemaAvailability.cpp b/clang/lib/Sema/SemaAvailability.cpp
index 8c6a173..68a698f 100644
--- a/clang/lib/Sema/SemaAvailability.cpp
+++ b/clang/lib/Sema/SemaAvailability.cpp
@@ -547,6 +547,12 @@ static void DoEmitAvailabilityWarning(Sema &S, AvailabilityResult K,
return;
}
case AR_Deprecated:
+    // Suppress -Wdeprecated-declarations inside implicitly
+    // generated functions.
+ if (const auto *FD = dyn_cast_or_null<FunctionDecl>(S.getCurFunctionDecl());
+ FD && FD->isImplicit())
+ return;
+
if (ObjCPropertyAccess)
diag = diag::warn_property_method_deprecated;
else if (S.currentEvaluationContext().IsCaseExpr)
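A sketch of the situation this suppresses, using placeholder types: the deprecated copy constructor is still diagnosed at explicit uses, but no longer when it is reached only from an implicitly generated member.

    struct Member {
      Member();
      [[deprecated("use clone()")]] Member(const Member &);
    };
    struct Wrapper { Member m; };

    Wrapper dup(const Wrapper &w) {
      return w;  // uses Wrapper's implicit copy constructor; the deprecated
                 // Member copy constructor it calls no longer warns here
    }
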
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index 5e523fe..c74b671 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -3013,6 +3013,8 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID,
case Builtin::BI__builtin_elementwise_maxnum:
case Builtin::BI__builtin_elementwise_minimum:
case Builtin::BI__builtin_elementwise_maximum:
+ case Builtin::BI__builtin_elementwise_minimumnum:
+ case Builtin::BI__builtin_elementwise_maximumnum:
case Builtin::BI__builtin_elementwise_atan2:
case Builtin::BI__builtin_elementwise_fmod:
case Builtin::BI__builtin_elementwise_pow:
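The two new builtins are routed through the same two-argument floating-point check as the other elementwise math builtins; a sketch of a call that passes semantic checking:

    typedef float float4 __attribute__((ext_vector_type(4)));
    float4 pick_larger(float4 a, float4 b) {
      // Like llvm.maximumnum: a NaN operand loses to the numeric operand.
      return __builtin_elementwise_maximumnum(a, b);
    }
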
diff --git a/clang/lib/Sema/SemaConcept.cpp b/clang/lib/Sema/SemaConcept.cpp
index 5205ca0b..044cf5c 100644
--- a/clang/lib/Sema/SemaConcept.cpp
+++ b/clang/lib/Sema/SemaConcept.cpp
@@ -588,6 +588,9 @@ static bool CheckConstraintSatisfaction(
return true;
for (const AssociatedConstraint &AC : AssociatedConstraints) {
+ if (AC.isNull())
+ return true;
+
Sema::ArgPackSubstIndexRAII _(S, AC.ArgPackSubstIndex);
ExprResult Res = calculateConstraintSatisfaction(
S, Template, TemplateIDRange.getBegin(), TemplateArgsLists,
diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp
index 14403e6..d7420bd 100644
--- a/clang/lib/Sema/SemaDecl.cpp
+++ b/clang/lib/Sema/SemaDecl.cpp
@@ -3267,6 +3267,14 @@ void Sema::mergeDeclAttributes(NamedDecl *New, Decl *Old,
if (isa<UsedAttr>(I) || isa<RetainAttr>(I))
continue;
+ if (isa<InferredNoReturnAttr>(I)) {
+ if (auto *FD = dyn_cast<FunctionDecl>(New)) {
+ if (FD->getTemplateSpecializationKind() == TSK_ExplicitSpecialization)
+ continue; // Don't propagate inferred noreturn attributes to explicit
+ // specializations.
+ }
+ }
+
if (mergeDeclAttribute(*this, New, I, LocalAMK))
foundAny = true;
}
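The hunk above keeps an inferred noreturn attribute from being merged onto explicit specializations; the companion change in SemaDeclAttr.cpp below skips the inference for them entirely. A sketch of why:

    template <typename T> void fail(T v) { throw v; }
    // The primary template never returns, so clang may infer noreturn for it,
    // but this user-provided specialization returns normally and must not
    // inherit that inference.
    template <> void fail<int>(int) { /* log and return */ }
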
@@ -18476,6 +18484,10 @@ CreateNewDecl:
// record.
AddPushedVisibilityAttribute(New);
+ // If this is not a definition, process API notes for it now.
+ if (TUK != TagUseKind::Definition)
+ ProcessAPINotes(New);
+
if (isMemberSpecialization && !New->isInvalidDecl())
CompleteMemberSpecialization(New, Previous);
@@ -20569,7 +20581,8 @@ TopLevelStmtDecl *Sema::ActOnStartTopLevelStmtDecl(Scope *S) {
}
void Sema::ActOnFinishTopLevelStmtDecl(TopLevelStmtDecl *D, Stmt *Statement) {
- D->setStmt(Statement);
+ if (Statement)
+ D->setStmt(Statement);
PopCompoundScope();
PopFunctionScopeInfo();
PopDeclContext();
diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp
index 78f4804..a4e8de4 100644
--- a/clang/lib/Sema/SemaDeclAttr.cpp
+++ b/clang/lib/Sema/SemaDeclAttr.cpp
@@ -1970,6 +1970,13 @@ void clang::inferNoReturnAttr(Sema &S, const Decl *D) {
if (!FD)
return;
+ // Skip explicit specializations here as they may have
+ // a user-provided definition that may deliberately differ from the primary
+ // template. If an explicit specialization truly never returns, the user
+ // should explicitly mark it with [[noreturn]].
+ if (FD->getTemplateSpecializationKind() == TSK_ExplicitSpecialization)
+ return;
+
auto *NonConstFD = const_cast<FunctionDecl *>(FD);
DiagnosticsEngine &Diags = S.getDiagnostics();
if (Diags.isIgnored(diag::warn_falloff_nonvoid, FD->getLocation()) &&
@@ -2034,7 +2041,8 @@ bool Sema::CheckAttrTarget(const ParsedAttr &AL) {
// Check whether the attribute is valid on the current target.
if (!AL.existsInTarget(Context.getTargetInfo())) {
if (AL.isRegularKeywordAttribute())
- Diag(AL.getLoc(), diag::err_keyword_not_supported_on_target);
+ Diag(AL.getLoc(), diag::err_keyword_not_supported_on_target)
+ << AL << AL.getRange();
else
DiagnoseUnknownAttribute(AL);
AL.setInvalid();
@@ -3254,9 +3262,8 @@ static void handleCodeSegAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
}
bool Sema::checkTargetAttr(SourceLocation LiteralLoc, StringRef AttrStr) {
- enum FirstParam { Unsupported, Duplicate, Unknown };
- enum SecondParam { None, CPU, Tune };
- enum ThirdParam { Target, TargetClones };
+ using namespace DiagAttrParams;
+
if (AttrStr.contains("fpmath="))
return Diag(LiteralLoc, diag::warn_unsupported_target_attribute)
<< Unsupported << None << "fpmath=" << Target;
@@ -3331,80 +3338,22 @@ bool Sema::checkTargetAttr(SourceLocation LiteralLoc, StringRef AttrStr) {
return false;
}
-bool Sema::checkTargetVersionAttr(SourceLocation LiteralLoc, Decl *D,
- StringRef AttrStr) {
- enum FirstParam { Unsupported };
- enum SecondParam { None };
- enum ThirdParam { Target, TargetClones, TargetVersion };
- llvm::SmallVector<StringRef, 8> Features;
- if (Context.getTargetInfo().getTriple().isRISCV()) {
- llvm::SmallVector<StringRef, 8> AttrStrs;
- AttrStr.split(AttrStrs, ';');
-
- bool HasArch = false;
- bool HasPriority = false;
- bool HasDefault = false;
- bool DuplicateAttr = false;
- for (auto &AttrStr : AttrStrs) {
- // Only support arch=+ext,... syntax.
- if (AttrStr.starts_with("arch=+")) {
- if (HasArch)
- DuplicateAttr = true;
- HasArch = true;
- ParsedTargetAttr TargetAttr =
- Context.getTargetInfo().parseTargetAttr(AttrStr);
-
- if (TargetAttr.Features.empty() ||
- llvm::any_of(TargetAttr.Features, [&](const StringRef Ext) {
- return !RISCV().isValidFMVExtension(Ext);
- }))
- return Diag(LiteralLoc, diag::warn_unsupported_target_attribute)
- << Unsupported << None << AttrStr << TargetVersion;
- } else if (AttrStr.starts_with("default")) {
- if (HasDefault)
- DuplicateAttr = true;
- HasDefault = true;
- } else if (AttrStr.consume_front("priority=")) {
- if (HasPriority)
- DuplicateAttr = true;
- HasPriority = true;
- unsigned Digit;
- if (AttrStr.getAsInteger(0, Digit))
- return Diag(LiteralLoc, diag::warn_unsupported_target_attribute)
- << Unsupported << None << AttrStr << TargetVersion;
- } else {
- return Diag(LiteralLoc, diag::warn_unsupported_target_attribute)
- << Unsupported << None << AttrStr << TargetVersion;
- }
- }
-
- if (((HasPriority || HasArch) && HasDefault) || DuplicateAttr ||
- (HasPriority && !HasArch))
- return Diag(LiteralLoc, diag::warn_unsupported_target_attribute)
- << Unsupported << None << AttrStr << TargetVersion;
+static void handleTargetVersionAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
+ StringRef Param;
+ SourceLocation Loc;
+ if (!S.checkStringLiteralArgumentAttr(AL, 0, Param, &Loc))
+ return;
- return false;
- }
- AttrStr.split(Features, "+");
- for (auto &CurFeature : Features) {
- CurFeature = CurFeature.trim();
- if (CurFeature == "default")
- continue;
- if (!Context.getTargetInfo().validateCpuSupports(CurFeature))
- return Diag(LiteralLoc, diag::warn_unsupported_target_attribute)
- << Unsupported << None << CurFeature << TargetVersion;
+ if (S.Context.getTargetInfo().getTriple().isAArch64()) {
+ if (S.ARM().checkTargetVersionAttr(Param, Loc))
+ return;
+ } else if (S.Context.getTargetInfo().getTriple().isRISCV()) {
+ if (S.RISCV().checkTargetVersionAttr(Param, Loc))
+ return;
}
- return false;
-}
-static void handleTargetVersionAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
- StringRef Str;
- SourceLocation LiteralLoc;
- if (!S.checkStringLiteralArgumentAttr(AL, 0, Str, &LiteralLoc) ||
- S.checkTargetVersionAttr(LiteralLoc, D, Str))
- return;
TargetVersionAttr *NewAttr =
- ::new (S.Context) TargetVersionAttr(S.Context, AL, Str);
+ ::new (S.Context) TargetVersionAttr(S.Context, AL, Param);
D->addAttr(NewAttr);
}
@@ -3419,158 +3368,7 @@ static void handleTargetAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
D->addAttr(NewAttr);
}
-bool Sema::checkTargetClonesAttrString(
- SourceLocation LiteralLoc, StringRef Str, const StringLiteral *Literal,
- Decl *D, bool &HasDefault, bool &HasCommas, bool &HasNotDefault,
- SmallVectorImpl<SmallString<64>> &StringsBuffer) {
- enum FirstParam { Unsupported, Duplicate, Unknown };
- enum SecondParam { None, CPU, Tune };
- enum ThirdParam { Target, TargetClones };
- HasCommas = HasCommas || Str.contains(',');
- const TargetInfo &TInfo = Context.getTargetInfo();
- // Warn on empty at the beginning of a string.
- if (Str.size() == 0)
- return Diag(LiteralLoc, diag::warn_unsupported_target_attribute)
- << Unsupported << None << "" << TargetClones;
-
- std::pair<StringRef, StringRef> Parts = {{}, Str};
- while (!Parts.second.empty()) {
- Parts = Parts.second.split(',');
- StringRef Cur = Parts.first.trim();
- SourceLocation CurLoc =
- Literal->getLocationOfByte(Cur.data() - Literal->getString().data(),
- getSourceManager(), getLangOpts(), TInfo);
-
- bool DefaultIsDupe = false;
- bool HasCodeGenImpact = false;
- if (Cur.empty())
- return Diag(CurLoc, diag::warn_unsupported_target_attribute)
- << Unsupported << None << "" << TargetClones;
-
- if (TInfo.getTriple().isAArch64()) {
- // AArch64 target clones specific
- if (Cur == "default") {
- DefaultIsDupe = HasDefault;
- HasDefault = true;
- if (llvm::is_contained(StringsBuffer, Cur) || DefaultIsDupe)
- Diag(CurLoc, diag::warn_target_clone_duplicate_options);
- else
- StringsBuffer.push_back(Cur);
- } else {
- std::pair<StringRef, StringRef> CurParts = {{}, Cur};
- llvm::SmallVector<StringRef, 8> CurFeatures;
- while (!CurParts.second.empty()) {
- CurParts = CurParts.second.split('+');
- StringRef CurFeature = CurParts.first.trim();
- if (!TInfo.validateCpuSupports(CurFeature)) {
- Diag(CurLoc, diag::warn_unsupported_target_attribute)
- << Unsupported << None << CurFeature << TargetClones;
- continue;
- }
- if (TInfo.doesFeatureAffectCodeGen(CurFeature))
- HasCodeGenImpact = true;
- CurFeatures.push_back(CurFeature);
- }
- // Canonize TargetClones Attributes
- llvm::sort(CurFeatures);
- SmallString<64> Res;
- for (auto &CurFeat : CurFeatures) {
- if (!Res.empty())
- Res.append("+");
- Res.append(CurFeat);
- }
- if (llvm::is_contained(StringsBuffer, Res) || DefaultIsDupe)
- Diag(CurLoc, diag::warn_target_clone_duplicate_options);
- else if (!HasCodeGenImpact)
- // Ignore features in target_clone attribute that don't impact
- // code generation
- Diag(CurLoc, diag::warn_target_clone_no_impact_options);
- else if (!Res.empty()) {
- StringsBuffer.push_back(Res);
- HasNotDefault = true;
- }
- }
- } else if (TInfo.getTriple().isRISCV()) {
- // Suppress warn_target_clone_mixed_values
- HasCommas = false;
-
- // Cur is split's parts of Str. RISC-V uses Str directly,
- // so skip when encountered more than once.
- if (!Str.starts_with(Cur))
- continue;
-
- llvm::SmallVector<StringRef, 8> AttrStrs;
- Str.split(AttrStrs, ";");
-
- bool IsPriority = false;
- bool IsDefault = false;
- for (auto &AttrStr : AttrStrs) {
- // Only support arch=+ext,... syntax.
- if (AttrStr.starts_with("arch=+")) {
- ParsedTargetAttr TargetAttr =
- Context.getTargetInfo().parseTargetAttr(AttrStr);
-
- if (TargetAttr.Features.empty() ||
- llvm::any_of(TargetAttr.Features, [&](const StringRef Ext) {
- return !RISCV().isValidFMVExtension(Ext);
- }))
- return Diag(CurLoc, diag::warn_unsupported_target_attribute)
- << Unsupported << None << Str << TargetClones;
- } else if (AttrStr.starts_with("default")) {
- IsDefault = true;
- DefaultIsDupe = HasDefault;
- HasDefault = true;
- } else if (AttrStr.consume_front("priority=")) {
- IsPriority = true;
- unsigned Digit;
- if (AttrStr.getAsInteger(0, Digit))
- return Diag(CurLoc, diag::warn_unsupported_target_attribute)
- << Unsupported << None << Str << TargetClones;
- } else {
- return Diag(CurLoc, diag::warn_unsupported_target_attribute)
- << Unsupported << None << Str << TargetClones;
- }
- }
-
- if (IsPriority && IsDefault)
- return Diag(CurLoc, diag::warn_unsupported_target_attribute)
- << Unsupported << None << Str << TargetClones;
-
- if (llvm::is_contained(StringsBuffer, Str) || DefaultIsDupe)
- Diag(CurLoc, diag::warn_target_clone_duplicate_options);
- StringsBuffer.push_back(Str);
- } else {
- // Other targets ( currently X86 )
- if (Cur.starts_with("arch=")) {
- if (!Context.getTargetInfo().isValidCPUName(
- Cur.drop_front(sizeof("arch=") - 1)))
- return Diag(CurLoc, diag::warn_unsupported_target_attribute)
- << Unsupported << CPU << Cur.drop_front(sizeof("arch=") - 1)
- << TargetClones;
- } else if (Cur == "default") {
- DefaultIsDupe = HasDefault;
- HasDefault = true;
- } else if (!Context.getTargetInfo().isValidFeatureName(Cur) ||
- Context.getTargetInfo().getFMVPriority(Cur) == 0)
- return Diag(CurLoc, diag::warn_unsupported_target_attribute)
- << Unsupported << None << Cur << TargetClones;
- if (llvm::is_contained(StringsBuffer, Cur) || DefaultIsDupe)
- Diag(CurLoc, diag::warn_target_clone_duplicate_options);
- // Note: Add even if there are duplicates, since it changes name mangling.
- StringsBuffer.push_back(Cur);
- }
- }
- if (Str.rtrim().ends_with(","))
- return Diag(LiteralLoc, diag::warn_unsupported_target_attribute)
- << Unsupported << None << "" << TargetClones;
- return false;
-}
-
static void handleTargetClonesAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
- if (S.Context.getTargetInfo().getTriple().isAArch64() &&
- !S.Context.getTargetInfo().hasFeature("fmv"))
- return;
-
// Ensure we don't combine these with themselves, since that causes some
// confusing behavior.
if (const auto *Other = D->getAttr<TargetClonesAttr>()) {
@@ -3581,31 +3379,6 @@ static void handleTargetClonesAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
if (checkAttrMutualExclusion<TargetClonesAttr>(S, D, AL))
return;
- SmallVector<StringRef, 2> Strings;
- SmallVector<SmallString<64>, 2> StringsBuffer;
- bool HasCommas = false, HasDefault = false, HasNotDefault = false;
-
- for (unsigned I = 0, E = AL.getNumArgs(); I != E; ++I) {
- StringRef CurStr;
- SourceLocation LiteralLoc;
- if (!S.checkStringLiteralArgumentAttr(AL, I, CurStr, &LiteralLoc) ||
- S.checkTargetClonesAttrString(
- LiteralLoc, CurStr,
- cast<StringLiteral>(AL.getArgAsExpr(I)->IgnoreParenCasts()), D,
- HasDefault, HasCommas, HasNotDefault, StringsBuffer))
- return;
- }
- for (auto &SmallStr : StringsBuffer)
- Strings.push_back(SmallStr.str());
-
- if (HasCommas && AL.getNumArgs() > 1)
- S.Diag(AL.getLoc(), diag::warn_target_clone_mixed_values);
-
- if (!HasDefault && !S.Context.getTargetInfo().getTriple().isAArch64()) {
- S.Diag(AL.getLoc(), diag::err_target_clone_must_have_default);
- return;
- }
-
// FIXME: We could probably figure out how to get this to work for lambdas
// someday.
if (const auto *MD = dyn_cast<CXXMethodDecl>(D)) {
@@ -3617,13 +3390,34 @@ static void handleTargetClonesAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
}
}
- // No multiversion if we have default version only.
- if (S.Context.getTargetInfo().getTriple().isAArch64() && !HasNotDefault)
- return;
+ SmallVector<StringRef, 2> Params;
+ SmallVector<SourceLocation, 2> Locations;
+ for (unsigned I = 0, E = AL.getNumArgs(); I != E; ++I) {
+ StringRef Param;
+ SourceLocation Loc;
+ if (!S.checkStringLiteralArgumentAttr(AL, I, Param, &Loc))
+ return;
+ Params.push_back(Param);
+ Locations.push_back(Loc);
+ }
+
+ SmallVector<SmallString<64>, 2> NewParams;
+ if (S.Context.getTargetInfo().getTriple().isAArch64()) {
+ if (S.ARM().checkTargetClonesAttr(Params, Locations, NewParams))
+ return;
+ } else if (S.Context.getTargetInfo().getTriple().isRISCV()) {
+ if (S.RISCV().checkTargetClonesAttr(Params, Locations, NewParams))
+ return;
+ } else if (S.Context.getTargetInfo().getTriple().isX86()) {
+ if (S.X86().checkTargetClonesAttr(Params, Locations, NewParams))
+ return;
+ }
+ Params.clear();
+ for (auto &SmallStr : NewParams)
+ Params.push_back(SmallStr.str());
- cast<FunctionDecl>(D)->setIsMultiVersion();
TargetClonesAttr *NewAttr = ::new (S.Context)
- TargetClonesAttr(S.Context, AL, Strings.data(), Strings.size());
+ TargetClonesAttr(S.Context, AL, Params.data(), Params.size());
D->addAttr(NewAttr);
}
diff --git a/clang/lib/Sema/SemaOpenACCAtomic.cpp b/clang/lib/Sema/SemaOpenACCAtomic.cpp
index 9c8c8d1..a9319dc 100644
--- a/clang/lib/Sema/SemaOpenACCAtomic.cpp
+++ b/clang/lib/Sema/SemaOpenACCAtomic.cpp
@@ -576,6 +576,11 @@ class AtomicOperandChecker {
return AssocStmt;
}
+ const Expr *IgnoreBeforeCompare(const Expr *E) {
+ return E->IgnoreParenImpCasts()->IgnoreParenNoopCasts(
+ SemaRef.getASTContext());
+ }
+
bool CheckVarRefsSame(IDACInfo::ExprKindTy FirstKind, const Expr *FirstX,
IDACInfo::ExprKindTy SecondKind, const Expr *SecondX) {
llvm::FoldingSetNodeID First_ID, Second_ID;
@@ -648,8 +653,10 @@ class AtomicOperandChecker {
if (CheckOperandVariable(AssignRes->RHS, PD))
return getRecoveryExpr();
- if (CheckVarRefsSame(FirstExprResults.ExprKind, FirstExprResults.X_Var,
- IDACInfo::SimpleAssign, AssignRes->RHS))
+ if (CheckVarRefsSame(FirstExprResults.ExprKind,
+ IgnoreBeforeCompare(FirstExprResults.X_Var),
+ IDACInfo::SimpleAssign,
+ IgnoreBeforeCompare(AssignRes->RHS)))
return getRecoveryExpr();
break;
}
@@ -660,9 +667,10 @@ class AtomicOperandChecker {
if (SecondExprResults.Failed)
return getRecoveryExpr();
- if (CheckVarRefsSame(FirstExprResults.ExprKind, FirstExprResults.X_Var,
+ if (CheckVarRefsSame(FirstExprResults.ExprKind,
+ IgnoreBeforeCompare(FirstExprResults.X_Var),
SecondExprResults.ExprKind,
- SecondExprResults.X_Var))
+ IgnoreBeforeCompare(SecondExprResults.X_Var)))
return getRecoveryExpr();
break;
}
diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp
index 4ecc9b0..2c5d97c 100644
--- a/clang/lib/Sema/SemaOpenMP.cpp
+++ b/clang/lib/Sema/SemaOpenMP.cpp
@@ -2829,7 +2829,7 @@ static void checkReductionClauses(Sema &S, DSAStackTy *Stack,
continue;
}
for (Expr *Ref : RC->varlist()) {
- assert(Ref && "NULL expr in OpenMP nontemporal clause.");
+ assert(Ref && "NULL expr in OpenMP reduction clause.");
SourceLocation ELoc;
SourceRange ERange;
Expr *SimpleRefExpr = Ref;
@@ -7612,6 +7612,23 @@ void SemaOpenMP::ActOnOpenMPDeclareVariantDirective(
return;
}
+  // OpenMP 6.0 [9.6.2, page 332, lines 31-33, adjust_args clause, Restrictions]:
+  // If the `need_device_addr` adjust-op modifier is present, each list item
+  // that appears in the clause must refer to an argument in the declaration of
+  // the function variant that has a reference type.
+ if (getLangOpts().OpenMP >= 60) {
+ for (Expr *E : AdjustArgsNeedDeviceAddr) {
+ E = E->IgnoreParenImpCasts();
+ if (const auto *DRE = dyn_cast<DeclRefExpr>(E)) {
+ if (const auto *VD = dyn_cast<VarDecl>(DRE->getDecl())) {
+ if (!VD->getType()->isReferenceType())
+ Diag(E->getExprLoc(),
+ diag::err_omp_non_by_ref_need_device_addr_modifier_argument);
+ }
+ }
+ }
+ }
+
auto *NewAttr = OMPDeclareVariantAttr::CreateImplicit(
getASTContext(), VariantRef, &TI,
const_cast<Expr **>(AdjustArgsNothing.data()), AdjustArgsNothing.size(),
@@ -18344,7 +18361,7 @@ OMPClause *SemaOpenMP::ActOnOpenMPSharedClause(ArrayRef<Expr *> VarList,
SourceLocation EndLoc) {
SmallVector<Expr *, 8> Vars;
for (Expr *RefExpr : VarList) {
- assert(RefExpr && "NULL expr in OpenMP lastprivate clause.");
+ assert(RefExpr && "NULL expr in OpenMP shared clause.");
SourceLocation ELoc;
SourceRange ERange;
Expr *SimpleRefExpr = RefExpr;
@@ -19991,7 +20008,7 @@ OMPClause *SemaOpenMP::ActOnOpenMPAlignedClause(
SourceLocation LParenLoc, SourceLocation ColonLoc, SourceLocation EndLoc) {
SmallVector<Expr *, 8> Vars;
for (Expr *RefExpr : VarList) {
- assert(RefExpr && "NULL expr in OpenMP linear clause.");
+ assert(RefExpr && "NULL expr in OpenMP aligned clause.");
SourceLocation ELoc;
SourceRange ERange;
Expr *SimpleRefExpr = RefExpr;
@@ -20167,7 +20184,7 @@ OMPClause *SemaOpenMP::ActOnOpenMPCopyprivateClause(ArrayRef<Expr *> VarList,
SmallVector<Expr *, 8> DstExprs;
SmallVector<Expr *, 8> AssignmentOps;
for (Expr *RefExpr : VarList) {
- assert(RefExpr && "NULL expr in OpenMP linear clause.");
+ assert(RefExpr && "NULL expr in OpenMP copyprivate clause.");
SourceLocation ELoc;
SourceRange ERange;
Expr *SimpleRefExpr = RefExpr;
@@ -20526,7 +20543,7 @@ OMPClause *SemaOpenMP::ActOnOpenMPDependClause(
TotalDepCount = VarOffset.TotalDepCount;
} else {
for (Expr *RefExpr : VarList) {
- assert(RefExpr && "NULL expr in OpenMP shared clause.");
+ assert(RefExpr && "NULL expr in OpenMP depend clause.");
if (isa<DependentScopeDeclRefExpr>(RefExpr)) {
// It will be analyzed later.
Vars.push_back(RefExpr);
@@ -23737,7 +23754,7 @@ OMPClause *SemaOpenMP::ActOnOpenMPAllocateClause(
// Analyze and build list of variables.
SmallVector<Expr *, 8> Vars;
for (Expr *RefExpr : VarList) {
- assert(RefExpr && "NULL expr in OpenMP private clause.");
+ assert(RefExpr && "NULL expr in OpenMP allocate clause.");
SourceLocation ELoc;
SourceRange ERange;
Expr *SimpleRefExpr = RefExpr;
@@ -23829,7 +23846,7 @@ OMPClause *SemaOpenMP::ActOnOpenMPInclusiveClause(ArrayRef<Expr *> VarList,
SourceLocation EndLoc) {
SmallVector<Expr *, 8> Vars;
for (Expr *RefExpr : VarList) {
- assert(RefExpr && "NULL expr in OpenMP nontemporal clause.");
+ assert(RefExpr && "NULL expr in OpenMP inclusive clause.");
SourceLocation ELoc;
SourceRange ERange;
Expr *SimpleRefExpr = RefExpr;
@@ -23870,7 +23887,7 @@ OMPClause *SemaOpenMP::ActOnOpenMPExclusiveClause(ArrayRef<Expr *> VarList,
SourceLocation EndLoc) {
SmallVector<Expr *, 8> Vars;
for (Expr *RefExpr : VarList) {
- assert(RefExpr && "NULL expr in OpenMP nontemporal clause.");
+ assert(RefExpr && "NULL expr in OpenMP exclusive clause.");
SourceLocation ELoc;
SourceRange ERange;
Expr *SimpleRefExpr = RefExpr;
@@ -24063,7 +24080,7 @@ OMPClause *SemaOpenMP::ActOnOpenMPAffinityClause(
SourceLocation EndLoc, Expr *Modifier, ArrayRef<Expr *> Locators) {
SmallVector<Expr *, 8> Vars;
for (Expr *RefExpr : Locators) {
- assert(RefExpr && "NULL expr in OpenMP shared clause.");
+ assert(RefExpr && "NULL expr in OpenMP affinity clause.");
if (isa<DependentScopeDeclRefExpr>(RefExpr) || RefExpr->isTypeDependent()) {
// It will be analyzed later.
Vars.push_back(RefExpr);
@@ -24375,7 +24392,7 @@ ExprResult SemaOpenMP::ActOnOMPArraySectionExpr(
return ExprError();
}
}
- } else if (ColonLocFirst.isValid() &&
+ } else if (SemaRef.getLangOpts().OpenMP < 60 && ColonLocFirst.isValid() &&
(OriginalTy.isNull() || (!OriginalTy->isConstantArrayType() &&
!OriginalTy->isVariableArrayType()))) {
// OpenMP 5.0, [2.1.5 Array Sections]
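A sketch of the adjust_args(need_device_addr: ...) restriction added above, assuming -fopenmp-version=60 and a dispatch-based variant (names and match clause are illustrative): the named parameter must have reference type, otherwise the new err_omp_non_by_ref_need_device_addr_modifier_argument fires.

    void fast_impl(int &x, int y);

    #pragma omp declare variant(fast_impl) match(construct = {dispatch}) \
        adjust_args(need_device_addr : x)   // OK: x has reference type
    void base(int &x, int y);

    // adjust_args(need_device_addr : y) would be rejected, since y is
    // passed by value.
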
diff --git a/clang/lib/Sema/SemaRISCV.cpp b/clang/lib/Sema/SemaRISCV.cpp
index 43f7992..994cd07 100644
--- a/clang/lib/Sema/SemaRISCV.cpp
+++ b/clang/lib/Sema/SemaRISCV.cpp
@@ -1635,6 +1635,116 @@ bool SemaRISCV::isValidFMVExtension(StringRef Ext) {
return -1 != RISCVISAInfo::getRISCVFeaturesBitsInfo(Ext).second;
}
+bool SemaRISCV::checkTargetVersionAttr(const StringRef Param,
+ const SourceLocation Loc) {
+ using namespace DiagAttrParams;
+
+ llvm::SmallVector<StringRef, 8> AttrStrs;
+ Param.split(AttrStrs, ';');
+
+ bool HasArch = false;
+ bool HasPriority = false;
+ bool HasDefault = false;
+ bool DuplicateAttr = false;
+ for (StringRef AttrStr : AttrStrs) {
+ AttrStr = AttrStr.trim();
+ // Only support arch=+ext,... syntax.
+ if (AttrStr.starts_with("arch=+")) {
+ DuplicateAttr = HasArch;
+ HasArch = true;
+ ParsedTargetAttr TargetAttr =
+ getASTContext().getTargetInfo().parseTargetAttr(AttrStr);
+
+ if (TargetAttr.Features.empty() ||
+ llvm::any_of(TargetAttr.Features, [&](const StringRef Ext) {
+ return !isValidFMVExtension(Ext);
+ }))
+ return Diag(Loc, diag::warn_unsupported_target_attribute)
+ << Unsupported << None << AttrStr << TargetVersion;
+ } else if (AttrStr == "default") {
+ DuplicateAttr = HasDefault;
+ HasDefault = true;
+ } else if (AttrStr.consume_front("priority=")) {
+ DuplicateAttr = HasPriority;
+ HasPriority = true;
+ unsigned Digit;
+ if (AttrStr.getAsInteger(0, Digit))
+ return Diag(Loc, diag::warn_unsupported_target_attribute)
+ << Unsupported << None << AttrStr << TargetVersion;
+ } else {
+ return Diag(Loc, diag::warn_unsupported_target_attribute)
+ << Unsupported << None << AttrStr << TargetVersion;
+ }
+ }
+
+ if (((HasPriority || HasArch) && HasDefault) || DuplicateAttr ||
+ (HasPriority && !HasArch))
+ return Diag(Loc, diag::warn_unsupported_target_attribute)
+ << Unsupported << None << Param << TargetVersion;
+
+ return false;
+}
+
+bool SemaRISCV::checkTargetClonesAttr(
+ SmallVectorImpl<StringRef> &Params, SmallVectorImpl<SourceLocation> &Locs,
+ SmallVectorImpl<SmallString<64>> &NewParams) {
+ using namespace DiagAttrParams;
+
+ assert(Params.size() == Locs.size() &&
+ "Mismatch between number of string parameters and locations");
+
+ bool HasDefault = false;
+ for (unsigned I = 0, E = Params.size(); I < E; ++I) {
+ const StringRef Param = Params[I].trim();
+ const SourceLocation &Loc = Locs[I];
+
+ llvm::SmallVector<StringRef, 8> AttrStrs;
+ Param.split(AttrStrs, ';');
+
+ bool IsPriority = false;
+ bool IsDefault = false;
+ for (StringRef AttrStr : AttrStrs) {
+ AttrStr = AttrStr.trim();
+ // Only support arch=+ext,... syntax.
+ if (AttrStr.starts_with("arch=+")) {
+ ParsedTargetAttr TargetAttr =
+ getASTContext().getTargetInfo().parseTargetAttr(AttrStr);
+
+ if (TargetAttr.Features.empty() ||
+ llvm::any_of(TargetAttr.Features, [&](const StringRef Ext) {
+ return !isValidFMVExtension(Ext);
+ }))
+ return Diag(Loc, diag::warn_unsupported_target_attribute)
+ << Unsupported << None << Param << TargetClones;
+ } else if (AttrStr == "default") {
+ IsDefault = true;
+ HasDefault = true;
+ } else if (AttrStr.consume_front("priority=")) {
+ IsPriority = true;
+ unsigned Digit;
+ if (AttrStr.getAsInteger(0, Digit))
+ return Diag(Loc, diag::warn_unsupported_target_attribute)
+ << Unsupported << None << Param << TargetClones;
+ } else {
+ return Diag(Loc, diag::warn_unsupported_target_attribute)
+ << Unsupported << None << Param << TargetClones;
+ }
+ }
+
+ if (IsPriority && IsDefault)
+ return Diag(Loc, diag::warn_unsupported_target_attribute)
+ << Unsupported << None << Param << TargetClones;
+
+ if (llvm::is_contained(NewParams, Param))
+ Diag(Loc, diag::warn_target_clone_duplicate_options);
+ NewParams.push_back(Param);
+ }
+ if (!HasDefault)
+ return Diag(Locs[0], diag::err_target_clone_must_have_default);
+
+ return false;
+}
+
SemaRISCV::SemaRISCV(Sema &S) : SemaBase(S) {}
} // namespace clang
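A sketch of RISC-V version strings these helpers accept: each string is a ';'-separated list of arch=+ext, priority=N, and default; a default clone is mandatory for target_clones, and priority may not appear in the same string as default.

    __attribute__((target_version("arch=+zba,+zbb;priority=2"))) int f(void);
    __attribute__((target_version("default")))                   int f(void);

    __attribute__((target_clones("arch=+zba,+zbb;priority=2", "default")))
    int g(void) { return 0; }
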
diff --git a/clang/lib/Sema/SemaTemplateDeduction.cpp b/clang/lib/Sema/SemaTemplateDeduction.cpp
index e1a975b..9e56e697 100644
--- a/clang/lib/Sema/SemaTemplateDeduction.cpp
+++ b/clang/lib/Sema/SemaTemplateDeduction.cpp
@@ -5523,6 +5523,15 @@ static TemplateDeductionResult CheckDeductionConsistency(
// FIXME: A substitution can be incomplete on a non-structural part of the
// type. Use the canonical type for now, until the TemplateInstantiator can
// deal with that.
+
+ // Workaround: Implicit deduction guides use InjectedClassNameTypes, whereas
+ // the explicit guides don't. The substitution doesn't transform these types,
+ // so let it transform their specializations instead.
+ bool IsDeductionGuide = isa<CXXDeductionGuideDecl>(FTD->getTemplatedDecl());
+ if (IsDeductionGuide) {
+ if (auto *Injected = P->getAs<InjectedClassNameType>())
+ P = Injected->getInjectedSpecializationType();
+ }
QualType InstP = S.SubstType(P.getCanonicalType(), MLTAL, FTD->getLocation(),
FTD->getDeclName(), &IsIncompleteSubstitution);
if (InstP.isNull() && !IsIncompleteSubstitution)
@@ -5537,9 +5546,15 @@ static TemplateDeductionResult CheckDeductionConsistency(
if (auto *PA = dyn_cast<PackExpansionType>(A);
PA && !isa<PackExpansionType>(InstP))
A = PA->getPattern();
- if (!S.Context.hasSameType(
- S.Context.getUnqualifiedArrayType(InstP.getNonReferenceType()),
- S.Context.getUnqualifiedArrayType(A.getNonReferenceType())))
+ auto T1 = S.Context.getUnqualifiedArrayType(InstP.getNonReferenceType());
+ auto T2 = S.Context.getUnqualifiedArrayType(A.getNonReferenceType());
+ if (IsDeductionGuide) {
+ if (auto *Injected = T1->getAs<InjectedClassNameType>())
+ T1 = Injected->getInjectedSpecializationType();
+ if (auto *Injected = T2->getAs<InjectedClassNameType>())
+ T2 = Injected->getInjectedSpecializationType();
+ }
+ if (!S.Context.hasSameType(T1, T2))
return TemplateDeductionResult::NonDeducedMismatch;
return TemplateDeductionResult::Success;
}
diff --git a/clang/lib/Sema/SemaX86.cpp b/clang/lib/Sema/SemaX86.cpp
index 5c149bd..850bcb1 100644
--- a/clang/lib/Sema/SemaX86.cpp
+++ b/clang/lib/Sema/SemaX86.cpp
@@ -954,6 +954,11 @@ bool SemaX86::CheckBuiltinFunctionCall(const TargetInfo &TI, unsigned BuiltinID,
l = 0;
u = 15;
break;
+ case X86::BI__builtin_ia32_prefetchi:
+ i = 1;
+ l = 2; // _MM_HINT_T1
+ u = 3; // _MM_HINT_T0
+ break;
}
// Note that we don't force a hard error on the range check here, allowing
@@ -1056,4 +1061,61 @@ void SemaX86::handleForceAlignArgPointerAttr(Decl *D, const ParsedAttr &AL) {
X86ForceAlignArgPointerAttr(getASTContext(), AL));
}
+bool SemaX86::checkTargetClonesAttr(
+ SmallVectorImpl<StringRef> &Params, SmallVectorImpl<SourceLocation> &Locs,
+ SmallVectorImpl<SmallString<64>> &NewParams) {
+ using namespace DiagAttrParams;
+
+ assert(Params.size() == Locs.size() &&
+ "Mismatch between number of string parameters and locations");
+
+ bool HasDefault = false;
+ bool HasComma = false;
+ for (unsigned I = 0, E = Params.size(); I < E; ++I) {
+ const StringRef Param = Params[I].trim();
+ const SourceLocation &Loc = Locs[I];
+
+ if (Param.empty() || Param.ends_with(','))
+ return Diag(Loc, diag::warn_unsupported_target_attribute)
+ << Unsupported << None << "" << TargetClones;
+
+ if (Param.contains(','))
+ HasComma = true;
+
+ StringRef LHS;
+ StringRef RHS = Param;
+ do {
+ std::tie(LHS, RHS) = RHS.split(',');
+ LHS = LHS.trim();
+ const SourceLocation &CurLoc =
+ Loc.getLocWithOffset(LHS.data() - Param.data());
+
+ if (LHS.starts_with("arch=")) {
+ if (!getASTContext().getTargetInfo().isValidCPUName(
+ LHS.drop_front(sizeof("arch=") - 1)))
+ return Diag(CurLoc, diag::warn_unsupported_target_attribute)
+ << Unsupported << CPU << LHS.drop_front(sizeof("arch=") - 1)
+ << TargetClones;
+ } else if (LHS == "default")
+ HasDefault = true;
+ else if (!getASTContext().getTargetInfo().isValidFeatureName(LHS) ||
+ getASTContext().getTargetInfo().getFMVPriority(LHS) == 0)
+ return Diag(CurLoc, diag::warn_unsupported_target_attribute)
+ << Unsupported << None << LHS << TargetClones;
+
+ if (llvm::is_contained(NewParams, LHS))
+ Diag(CurLoc, diag::warn_target_clone_duplicate_options);
+ // Note: Add even if there are duplicates, since it changes name mangling.
+ NewParams.push_back(LHS);
+ } while (!RHS.empty());
+ }
+ if (HasComma && Params.size() > 1)
+ Diag(Locs[0], diag::warn_target_clone_mixed_values);
+
+ if (!HasDefault)
+ return Diag(Locs[0], diag::err_target_clone_must_have_default);
+
+ return false;
+}
+
} // namespace clang
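A sketch of an x86 attribute that satisfies the new SemaX86 checker: an arch= entry is validated as a CPU name, bare entries must be FMV-priority features, duplicates only warn (they still affect mangling), and a default clone is required.

    __attribute__((target_clones("arch=sandybridge", "avx2", "default")))
    int dispatched(void) { return 0; }
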
diff --git a/clang/lib/StaticAnalyzer/Checkers/MallocChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/MallocChecker.cpp
index 68efdba..a7704da 100644
--- a/clang/lib/StaticAnalyzer/Checkers/MallocChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/MallocChecker.cpp
@@ -3730,13 +3730,15 @@ PathDiagnosticPieceRef MallocBugVisitor::VisitNode(const ExplodedNode *N,
return nullptr;
}
- // Save the first destructor/function as release point.
- assert(!ReleaseFunctionLC && "There should be only one release point");
+ // Record the stack frame that is _responsible_ for this memory release
+ // event. This will be used by the false positive suppression heuristics
+ // that recognize the release points of reference-counted objects.
+ //
+ // Usually (e.g. in C) we say that the _responsible_ stack frame is the
+ // current innermost stack frame:
ReleaseFunctionLC = CurrentLC->getStackFrame();
-
- // See if we're releasing memory while inlining a destructor that
- // decrement reference counters (or one of its callees).
- // This turns on various common false positive suppressions.
+ // ...but if the stack contains a destructor call, then we say that the
+ // outermost destructor stack frame is the _responsible_ one:
for (const LocationContext *LC = CurrentLC; LC; LC = LC->getParent()) {
if (const auto *DD = dyn_cast<CXXDestructorDecl>(LC->getDecl())) {
if (isReferenceCountingPointerDestructor(DD)) {
diff --git a/clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp b/clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp
index 9bd8547..8ce2706 100644
--- a/clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp
+++ b/clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp
@@ -24,7 +24,6 @@
#include "clang/Tooling/DependencyScanning/DependencyScanningService.h"
#include "clang/Tooling/DependencyScanning/InProcessModuleCache.h"
#include "clang/Tooling/DependencyScanning/ModuleDepCollector.h"
-#include "clang/Tooling/Tooling.h"
#include "llvm/ADT/IntrusiveRefCntPtr.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/Error.h"
@@ -376,25 +375,23 @@ public:
/// A clang tool that runs the preprocessor in a mode that's optimized for
/// dependency scanning for the given compiler invocation.
-class DependencyScanningAction : public tooling::ToolAction {
+class DependencyScanningAction {
public:
DependencyScanningAction(
DependencyScanningService &Service, StringRef WorkingDirectory,
DependencyConsumer &Consumer, DependencyActionController &Controller,
llvm::IntrusiveRefCntPtr<DependencyScanningWorkerFilesystem> DepFS,
- bool DisableFree, std::optional<StringRef> ModuleName = std::nullopt)
+ std::optional<StringRef> ModuleName = std::nullopt)
: Service(Service), WorkingDirectory(WorkingDirectory),
Consumer(Consumer), Controller(Controller), DepFS(std::move(DepFS)),
- DisableFree(DisableFree), ModuleName(ModuleName) {}
+ ModuleName(ModuleName) {}
bool runInvocation(std::shared_ptr<CompilerInvocation> Invocation,
- FileManager *DriverFileMgr,
+ IntrusiveRefCntPtr<llvm::vfs::FileSystem> FS,
std::shared_ptr<PCHContainerOperations> PCHContainerOps,
- DiagnosticConsumer *DiagConsumer) override {
+ DiagnosticConsumer *DiagConsumer) {
// Make a deep copy of the original Clang invocation.
CompilerInvocation OriginalInvocation(*Invocation);
- // Restore the value of DisableFree, which may be modified by Tooling.
- OriginalInvocation.getFrontendOpts().DisableFree = DisableFree;
if (any(Service.getOptimizeArgs() & ScanningOptimizations::Macros))
canonicalizeDefines(OriginalInvocation.getPreprocessorOpts());
@@ -419,8 +416,8 @@ public:
// Create the compiler's actual diagnostics engine.
sanitizeDiagOpts(ScanInstance.getDiagnosticOpts());
assert(!DiagConsumerFinished && "attempt to reuse finished consumer");
- ScanInstance.createDiagnostics(DriverFileMgr->getVirtualFileSystem(),
- DiagConsumer, /*ShouldOwnClient=*/false);
+ ScanInstance.createDiagnostics(*FS, DiagConsumer,
+ /*ShouldOwnClient=*/false);
if (!ScanInstance.hasDiagnostics())
return false;
@@ -431,6 +428,7 @@ public:
ScanInstance.getHeaderSearchOpts().BuildSessionTimestamp =
Service.getBuildSessionTimestamp();
+ ScanInstance.getFrontendOpts().DisableFree = false;
ScanInstance.getFrontendOpts().GenerateGlobalModuleIndex = false;
ScanInstance.getFrontendOpts().UseGlobalModuleIndex = false;
// This will prevent us compiling individual modules asynchronously since
@@ -441,9 +439,9 @@ public:
any(Service.getOptimizeArgs() & ScanningOptimizations::VFS);
// Support for virtual file system overlays.
- auto FS = createVFSFromCompilerInvocation(
- ScanInstance.getInvocation(), ScanInstance.getDiagnostics(),
- DriverFileMgr->getVirtualFileSystemPtr());
+ FS = createVFSFromCompilerInvocation(ScanInstance.getInvocation(),
+ ScanInstance.getDiagnostics(),
+ std::move(FS));
// Create a new FileManager to match the invocation's FileSystemOptions.
auto *FileMgr = ScanInstance.createFileManager(FS);
@@ -554,9 +552,6 @@ public:
if (Result)
setLastCC1Arguments(std::move(OriginalInvocation));
- // Propagate the statistics to the parent FileManager.
- DriverFileMgr->AddStats(ScanInstance.getFileManager());
-
return Result;
}
@@ -584,7 +579,6 @@ private:
DependencyConsumer &Consumer;
DependencyActionController &Controller;
llvm::IntrusiveRefCntPtr<DependencyScanningWorkerFilesystem> DepFS;
- bool DisableFree;
std::optional<StringRef> ModuleName;
std::optional<CompilerInstance> ScanInstanceStorage;
std::shared_ptr<ModuleDepCollector> MDC;
@@ -669,15 +663,14 @@ llvm::Error DependencyScanningWorker::computeDependencies(
}
static bool forEachDriverJob(
- ArrayRef<std::string> ArgStrs, DiagnosticsEngine &Diags, FileManager &FM,
+ ArrayRef<std::string> ArgStrs, DiagnosticsEngine &Diags,
+ IntrusiveRefCntPtr<llvm::vfs::FileSystem> FS,
llvm::function_ref<bool(const driver::Command &Cmd)> Callback) {
SmallVector<const char *, 256> Argv;
Argv.reserve(ArgStrs.size());
for (const std::string &Arg : ArgStrs)
Argv.push_back(Arg.c_str());
- llvm::vfs::FileSystem *FS = &FM.getVirtualFileSystem();
-
std::unique_ptr<driver::Driver> Driver = std::make_unique<driver::Driver>(
Argv[0], llvm::sys::getDefaultTargetTriple(), Diags,
"clang LLVM compiler", FS);
@@ -687,7 +680,8 @@ static bool forEachDriverJob(
bool CLMode = driver::IsClangCL(
driver::getDriverMode(Argv[0], ArrayRef(Argv).slice(1)));
- if (llvm::Error E = driver::expandResponseFiles(Argv, CLMode, Alloc, FS)) {
+ if (llvm::Error E =
+ driver::expandResponseFiles(Argv, CLMode, Alloc, FS.get())) {
Diags.Report(diag::err_drv_expand_response_file)
<< llvm::toString(std::move(E));
return false;
@@ -710,17 +704,25 @@ static bool forEachDriverJob(
static bool createAndRunToolInvocation(
std::vector<std::string> CommandLine, DependencyScanningAction &Action,
- FileManager &FM,
+ IntrusiveRefCntPtr<llvm::vfs::FileSystem> FS,
std::shared_ptr<clang::PCHContainerOperations> &PCHContainerOps,
DiagnosticsEngine &Diags, DependencyConsumer &Consumer) {
// Save executable path before providing CommandLine to ToolInvocation
std::string Executable = CommandLine[0];
- ToolInvocation Invocation(std::move(CommandLine), &Action, &FM,
- PCHContainerOps);
- Invocation.setDiagnosticConsumer(Diags.getClient());
- Invocation.setDiagnosticOptions(&Diags.getDiagnosticOptions());
- if (!Invocation.run())
+
+ llvm::opt::ArgStringList Argv;
+ for (const std::string &Str : ArrayRef(CommandLine).drop_front())
+ Argv.push_back(Str.c_str());
+
+ auto Invocation = std::make_shared<CompilerInvocation>();
+ if (!CompilerInvocation::CreateFromArgs(*Invocation, Argv, Diags)) {
+ // FIXME: Should we just go on like cc1_main does?
+ return false;
+ }
+
+ if (!Action.runInvocation(std::move(Invocation), std::move(FS),
+ PCHContainerOps, Diags.getClient()))
return false;
std::vector<std::string> Args = Action.takeLastCC1Arguments();
@@ -733,37 +735,24 @@ bool DependencyScanningWorker::scanDependencies(
DependencyConsumer &Consumer, DependencyActionController &Controller,
DiagnosticConsumer &DC, llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem> FS,
std::optional<StringRef> ModuleName) {
- auto FileMgr =
- llvm::makeIntrusiveRefCnt<FileManager>(FileSystemOptions{}, FS);
-
std::vector<const char *> CCommandLine(CommandLine.size(), nullptr);
llvm::transform(CommandLine, CCommandLine.begin(),
[](const std::string &Str) { return Str.c_str(); });
auto DiagOpts = CreateAndPopulateDiagOpts(CCommandLine);
sanitizeDiagOpts(*DiagOpts);
- IntrusiveRefCntPtr<DiagnosticsEngine> Diags =
- CompilerInstance::createDiagnostics(FileMgr->getVirtualFileSystem(),
- *DiagOpts, &DC,
- /*ShouldOwnClient=*/false);
-
- // Although `Diagnostics` are used only for command-line parsing, the
- // custom `DiagConsumer` might expect a `SourceManager` to be present.
- SourceManager SrcMgr(*Diags, *FileMgr);
- Diags->setSourceManager(&SrcMgr);
- // DisableFree is modified by Tooling for running
- // in-process; preserve the original value, which is
- // always true for a driver invocation.
- bool DisableFree = true;
+ auto Diags = CompilerInstance::createDiagnostics(*FS, *DiagOpts, &DC,
+ /*ShouldOwnClient=*/false);
+
DependencyScanningAction Action(Service, WorkingDirectory, Consumer,
- Controller, DepFS, DisableFree, ModuleName);
+ Controller, DepFS, ModuleName);
bool Success = false;
if (CommandLine[1] == "-cc1") {
- Success = createAndRunToolInvocation(CommandLine, Action, *FileMgr,
+ Success = createAndRunToolInvocation(CommandLine, Action, FS,
PCHContainerOps, *Diags, Consumer);
} else {
Success = forEachDriverJob(
- CommandLine, *Diags, *FileMgr, [&](const driver::Command &Cmd) {
+ CommandLine, *Diags, FS, [&](const driver::Command &Cmd) {
if (StringRef(Cmd.getCreator().getName()) != "clang") {
// Non-clang command. Just pass through to the dependency
// consumer.
@@ -782,7 +771,7 @@ bool DependencyScanningWorker::scanDependencies(
// system to ensure that any file system requests that
// are made by the driver do not go through the
// dependency scanning filesystem.
- return createAndRunToolInvocation(std::move(Argv), Action, *FileMgr,
+ return createAndRunToolInvocation(std::move(Argv), Action, FS,
PCHContainerOps, *Diags, Consumer);
});
}
diff --git a/clang/lib/Tooling/Inclusions/HeaderIncludes.cpp b/clang/lib/Tooling/Inclusions/HeaderIncludes.cpp
index 9f10ee1..2b5a293 100644
--- a/clang/lib/Tooling/Inclusions/HeaderIncludes.cpp
+++ b/clang/lib/Tooling/Inclusions/HeaderIncludes.cpp
@@ -285,8 +285,7 @@ HeaderIncludes::HeaderIncludes(StringRef FileName, StringRef Code,
MaxInsertOffset(MinInsertOffset +
getMaxHeaderInsertionOffset(
FileName, Code.drop_front(MinInsertOffset), Style)),
- MainIncludeFound(false),
- Categories(Style, FileName) {
+ MainIncludeFound(false), Categories(Style, FileName) {
// Add 0 for main header and INT_MAX for headers that are not in any
// category.
Priorities = {0, INT_MAX};
diff --git a/clang/lib/Tooling/Inclusions/Stdlib/StandardLibrary.cpp b/clang/lib/Tooling/Inclusions/Stdlib/StandardLibrary.cpp
index b88e6db..807a8d8 100644
--- a/clang/lib/Tooling/Inclusions/Stdlib/StandardLibrary.cpp
+++ b/clang/lib/Tooling/Inclusions/Stdlib/StandardLibrary.cpp
@@ -131,7 +131,7 @@ static int initialize(Lang Language) {
Mapping->SymbolNames[SymIndex] = {
QName.data(), NSLen, static_cast<unsigned int>(QName.size() - NSLen)};
if (!HeaderName.empty())
- Mapping->SymbolHeaderIDs[SymIndex].push_back(AddHeader(HeaderName));
+ Mapping->SymbolHeaderIDs[SymIndex].push_back(AddHeader(HeaderName));
NSSymbolMap &NSSymbols = AddNS(QName.take_front(NSLen));
NSSymbols.try_emplace(QName.drop_front(NSLen), SymIndex);
@@ -236,7 +236,7 @@ std::optional<Symbol> Symbol::named(llvm::StringRef Scope, llvm::StringRef Name,
return std::nullopt;
}
std::optional<Header> Symbol::header() const {
- const auto& Headers = getMappingPerLang(Language)->SymbolHeaderIDs[ID];
+ const auto &Headers = getMappingPerLang(Language)->SymbolHeaderIDs[ID];
if (Headers.empty())
return std::nullopt;
return Header(Headers.front(), Language);
diff --git a/clang/test/APINotes/Inputs/Headers/SwiftImportAs.apinotes b/clang/test/APINotes/Inputs/Headers/SwiftImportAs.apinotes
index 66fc46e..c096822 100644
--- a/clang/test/APINotes/Inputs/Headers/SwiftImportAs.apinotes
+++ b/clang/test/APINotes/Inputs/Headers/SwiftImportAs.apinotes
@@ -19,6 +19,10 @@ Tags:
SwiftReleaseOp: release
SwiftRetainOp: retain
SwiftDefaultOwnership: unretained
+- Name: OpaqueRefCountedType
+ SwiftImportAs: reference
+ SwiftReleaseOp: ORCRelease
+ SwiftRetainOp: ORCRetain
- Name: NonCopyableType
SwiftCopyable: false
SwiftConformsTo: MySwiftModule.MySwiftNonCopyableProtocol
diff --git a/clang/test/APINotes/Inputs/Headers/SwiftImportAs.h b/clang/test/APINotes/Inputs/Headers/SwiftImportAs.h
index 20b8f04..5f817ac 100644
--- a/clang/test/APINotes/Inputs/Headers/SwiftImportAs.h
+++ b/clang/test/APINotes/Inputs/Headers/SwiftImportAs.h
@@ -23,3 +23,9 @@ struct EscapableType { int value; };
struct RefCountedTypeWithDefaultConvention {};
inline void retain(RefCountedType *x) {}
inline void release(RefCountedType *x) {}
+
+struct OpaqueRefCountedType;
+struct OpaqueRefCountedType; // redeclaration
+
+inline void ORCRetain(struct OpaqueRefCountedType *x);
+inline void ORCRelease(struct OpaqueRefCountedType *x);
diff --git a/clang/test/APINotes/swift-import-as.cpp b/clang/test/APINotes/swift-import-as.cpp
index 929f924..179170f 100644
--- a/clang/test/APINotes/swift-import-as.cpp
+++ b/clang/test/APINotes/swift-import-as.cpp
@@ -3,6 +3,7 @@
// RUN: %clang_cc1 -fmodules -fblocks -fimplicit-module-maps -fmodules-cache-path=%t/ModulesCache -fdisable-module-hash -fapinotes-modules -I %S/Inputs/Headers %s -x c++ -ast-dump -ast-dump-filter ImmortalRefType | FileCheck -check-prefix=CHECK-IMMORTAL %s
// RUN: %clang_cc1 -fmodules -fblocks -fimplicit-module-maps -fmodules-cache-path=%t/ModulesCache -fdisable-module-hash -fapinotes-modules -I %S/Inputs/Headers %s -x c++ -ast-dump -ast-dump-filter RefCountedType | FileCheck -check-prefix=CHECK-REF-COUNTED %s
// RUN: %clang_cc1 -fmodules -fblocks -fimplicit-module-maps -fmodules-cache-path=%t/ModulesCache -fdisable-module-hash -fapinotes-modules -I %S/Inputs/Headers %s -x c++ -ast-dump -ast-dump-filter RefCountedTypeWithDefaultConvention | FileCheck -check-prefix=CHECK-REF-COUNTED-DEFAULT %s
+// RUN: %clang_cc1 -fmodules -fblocks -fimplicit-module-maps -fmodules-cache-path=%t/ModulesCache -fdisable-module-hash -fapinotes-modules -I %S/Inputs/Headers %s -x c++ -ast-dump -ast-dump-filter OpaqueRefCountedType | FileCheck -check-prefix=CHECK-OPAQUE-REF-COUNTED %s
// RUN: %clang_cc1 -fmodules -fblocks -fimplicit-module-maps -fmodules-cache-path=%t/ModulesCache -fdisable-module-hash -fapinotes-modules -I %S/Inputs/Headers %s -x c++ -ast-dump -ast-dump-filter NonCopyableType | FileCheck -check-prefix=CHECK-NON-COPYABLE %s
// RUN: %clang_cc1 -fmodules -fblocks -fimplicit-module-maps -fmodules-cache-path=%t/ModulesCache -fdisable-module-hash -fapinotes-modules -I %S/Inputs/Headers %s -x c++ -ast-dump -ast-dump-filter CopyableType | FileCheck -check-prefix=CHECK-COPYABLE %s
// RUN: %clang_cc1 -fmodules -fblocks -fimplicit-module-maps -fmodules-cache-path=%t/ModulesCache -fdisable-module-hash -fapinotes-modules -I %S/Inputs/Headers %s -x c++ -ast-dump -ast-dump-filter NonEscapableType | FileCheck -check-prefix=CHECK-NON-ESCAPABLE %s
@@ -34,6 +35,20 @@
// CHECK-REF-COUNTED-DEFAULT: SwiftAttrAttr {{.+}} <<invalid sloc>> "release:release"
// CHECK-REF-COUNTED-DEFAULT: SwiftAttrAttr {{.+}} <<invalid sloc>> "returned_as_unretained_by_default"
+// CHECK-OPAQUE-REF-COUNTED: Dumping OpaqueRefCountedType:
+// CHECK-OPAQUE-REF-COUNTED-NEXT: CXXRecordDecl {{.+}} imported in SwiftImportAs{{.*}}struct OpaqueRefCountedType
+// CHECK-OPAQUE-REF-COUNTED: SwiftAttrAttr {{.+}} <<invalid sloc>> "import_reference"
+// CHECK-OPAQUE-REF-COUNTED: SwiftAttrAttr {{.+}} <<invalid sloc>> "retain:ORCRetain"
+// CHECK-OPAQUE-REF-COUNTED: SwiftAttrAttr {{.+}} <<invalid sloc>> "release:ORCRelease"
+// CHECK-OPAQUE-REF-COUNTED-NOT: SwiftAttrAttr {{.+}} <<invalid sloc>> "release:ORCRelease"
+
+// CHECK-OPAQUE-REF-COUNTED: Dumping OpaqueRefCountedType:
+// CHECK-OPAQUE-REF-COUNTED-NEXT: CXXRecordDecl {{.+}} imported in SwiftImportAs{{.*}}struct OpaqueRefCountedType
+// CHECK-OPAQUE-REF-COUNTED: SwiftAttrAttr {{.+}} <<invalid sloc>> "import_reference"
+// CHECK-OPAQUE-REF-COUNTED: SwiftAttrAttr {{.+}} <<invalid sloc>> "retain:ORCRetain"
+// CHECK-OPAQUE-REF-COUNTED: SwiftAttrAttr {{.+}} <<invalid sloc>> "release:ORCRelease"
+
+// CHECK-OPAQUE-REF-COUNTED-NOT: SwiftAttrAttr {{.+}} <<invalid sloc>> "release:
// CHECK-NON-COPYABLE: Dumping NonCopyableType:
// CHECK-NON-COPYABLE-NEXT: CXXRecordDecl {{.+}} imported in SwiftImportAs {{.+}} struct NonCopyableType
// CHECK-NON-COPYABLE: SwiftAttrAttr {{.+}} <<invalid sloc>> "conforms_to:MySwiftModule.MySwiftNonCopyableProtocol"
diff --git a/clang/test/AST/ByteCode/builtin-bit-cast.cpp b/clang/test/AST/ByteCode/builtin-bit-cast.cpp
index 3c5e89d7..bc356b0 100644
--- a/clang/test/AST/ByteCode/builtin-bit-cast.cpp
+++ b/clang/test/AST/ByteCode/builtin-bit-cast.cpp
@@ -22,6 +22,10 @@ typedef __INTPTR_TYPE__ intptr_t;
static_assert(sizeof(int) == 4);
static_assert(sizeof(long long) == 8);
+
+constexpr bool test_bad_bool = __builtin_bit_cast(bool, (char)0xff); // both-error {{must be initialized by a constant expression}} \
+ // both-note {{value 255 cannot be represented in type 'bool'}}
+
template <class To, class From>
constexpr To bit_cast(const From &from) {
static_assert(sizeof(To) == sizeof(From));
diff --git a/clang/test/AST/ByteCode/unions.cpp b/clang/test/AST/ByteCode/unions.cpp
index 7cfd0d6..139e318 100644
--- a/clang/test/AST/ByteCode/unions.cpp
+++ b/clang/test/AST/ByteCode/unions.cpp
@@ -79,10 +79,9 @@ namespace DefaultInit {
constexpr U1 u1; /// OK.
- constexpr int foo() { // expected-error {{never produces a constant expression}}
+ constexpr int foo() {
U1 u;
- return u.a; // both-note {{read of member 'a' of union with active member 'b'}} \
- // expected-note {{read of member 'a' of union with active member 'b'}}
+ return u.a; // both-note {{read of member 'a' of union with active member 'b'}}
}
static_assert(foo() == 42); // both-error {{not an integral constant expression}} \
// both-note {{in call to}}
@@ -861,6 +860,73 @@ namespace CopyCtorMutable {
// both-note {{in call}}
}
+
+namespace NonTrivialCtor {
+ struct A { int x = 1; constexpr int f() { return 1; } };
+ struct B : A { int y = 1; constexpr int g() { return 2; } };
+ struct C {
+ int x;
+ constexpr virtual int f() = 0;
+ };
+ struct D : C {
+ int y;
+ constexpr virtual int f() override { return 3; }
+ };
+
+ union U {
+ int n;
+ B b;
+ D d;
+ };
+
+ consteval int test(int which) {
+ if (which == 0) {}
+
+ U u{.n = 5};
+ assert_active(u);
+ assert_active(u.n);
+ assert_inactive(u.b);
+
+ switch (which) {
+ case 0:
+ u.b.x = 10; // both-note {{assignment to member 'b' of union with active member 'n'}}
+ return u.b.f();
+ case 1:
+ u.b.y = 10; // both-note {{assignment to member 'b' of union with active member 'n'}}
+ return u.b.g();
+ case 2:
+ u.d.x = 10; // both-note {{assignment to member 'd' of union with active member 'n'}}
+ return u.d.f();
+ case 3:
+ u.d.y = 10; // both-note {{assignment to member 'd' of union with active member 'n'}}
+ return u.d.f();
+ }
+
+ return 1;
+ }
+ static_assert(test(0)); // both-error {{not an integral constant expression}} \
+ // both-note {{in call}}
+ static_assert(test(1)); // both-error {{not an integral constant expression}} \
+ // both-note {{in call}}
+ static_assert(test(2)); // both-error {{not an integral constant expression}} \
+ // both-note {{in call}}
+ static_assert(test(3)); // both-error {{not an integral constant expression}} \
+ // both-note {{in call}}
+
+}
+
+namespace PrimitiveFieldInitActivates {
+ /// The initializer of 'a' needs the field to be active _before_ it is visited.
+ template<int> struct X {};
+ union V {
+ int a, b;
+ constexpr V(X<0>) : a(a = 1) {} // ok
+ constexpr V(X<2>) : a() { b = 1; } // ok
+ };
+ constinit V v0 = X<0>();
+ constinit V v2 = X<2>();
+}
+
#endif
namespace AddressComparison {
diff --git a/clang/test/AST/ast-dump-APValue-lvalue.cpp b/clang/test/AST/ast-dump-APValue-lvalue.cpp
index 51d22a5..f4cf2f5 100644
--- a/clang/test/AST/ast-dump-APValue-lvalue.cpp
+++ b/clang/test/AST/ast-dump-APValue-lvalue.cpp
@@ -67,6 +67,10 @@ void Test(int (&arr)[10]) {
// CHECK-NEXT: | |-value: LValue Base=TypeInfoLValue typeid(int), Null=0, Offset=0, HasPath=1, PathLength=0, Path=()
constexpr int(MP::*pmi) = (int MP::*)&P::x;
- // CHECK: `-VarDecl {{.*}} <col:{{.*}}, col:{{.*}}> col:{{.*}} pmi 'int (MP::*const)' constexpr cinit
- // CHECK-NEXT: |-value: MemberPointer MP::x
+ // CHECK: | `-VarDecl {{.*}} <col:{{.*}}, col:{{.*}}> col:{{.*}} pmi 'int (MP::*const)' constexpr cinit
+ // CHECK-NEXT: | |-value: MemberPointer MP::x
+
+ constexpr int(MP::*pmn) = (int MP::*)nullptr;
+ // CHECK: `-VarDecl {{.*}} <col:{{.*}}, col:{{.*}}> col:{{.*}} pmn 'int (MP::*const)' constexpr cinit
+ // CHECK-NEXT: |-value: MemberPointer null
}
diff --git a/clang/test/Analysis/malloc.c b/clang/test/Analysis/malloc.c
index 27a04ff..a9828cf 100644
--- a/clang/test/Analysis/malloc.c
+++ b/clang/test/Analysis/malloc.c
@@ -1954,9 +1954,23 @@ int conjure(void);
void testExtent(void) {
int x = conjure();
clang_analyzer_dump(x);
- // expected-warning-re@-1 {{{{^conj_\$[[:digit:]]+{int, LC1, S[[:digit:]]+, #1}}}}}}
+ // expected-warning-re@-1 {{{{^conj_\$[[:digit:]]+{int, LC[[:digit:]]+, S[[:digit:]]+, #1}}}}}}
int *p = (int *)malloc(x);
clang_analyzer_dumpExtent(p);
- // expected-warning-re@-1 {{{{^conj_\$[[:digit:]]+{int, LC1, S[[:digit:]]+, #1}}}}}}
+ // expected-warning-re@-1 {{{{^conj_\$[[:digit:]]+{int, LC[[:digit:]]+, S[[:digit:]]+, #1}}}}}}
free(p);
}
+
+void gh149754(void *p) {
+ // This testcase demonstrates an unusual situation where a certain symbol
+ // (the value of `p`) is released (more precisely, transitions from the
+ // untracked state to the Released state) twice within the same bug path,
+ // because the `EvalAssume` callback resets it to the untracked state after
+ // the first release. This used to trigger an assertion failure; that
+ // assertion has since been removed from the codebase.
+ if (!realloc(p, 8)) {
+ realloc(p, 8);
+ free(p); // expected-warning {{Attempt to free released memory}}
+ }
+ // expected-warning@+1 {{Potential memory leak}}
+}
diff --git a/clang/test/C/C2y/n3353.c b/clang/test/C/C2y/n3353.c
index cd61cbf..a2e08cf 100644
--- a/clang/test/C/C2y/n3353.c
+++ b/clang/test/C/C2y/n3353.c
@@ -44,7 +44,12 @@ static const void *ptr = 0o0; /* ext-warning {{octal integer literals are a C2y
#endif
// 0 by itself is not deprecated, of course.
-int k = 0;
+int k1 = 0;
+unsigned int k2 = 0u;
+long k3 = 0l;
+unsigned long k4 = 0ul;
+long long k5 = 0ll;
+unsigned long long k6 = 0ull;
// Test a preprocessor use of 0 by itself, which is also not deprecated.
#if 0
@@ -65,7 +70,6 @@ static_assert(__extension__ _Generic(typeof(l), const int : 1, default : 0)); //
// Note that 0o by itself is an invalid literal.
int m = 0o; /* expected-error {{invalid suffix 'o' on integer constant}}
- c2y-warning {{octal literals without a '0o' prefix are deprecated}}
*/
// Ensure negation works as expected.
@@ -83,13 +87,11 @@ int n = 0o18; /* expected-error {{invalid digit '8' in octal constant}}
cpp-warning {{octal integer literals are a Clang extension}}
*/
int o1 = 0o8; /* expected-error {{invalid suffix 'o8' on integer constant}}
- c2y-warning {{octal literals without a '0o' prefix are deprecated}}
*/
// FIXME: however, it matches the behavior for hex literals in terms of the
// error reported. Unfortunately, we then go on to think 0 is an octal literal
// without a prefix, which is again a bit confusing.
int o2 = 0xG; /* expected-error {{invalid suffix 'xG' on integer constant}}
- c2y-warning {{octal literals without a '0o' prefix are deprecated}}
*/
// Show that floating-point suffixes on octal literals are rejected.
@@ -130,7 +132,6 @@ constexpr int p = 0o0'1'2'3'4'5'6'7; /* compat-warning {{octal integer literals
*/
static_assert(p == 01234567); // c2y-warning {{octal literals without a '0o' prefix are deprecated}}
int q = 0o'0'1; /* expected-error {{invalid suffix 'o'0'1' on integer constant}}
- c2y-warning {{octal literals without a '0o' prefix are deprecated}}
*/
#define M 0o123
diff --git a/clang/test/CIR/CodeGen/array-ctor.cpp b/clang/test/CIR/CodeGen/array-ctor.cpp
new file mode 100644
index 0000000..b3d81a8
--- /dev/null
+++ b/clang/test/CIR/CodeGen/array-ctor.cpp
@@ -0,0 +1,106 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -Wno-unused-value -fclangir -emit-cir -mmlir --mlir-print-ir-before=cir-lowering-prepare %s -o - 2>&1 | FileCheck --check-prefixes=CIR-BEFORE-LPP %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -Wno-unused-value -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s -check-prefix=CIR
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -Wno-unused-value -fclangir -emit-llvm %s -o %t-cir.ll
+// RUN: FileCheck --input-file=%t-cir.ll %s -check-prefix=LLVM
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -Wno-unused-value -emit-llvm %s -o %t.ll
+// RUN: FileCheck --input-file=%t.ll %s -check-prefix=OGCG
+
+struct S {
+ S();
+};
+
+void foo() {
+ S s[42];
+}
+
+// CIR-BEFORE-LPP: cir.func dso_local @_Z3foov()
+// CIR-BEFORE-LPP: %[[ARRAY:.*]] = cir.alloca !cir.array<!rec_S x 42>, !cir.ptr<!cir.array<!rec_S x 42>>, ["s", init]
+// CIR-BEFORE-LPP: cir.array.ctor %[[ARRAY]] : !cir.ptr<!cir.array<!rec_S x 42>> {
+// CIR-BEFORE-LPP: ^bb0(%[[ARG:.*]]: !cir.ptr<!rec_S>):
+// CIR-BEFORE-LPP: cir.call @_ZN1SC1Ev(%[[ARG]]) : (!cir.ptr<!rec_S>) -> ()
+// CIR-BEFORE-LPP: cir.yield
+// CIR-BEFORE-LPP: }
+// CIR-BEFORE-LPP: cir.return
+// CIR-BEFORE-LPP: }
+
+// CIR: cir.func dso_local @_Z3foov()
+// CIR: %[[ARRAY:.*]] = cir.alloca !cir.array<!rec_S x 42>, !cir.ptr<!cir.array<!rec_S x 42>>, ["s", init]
+// CIR: %[[CONST42:.*]] = cir.const #cir.int<42> : !u64i
+// CIR: %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[ARRAY]] : !cir.ptr<!cir.array<!rec_S x 42>>), !cir.ptr<!rec_S>
+// CIR: %[[END_PTR:.*]] = cir.ptr_stride(%[[DECAY]] : !cir.ptr<!rec_S>, %[[CONST42]] : !u64i), !cir.ptr<!rec_S>
+// CIR: %[[ITER:.*]] = cir.alloca !cir.ptr<!rec_S>, !cir.ptr<!cir.ptr<!rec_S>>, ["__array_idx"]
+// CIR: cir.store %[[DECAY]], %[[ITER]] : !cir.ptr<!rec_S>, !cir.ptr<!cir.ptr<!rec_S>>
+// CIR: cir.do {
+// CIR: %[[CURRENT:.*]] = cir.load %[[ITER]] : !cir.ptr<!cir.ptr<!rec_S>>, !cir.ptr<!rec_S>
+// CIR: %[[CONST1:.*]] = cir.const #cir.int<1> : !u64i
+// CIR: cir.call @_ZN1SC1Ev(%[[CURRENT]]) : (!cir.ptr<!rec_S>) -> ()
+// CIR: %[[NEXT:.*]] = cir.ptr_stride(%[[CURRENT]] : !cir.ptr<!rec_S>, %[[CONST1]] : !u64i), !cir.ptr<!rec_S>
+// CIR: cir.store %[[NEXT]], %[[ITER]] : !cir.ptr<!rec_S>, !cir.ptr<!cir.ptr<!rec_S>>
+// CIR: cir.yield
+// CIR: } while {
+// CIR: %[[CURRENT2:.*]] = cir.load %[[ITER]] : !cir.ptr<!cir.ptr<!rec_S>>, !cir.ptr<!rec_S>
+// CIR: %[[CMP:.*]] = cir.cmp(ne, %[[CURRENT2]], %[[END_PTR]]) : !cir.ptr<!rec_S>, !cir.bool
+// CIR: cir.condition(%[[CMP]])
+// CIR: }
+// CIR: cir.return
+// CIR: }
+
+// LLVM: define dso_local void @_Z3foov()
+// LLVM: %[[ARRAY:.*]] = alloca [42 x %struct.S]
+// LLVM: %[[START:.*]] = getelementptr %struct.S, ptr %[[ARRAY]], i32 0
+// LLVM: %[[END:.*]] = getelementptr %struct.S, ptr %[[START]], i64 42
+// LLVM: %[[ITER:.*]] = alloca ptr
+// LLVM: store ptr %[[START]], ptr %[[ITER]]
+// LLVM: br label %[[LOOP:.*]]
+// LLVM: [[COND:.*]]:
+// LLVM: %[[CURRENT_CHECK:.*]] = load ptr, ptr %[[ITER]]
+// LLVM: %[[DONE:.*]] = icmp ne ptr %[[CURRENT_CHECK]], %[[END]]
+// LLVM: br i1 %[[DONE]], label %[[LOOP]], label %[[EXIT:.*]]
+// LLVM: [[LOOP]]:
+// LLVM: %[[CURRENT:.*]] = load ptr, ptr %[[ITER]]
+// LLVM: call void @_ZN1SC1Ev(ptr %[[CURRENT]])
+// LLVM: %[[NEXT:.*]] = getelementptr %struct.S, ptr %[[CURRENT]], i64 1
+// LLVM: store ptr %[[NEXT]], ptr %[[ITER]]
+// LLVM: br label %[[COND]]
+// LLVM: [[EXIT]]:
+// LLVM: ret void
+
+// OGCG: define dso_local void @_Z3foov()
+// OGCG: %[[ARRAY:.*]] = alloca [42 x %struct.S]
+// OGCG: %[[START:.*]] = getelementptr{{.*}} %struct.S{{.*}}
+// OGCG: %[[END:.*]] = getelementptr{{.*}} %struct.S{{.*}} i64 42
+// OGCG: br label %[[LOOP:.*]]
+// OGCG: [[LOOP]]:
+// OGCG: %[[CURRENT:.*]] = phi ptr [ %[[START]], %{{.*}} ], [ %[[NEXT:.*]], %[[LOOP]] ]
+// OGCG: call void @_ZN1SC1Ev(ptr{{.*}})
+// OGCG: %[[NEXT]] = getelementptr{{.*}} %struct.S{{.*}} i64 1
+// OGCG: %[[DONE:.*]] = icmp eq ptr %[[NEXT]], %[[END]]
+// OGCG: br i1 %[[DONE]], label %[[EXIT:.*]], label %[[LOOP]]
+// OGCG: [[EXIT]]:
+// OGCG: ret void
+
+void zero_sized() {
+ S s[0];
+}
+
+// CIR-BEFORE-LPP: cir.func dso_local @_Z10zero_sizedv()
+// CIR-BEFORE-LPP: cir.alloca !cir.array<!rec_S x 0>, !cir.ptr<!cir.array<!rec_S x 0>>, ["s"]
+// CIR-BEFORE-LPP-NOT: cir.array.ctor
+// CIR-BEFORE-LPP: cir.return
+
+// CIR: cir.func dso_local @_Z10zero_sizedv()
+// CIR: cir.alloca !cir.array<!rec_S x 0>, !cir.ptr<!cir.array<!rec_S x 0>>, ["s"]
+// CIR-NOT: cir.do
+// CIR-NOT: cir.call @_ZN1SC1Ev
+// CIR: cir.return
+
+// LLVM: define dso_local void @_Z10zero_sizedv()
+// LLVM: alloca [0 x %struct.S]
+// LLVM-NOT: call void @_ZN1SC1Ev
+// LLVM: ret void
+
+// OGCG: define dso_local void @_Z10zero_sizedv()
+// OGCG: alloca [0 x %struct.S]
+// OGCG-NOT: call void @_ZN1SC1Ev
+// OGCG: ret void
diff --git a/clang/test/CIR/CodeGen/bitfields.c b/clang/test/CIR/CodeGen/bitfields.c
index a73c076..869a7c9 100644
--- a/clang/test/CIR/CodeGen/bitfields.c
+++ b/clang/test/CIR/CodeGen/bitfields.c
@@ -315,3 +315,51 @@ void unOp(S* s) {
// OGCG: [[TMP12:%.*]] = shl i64 [[TMP8]], 62
// OGCG: [[TMP13:%.*]] = ashr i64 [[TMP12]], 62
// OGCG: [[TMP14:%.*]] = trunc i64 [[TMP13]] to i32
+
+void binOp(S* s) {
+ s->d |= 42;
+}
+
+// CIR: cir.func {{.*@binOp}}
+// CIR: [[TMP0:%.*]] = cir.const #cir.int<42> : !s32i
+// CIR: [[TMP1:%.*]] = cir.get_member {{.*}}[0] {name = "d"} : !cir.ptr<!rec_S> -> !cir.ptr<!u64i>
+// CIR: [[TMP2:%.*]] = cir.get_bitfield align(4) (#bfi_d, [[TMP1]] : !cir.ptr<!u64i>) -> !s32i
+// CIR: [[TMP3:%.*]] = cir.binop(or, [[TMP2]], [[TMP0]]) : !s32i
+// CIR: cir.set_bitfield align(4) (#bfi_d, [[TMP1]] : !cir.ptr<!u64i>, [[TMP3]] : !s32i)
+
+// LLVM: define {{.*@binOp}}
+// LLVM: [[TMP0:%.*]] = load ptr, ptr {{.*}}, align 8
+// LLVM: [[TMP1:%.*]] = getelementptr %struct.S, ptr [[TMP0]], i32 0, i32 0
+// LLVM: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 4
+// LLVM: [[TMP3:%.*]] = shl i64 [[TMP2]], 13
+// LLVM: [[TMP4:%.*]] = ashr i64 [[TMP3]], 62
+// LLVM: [[TMP5:%.*]] = trunc i64 [[TMP4]] to i32
+// LLVM: [[TMP6:%.*]] = or i32 [[TMP5]], 42
+// LLVM: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64
+// LLVM: [[TMP8:%.*]] = load i64, ptr [[TMP1]], align 4
+// LLVM: [[TMP9:%.*]] = and i64 [[TMP7]], 3
+// LLVM: [[TMP10:%.*]] = shl i64 [[TMP9]], 49
+// LLVM: [[TMP11:%.*]] = and i64 [[TMP8]], -1688849860263937
+// LLVM: [[TMP12:%.*]] = or i64 [[TMP11]], [[TMP10]]
+// LLVM: store i64 [[TMP12]], ptr [[TMP1]], align 4
+// LLVM: [[TMP13:%.*]] = shl i64 [[TMP9]], 62
+// LLVM: [[TMP14:%.*]] = ashr i64 [[TMP13]], 62
+// LLVM: [[TMP15:%.*]] = trunc i64 [[TMP14]] to i32
+
+// OGCG: define {{.*@binOp}}
+// OGCG: [[TMP0:%.*]] = load ptr, ptr %s.addr, align 8
+// OGCG: [[TMP1:%.*]] = load i64, ptr [[TMP0]], align 4
+// OGCG: [[TMP2:%.*]] = shl i64 [[TMP1]], 13
+// OGCG: [[TMP3:%.*]] = ashr i64 [[TMP2]], 62
+// OGCG: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
+// OGCG: [[TMP5:%.*]] = or i32 [[TMP4]], 42
+// OGCG: [[TMP6:%.*]] = zext i32 [[TMP5]] to i64
+// OGCG: [[TMP7:%.*]] = load i64, ptr [[TMP0]], align 4
+// OGCG: [[TMP8:%.*]] = and i64 [[TMP6]], 3
+// OGCG: [[TMP9:%.*]] = shl i64 [[TMP8]], 49
+// OGCG: [[TMP10:%.*]] = and i64 [[TMP7]], -1688849860263937
+// OGCG: [[TMP11:%.*]] = or i64 [[TMP10]], [[TMP9]]
+// OGCG: store i64 [[TMP11]], ptr [[TMP0]], align 4
+// OGCG: [[TMP12:%.*]] = shl i64 [[TMP8]], 62
+// OGCG: [[TMP13:%.*]] = ashr i64 [[TMP12]], 62
+// OGCG: [[TMP14:%.*]] = trunc i64 [[TMP13]] to i32
diff --git a/clang/test/CIR/CodeGen/builtin_call.cpp b/clang/test/CIR/CodeGen/builtin_call.cpp
index ad0e478..d9a7068 100644
--- a/clang/test/CIR/CodeGen/builtin_call.cpp
+++ b/clang/test/CIR/CodeGen/builtin_call.cpp
@@ -111,6 +111,22 @@ void assume(bool arg) {
// OGCG: call void @llvm.assume(i1 %{{.+}})
// OGCG: }
+void assume_separate_storage(void *p1, void *p2) {
+ __builtin_assume_separate_storage(p1, p2);
+}
+
+// CIR: cir.func{{.*}} @_Z23assume_separate_storagePvS_
+// CIR: cir.assume_separate_storage %{{.+}}, %{{.+}} : !cir.ptr<!void>
+// CIR: }
+
+// LLVM: define {{.*}}void @_Z23assume_separate_storagePvS_
+// LLVM: call void @llvm.assume(i1 true) [ "separate_storage"(ptr %{{.+}}, ptr %{{.+}}) ]
+// LLVM: }
+
+// OGCG: define {{.*}}void @_Z23assume_separate_storagePvS_
+// OGCG: call void @llvm.assume(i1 true) [ "separate_storage"(ptr %{{.+}}, ptr %{{.+}}) ]
+// OGCG: }
+
void expect(int x, int y) {
__builtin_expect(x, y);
}
diff --git a/clang/test/CIR/CodeGen/cleanup.cpp b/clang/test/CIR/CodeGen/cleanup.cpp
new file mode 100644
index 0000000..41961513
--- /dev/null
+++ b/clang/test/CIR/CodeGen/cleanup.cpp
@@ -0,0 +1,83 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s
+
+struct Struk {
+ ~Struk();
+};
+
+// CHECK: !rec_Struk = !cir.record<struct "Struk" padded {!u8i}>
+
+// CHECK: cir.func{{.*}} @_ZN5StrukD1Ev(!cir.ptr<!rec_Struk>)
+
+void test_cleanup() {
+ Struk s;
+}
+
+// CHECK: cir.func{{.*}} @_Z12test_cleanupv()
+// CHECK: %[[S_ADDR:.*]] = cir.alloca !rec_Struk, !cir.ptr<!rec_Struk>, ["s"]
+// CHECK: cir.call @_ZN5StrukD1Ev(%[[S_ADDR]]) nothrow : (!cir.ptr<!rec_Struk>) -> ()
+// CHECK: cir.return
+
+void test_cleanup_ifelse(bool b) {
+ if (b) {
+ Struk s;
+ } else {
+ Struk s;
+ }
+}
+
+// CHECK: cir.func{{.*}} @_Z19test_cleanup_ifelseb(%arg0: !cir.bool
+// CHECK: cir.scope {
+// CHECK: %[[B:.*]] = cir.load{{.*}} %0 : !cir.ptr<!cir.bool>
+// CHECK: cir.if %[[B]] {
+// CHECK: %[[S:.*]] = cir.alloca !rec_Struk, !cir.ptr<!rec_Struk>, ["s"]
+// CHECK: cir.call @_ZN5StrukD1Ev(%[[S]]) nothrow : (!cir.ptr<!rec_Struk>) -> ()
+// CHECK: } else {
+// CHECK: %[[S_TOO:.*]] = cir.alloca !rec_Struk, !cir.ptr<!rec_Struk>, ["s"]
+// CHECK: cir.call @_ZN5StrukD1Ev(%[[S_TOO]]) nothrow : (!cir.ptr<!rec_Struk>) -> ()
+// CHECK: }
+// CHECK: }
+// CHECK: cir.return
+
+void test_cleanup_for() {
+ for (int i = 0; i < 10; i++) {
+ Struk s;
+ }
+}
+
+// CHECK: cir.func{{.*}} @_Z16test_cleanup_forv()
+// CHECK: cir.scope {
+// CHECK: cir.for : cond {
+// CHECK: } body {
+// CHECK: cir.scope {
+// CHECK: %[[S:.*]] = cir.alloca !rec_Struk, !cir.ptr<!rec_Struk>, ["s"]
+// CHECK: cir.call @_ZN5StrukD1Ev(%[[S]]) nothrow : (!cir.ptr<!rec_Struk>) -> ()
+// CHECK: }
+// CHECK: cir.yield
+// CHECK: } step {
+// CHECK: }
+// CHECK: }
+// CHECK: cir.return
+
+void test_cleanup_nested() {
+ Struk outer;
+ {
+ Struk middle;
+ {
+ Struk inner;
+ }
+ }
+}
+
+// CHECK: cir.func{{.*}} @_Z19test_cleanup_nestedv()
+// CHECK: %[[OUTER:.*]] = cir.alloca !rec_Struk, !cir.ptr<!rec_Struk>, ["outer"]
+// CHECK: cir.scope {
+// CHECK: %[[MIDDLE:.*]] = cir.alloca !rec_Struk, !cir.ptr<!rec_Struk>, ["middle"]
+// CHECK: cir.scope {
+// CHECK: %[[INNER:.*]] = cir.alloca !rec_Struk, !cir.ptr<!rec_Struk>, ["inner"]
+// CHECK: cir.call @_ZN5StrukD1Ev(%[[INNER]]) nothrow : (!cir.ptr<!rec_Struk>) -> ()
+// CHECK: }
+// CHECK: cir.call @_ZN5StrukD1Ev(%[[MIDDLE]]) nothrow : (!cir.ptr<!rec_Struk>) -> ()
+// CHECK: }
+// CHECK: cir.call @_ZN5StrukD1Ev(%[[OUTER]]) nothrow : (!cir.ptr<!rec_Struk>) -> ()
+// CHECK: cir.return
diff --git a/clang/test/CIR/CodeGen/complex-cast.cpp b/clang/test/CIR/CodeGen/complex-cast.cpp
new file mode 100644
index 0000000..0881057
--- /dev/null
+++ b/clang/test/CIR/CodeGen/complex-cast.cpp
@@ -0,0 +1,358 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir -mmlir --mlir-print-ir-before=cir-canonicalize -o %t.cir %s 2>&1 | FileCheck --check-prefix=CIR-BEFORE %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir -mmlir --mlir-print-ir-after=cir-lowering-prepare -o %t.cir %s 2>&1 | FileCheck --check-prefixes=CIR-AFTER %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -Wno-unused-value -fclangir -emit-llvm %s -o %t-cir.ll
+// RUN: FileCheck --input-file=%t-cir.ll %s -check-prefix=LLVM
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -Wno-unused-value -emit-llvm %s -o %t.ll
+// RUN: FileCheck --input-file=%t.ll %s -check-prefix=OGCG
+
+double _Complex cd;
+float _Complex cf;
+int _Complex ci;
+short _Complex cs;
+double sd;
+int si;
+bool b;
+
+void scalar_to_complex() {
+ cd = sd;
+ ci = si;
+ cd = si;
+ ci = sd;
+}
+
+// CIR-BEFORE: %[[FP_TO_COMPLEX:.*]] = cir.cast(float_to_complex, %{{.*}} : !cir.double), !cir.complex<!cir.double>
+
+// CIR-AFTER: %[[REAL:.*]] = cir.load{{.*}} %{{.*}} : !cir.ptr<!cir.double>, !cir.double
+// CIR-AFTER-NEXT: %[[IMAG:.*]] = cir.const #cir.fp<0.000000e+00> : !cir.double
+// CIR-AFTER-NEXT: %{{.*}} = cir.complex.create %[[REAL]], %[[IMAG]] : !cir.double -> !cir.complex<!cir.double>
+
+// LLVM: %[[REAL:.*]] = load double, ptr {{.*}}, align 8
+// LLVM-NEXT: %[[TMP:.*]] = insertvalue { double, double } undef, double %[[REAL]], 0
+// LLVM-NEXT: %[[COMPLEX:.*]] = insertvalue { double, double } %[[TMP]], double 0.000000e+00, 1
+// LLVM-NEXT: store { double, double } %[[COMPLEX]], ptr {{.*}}, align 8
+
+// OGCG: %[[REAL:.*]] = load double, ptr {{.*}}, align 8
+// OGCG: store double %[[REAL]], ptr {{.*}}, align 8
+// OGCG: store double 0.000000e+00, ptr getelementptr inbounds nuw ({ double, double }, ptr @cd, i32 0, i32 1), align 8
+
+// CIR-BEFORE: %[[INT_TO_COMPLEX:.*]] = cir.cast(int_to_complex, %{{.*}} : !s32i), !cir.complex<!s32i>
+
+// CIR-AFTER: %[[REAL:.*]] = cir.load{{.*}} %{{.*}} : !cir.ptr<!s32i>, !s32i
+// CIR-AFTER-NEXT: %[[IMAG:.*]] = cir.const #cir.int<0> : !s32i
+// CIR-AFTER-NEXT: %{{.*}} = cir.complex.create %[[REAL]], %[[IMAG]] : !s32i -> !cir.complex<!s32i>
+
+// LLVM: %[[REAL:.*]] = load i32, ptr {{.*}}, align 4
+// LLVM-NEXT: %[[TMP:.*]] = insertvalue { i32, i32 } undef, i32 %[[REAL]], 0
+// LLVM-NEXT: %[[COMPLEX:.*]] = insertvalue { i32, i32 } %[[TMP]], i32 0, 1
+// LLVM-NEXT: store { i32, i32 } %[[COMPLEX]], ptr {{.*}}, align 4
+
+// OGCG: %[[REAL:.*]] = load i32, ptr {{.*}}, align 4
+// OGCG: store i32 %[[REAL]], ptr {{.*}}, align 4
+// OGCG: store i32 0, ptr getelementptr inbounds nuw ({ i32, i32 }, ptr @ci, i32 0, i32 1), align 4
+
+// CIR-BEFORE: %[[INT_TO_FP:.*]] = cir.cast(int_to_float, %{{.*}} : !s32i), !cir.double
+// CIR-BEFORE: %[[FP_TO_COMPLEX:.*]] = cir.cast(float_to_complex, %[[INT_TO_FP]] : !cir.double), !cir.complex<!cir.double>
+
+// CIR-AFTER: %[[TMP:.*]] = cir.load{{.*}} %{{.*}} : !cir.ptr<!s32i>, !s32i
+// CIR-AFTER-NEXT: %[[REAL:.*]] = cir.cast(int_to_float, %[[TMP]] : !s32i), !cir.double
+// CIR-AFTER-NEXT: %[[IMAG:.*]] = cir.const #cir.fp<0.000000e+00> : !cir.double
+// CIR-AFTER-NEXT: %{{.*}} = cir.complex.create %[[REAL]], %[[IMAG]] : !cir.double -> !cir.complex<!cir.double>
+
+// LLVM: %[[TMP:.*]] = load i32, ptr {{.*}}, align 4
+// LLVM-NEXT: %[[REAL:.*]] = sitofp i32 %[[TMP]] to double
+// LLVM-NEXT: %[[TMP_2:.*]] = insertvalue { double, double } undef, double %[[REAL]], 0
+// LLVM-NEXT: %[[COMPLEX:.*]] = insertvalue { double, double } %[[TMP_2]], double 0.000000e+00, 1
+// LLVM-NEXT: store { double, double } %[[COMPLEX]], ptr {{.*}}, align 8
+
+// OGCG: %[[TMP:.*]] = load i32, ptr {{.*}}, align 4
+// OGCG: %[[REAL:.*]] = sitofp i32 %[[TMP]] to double
+// OGCG: store double %[[REAL]], ptr {{.*}}, align 8
+// OGCG: store double 0.000000e+00, ptr getelementptr inbounds nuw ({ double, double }, ptr {{.*}}, i32 0, i32 1), align 8
+
+// CIR-BEFORE: %[[FP_TO_INT:.*]] = cir.cast(float_to_int, %{{.*}} : !cir.double), !s32i
+// CIR-BEFORE: %[[INT_TO_COMPLEX:.*]] = cir.cast(int_to_complex, %[[FP_TO_INT]] : !s32i), !cir.complex<!s32i>
+
+// CIR-AFTER: %[[TMP:.*]] = cir.load{{.*}} %{{.*}} : !cir.ptr<!cir.double>, !cir.double
+// CIR-AFTER-NEXT: %[[REAL:.*]] = cir.cast(float_to_int, %[[TMP]] : !cir.double), !s32i
+// CIR-AFTER-NEXT: %[[IMAG:.*]] = cir.const #cir.int<0> : !s32i
+// CIR-AFTER-NEXT: %{{.*}} = cir.complex.create %[[REAL]], %[[IMAG]] : !s32i -> !cir.complex<!s32i>
+
+// LLVM: %[[TMP:.*]] = load double, ptr {{.*}}, align 8
+// LLVM-NEXT: %[[REAL:.*]] = fptosi double %[[TMP]] to i32
+// LLVM-NEXT: %[[TMP_2:.*]] = insertvalue { i32, i32 } undef, i32 %[[REAL]], 0
+// LLVM-NEXT: %[[COMPLEX:.*]] = insertvalue { i32, i32 } %[[TMP_2]], i32 0, 1
+// LLVM-NEXT: store { i32, i32 } %[[COMPLEX]], ptr {{.*}}, align 4
+
+// OGCG: %[[TMP:.*]] = load double, ptr {{.*}}, align 8
+// OGCG: %[[REAL:.*]] = fptosi double %[[TMP]] to i32
+// OGCG: store i32 %[[REAL]], ptr {{.*}}, align 4
+// OGCG: store i32 0, ptr getelementptr inbounds nuw ({ i32, i32 }, ptr {{.*}}, i32 0, i32 1), align 4
+
+void scalar_to_complex_explicit() {
+ cd = (double _Complex)sd;
+ ci = (int _Complex)si;
+ cd = (double _Complex)si;
+ ci = (int _Complex)sd;
+}
+
+// CIR-BEFORE: %[[FP_TO_COMPLEX:.*]] = cir.cast(float_to_complex, %{{.*}} : !cir.double), !cir.complex<!cir.double>
+
+// CIR-AFTER: %[[REAL:.*]] = cir.load{{.*}} %{{.*}} : !cir.ptr<!cir.double>, !cir.double
+// CIR-AFTER-NEXT: %[[IMAG:.*]] = cir.const #cir.fp<0.000000e+00> : !cir.double
+// CIR-AFTER-NEXT: %{{.*}} = cir.complex.create %[[REAL]], %[[IMAG]] : !cir.double -> !cir.complex<!cir.double>
+
+// LLVM: %[[REAL:.*]] = load double, ptr {{.*}}, align 8
+// LLVM-NEXT: %[[TMP:.*]] = insertvalue { double, double } undef, double %[[REAL]], 0
+// LLVM-NEXT: %[[COMPLEX:.*]] = insertvalue { double, double } %[[TMP]], double 0.000000e+00, 1
+// LLVM-NEXT: store { double, double } %[[COMPLEX]], ptr {{.*}}, align 8
+
+// OGCG: %[[REAL:.*]] = load double, ptr {{.*}}, align 8
+// OGCG: store double %[[REAL]], ptr {{.*}}, align 8
+// OGCG: store double 0.000000e+00, ptr getelementptr inbounds nuw ({ double, double }, ptr @cd, i32 0, i32 1), align 8
+
+// CIR-BEFORE: %[[INT_TO_COMPLEX:.*]] = cir.cast(int_to_complex, %{{.*}} : !s32i), !cir.complex<!s32i>
+
+// CIR-AFTER: %[[REAL:.*]] = cir.load{{.*}} %{{.*}} : !cir.ptr<!s32i>, !s32i
+// CIR-AFTER-NEXT: %[[IMAG:.*]] = cir.const #cir.int<0> : !s32i
+// CIR-AFTER-NEXT: %{{.*}} = cir.complex.create %[[REAL]], %[[IMAG]] : !s32i -> !cir.complex<!s32i>
+
+// LLVM: %[[REAL:.*]] = load i32, ptr {{.*}}, align 4
+// LLVM-NEXT: %[[TMP:.*]] = insertvalue { i32, i32 } undef, i32 %[[REAL]], 0
+// LLVM-NEXT: %[[COMPLEX:.*]] = insertvalue { i32, i32 } %[[TMP]], i32 0, 1
+// LLVM-NEXT: store { i32, i32 } %[[COMPLEX]], ptr {{.*}}, align 4
+
+// OGCG: %[[REAL:.*]] = load i32, ptr {{.*}}, align 4
+// OGCG: store i32 %[[REAL]], ptr {{.*}}, align 4
+// OGCG: store i32 0, ptr getelementptr inbounds nuw ({ i32, i32 }, ptr @ci, i32 0, i32 1), align 4
+
+// CIR-BEFORE: %[[INT_TO_FP:.*]] = cir.cast(int_to_float, %{{.*}} : !s32i), !cir.double
+// CIR-BEFORE: %[[FP_TO_COMPLEX:.*]] = cir.cast(float_to_complex, %[[INT_TO_FP]] : !cir.double), !cir.complex<!cir.double>
+
+// CIR-AFTER: %[[TMP:.*]] = cir.load{{.*}} %{{.*}} : !cir.ptr<!s32i>, !s32i
+// CIR-AFTER-NEXT: %[[REAL:.*]] = cir.cast(int_to_float, %[[TMP]] : !s32i), !cir.double
+// CIR-AFTER-NEXT: %[[IMAG:.*]] = cir.const #cir.fp<0.000000e+00> : !cir.double
+// CIR-AFTER-NEXT: %{{.*}} = cir.complex.create %[[REAL]], %[[IMAG]] : !cir.double -> !cir.complex<!cir.double>
+
+// LLVM: %[[TMP:.*]] = load i32, ptr {{.*}}, align 4
+// LLVM-NEXT: %[[REAL:.*]] = sitofp i32 %[[TMP]] to double
+// LLVM-NEXT: %[[TMP_2:.*]] = insertvalue { double, double } undef, double %[[REAL]], 0
+// LLVM-NEXT: %[[COMPLEX:.*]] = insertvalue { double, double } %[[TMP_2]], double 0.000000e+00, 1
+// LLVM-NEXT: store { double, double } %[[COMPLEX]], ptr {{.*}}, align 8
+
+// OGCG: %[[TMP:.*]] = load i32, ptr {{.*}}, align 4
+// OGCG: %[[REAL:.*]] = sitofp i32 %[[TMP]] to double
+// OGCG: store double %[[REAL]], ptr {{.*}}, align 8
+// OGCG: store double 0.000000e+00, ptr getelementptr inbounds nuw ({ double, double }, ptr {{.*}}, i32 0, i32 1), align 8
+
+// CIR-BEFORE: %[[FP_TO_INT:.*]] = cir.cast(float_to_int, %{{.*}} : !cir.double), !s32i
+// CIR-BEFORE: %[[INT_TO_COMPLEX:.*]] = cir.cast(int_to_complex, %[[FP_TO_INT]] : !s32i), !cir.complex<!s32i>
+
+// CIR-AFTER: %[[TMP:.*]] = cir.load{{.*}} %{{.*}} : !cir.ptr<!cir.double>, !cir.double
+// CIR-AFTER-NEXT: %[[REAL:.*]] = cir.cast(float_to_int, %[[TMP]] : !cir.double), !s32i
+// CIR-AFTER-NEXT: %[[IMAG:.*]] = cir.const #cir.int<0> : !s32i
+// CIR-AFTER-NEXT: %{{.*}} = cir.complex.create %[[REAL]], %[[IMAG]] : !s32i -> !cir.complex<!s32i>
+
+// LLVM: %[[TMP:.*]] = load double, ptr {{.*}}, align 8
+// LLVM-NEXT: %[[REAL:.*]] = fptosi double %[[TMP]] to i32
+// LLVM-NEXT: %[[TMP_2:.*]] = insertvalue { i32, i32 } undef, i32 %[[REAL]], 0
+// LLVM-NEXT: %[[COMPLEX:.*]] = insertvalue { i32, i32 } %[[TMP_2]], i32 0, 1
+// LLVM-NEXT: store { i32, i32 } %[[COMPLEX]], ptr {{.*}}, align 4
+
+// OGCG: %[[TMP:.*]] = load double, ptr {{.*}}, align 8
+// OGCG: %[[REAL:.*]] = fptosi double %[[TMP]] to i32
+// OGCG: store i32 %[[REAL]], ptr {{.*}}, align 4
+// OGCG: store i32 0, ptr getelementptr inbounds nuw ({ i32, i32 }, ptr {{.*}}, i32 0, i32 1), align 4
+
+void complex_to_scalar() {
+ sd = (double)cd;
+ si = (int)ci;
+ sd = (double)ci;
+ si = (int)cd;
+}
+
+// CIR-BEFORE: %[[FP_TO_COMPLEX_REAL:.*]] = cir.cast(float_complex_to_real, %{{.*}} : !cir.complex<!cir.double>), !cir.double
+
+// CIR-AFTER: %{{.*}} = cir.complex.real %{{.*}} : !cir.complex<!cir.double> -> !cir.double
+
+// LLVM: %[[REAL:.*]] = extractvalue { double, double } %{{.*}}, 0
+// LLVM: store double %[[REAL]], ptr {{.*}}, align 8
+
+// OGCG: %[[REAL:.*]] = load double, ptr {{.*}}, align 8
+// OGCG: store double %[[REAL]], ptr {{.*}}, align 8
+
+// CIR-BEFORE: %[[INT_COMPLEX_TO_REAL:.*]] = cir.cast(int_complex_to_real, %{{.*}} : !cir.complex<!s32i>), !s32i
+
+// CIR-AFTER: %{{.*}} = cir.complex.real %{{.*}} : !cir.complex<!s32i> -> !s32i
+
+// LLVM: %[[REAL:.*]] = extractvalue { i32, i32 } %{{.*}}, 0
+// LLVM: store i32 %[[REAL]], ptr {{.*}}, align 4
+
+// OGCG: %[[REAL:.*]] = load i32, ptr {{.*}}, align 4
+// OGCG: store i32 %[[REAL]], ptr {{.*}}, align 4
+
+// CIR-BEFORE: %[[INT_COMPLEX_TO_REAL:.*]] = cir.cast(int_complex_to_real, %{{.*}} : !cir.complex<!s32i>), !s32i
+// CIR-BEFORE: %[[INT_TO_FP:.*]] = cir.cast(int_to_float, %[[INT_COMPLEX_TO_REAL]] : !s32i), !cir.double
+
+// CIR-AFTER: %[[REAL:.*]] = cir.complex.real %{{.*}} : !cir.complex<!s32i> -> !s32i
+// CIR-AFTER-NEXT: %{{.*}} = cir.cast(int_to_float, %[[REAL]] : !s32i), !cir.double
+
+// LLVM: %[[REAL:.*]] = extractvalue { i32, i32 } %{{.+}}, 0
+// LLVM-NEXT: %[[REAL_TO_DOUBLE:.*]] = sitofp i32 %[[REAL]] to double
+// LLVM-NEXT: store double %[[REAL_TO_DOUBLE]], ptr {{.*}}, align 8
+
+// OGCG: %[[REAL:.*]] = load i32, ptr {{.*}}, align 4
+// OGCG: %[[INT_TO_FP:.*]] = sitofp i32 %[[REAL]] to double
+// OGCG: store double %[[INT_TO_FP]], ptr {{.*}}, align 8
+
+// CIR-BEFORE: %[[FP_TO_COMPLEX_REAL:.*]] = cir.cast(float_complex_to_real, %{{.*}} : !cir.complex<!cir.double>), !cir.double
+// CIR-BEFORE: %[[FP_TO_INT:.*]] = cir.cast(float_to_int, %[[FP_TO_COMPLEX_REAL]] : !cir.double), !s32i
+
+// CIR-AFTER: %[[REAL:.*]] = cir.complex.real %{{.*}} : !cir.complex<!cir.double> -> !cir.double
+// CIR-AFTER-NEXT: %{{.*}} = cir.cast(float_to_int, %[[REAL]] : !cir.double), !s32i
+
+// LLVM: %[[REAL:.*]] = extractvalue { double, double } %{{.+}}, 0
+// LLVM-NEXT: %[[REAL_TO_INT:.*]] = fptosi double %[[REAL]] to i32
+// LLVM-NEXT: store i32 %[[REAL_TO_INT]], ptr {{.*}}, align 4
+
+// OGCG: %[[REAL:.*]] = load double, ptr {{.*}}, align 8
+// OGCG: %[[FP_TO_INT:.*]] = fptosi double %[[REAL]] to i32
+// OGCG: store i32 %[[FP_TO_INT]], ptr {{.*}}, align 4
+
+void complex_to_bool() {
+ b = (bool)cd;
+ b = (bool)ci;
+}
+
+// CIR-BEFORE: %[[FP_COMPLEX_TO_BOOL:.*]] = cir.cast(float_complex_to_bool, %{{.*}} : !cir.complex<!cir.double>), !cir.bool
+
+// CIR-AFTER: %[[REAL:.*]] = cir.complex.real %{{.*}} : !cir.complex<!cir.double> -> !cir.double
+// CIR-AFTER-NEXT: %[[IMAG:.*]] = cir.complex.imag %{{.*}} : !cir.complex<!cir.double> -> !cir.double
+// CIR-AFTER-NEXT: %[[REAL_TO_BOOL:.*]] = cir.cast(float_to_bool, %[[REAL]] : !cir.double), !cir.bool
+// CIR-AFTER-NEXT: %[[IMAG_TO_BOOL:.*]] = cir.cast(float_to_bool, %[[IMAG]] : !cir.double), !cir.bool
+// CIR-AFTER-NEXT: %[[CONST_TRUE:.*]] = cir.const #true
+// CIR-AFTER-NEXT: %{{.*}} = cir.select if %[[REAL_TO_BOOL]] then %[[CONST_TRUE]] else %[[IMAG_TO_BOOL]] : (!cir.bool, !cir.bool, !cir.bool) -> !cir.bool
+
+// LLVM: %[[REAL:.*]] = extractvalue { double, double } %{{.*}}, 0
+// LLVM-NEXT: %[[IMAG:.*]] = extractvalue { double, double } %{{.*}}, 1
+// LLVM-NEXT: %[[REAL_TO_BOOL:.*]] = fcmp une double %[[REAL]], 0.000000e+00
+// LLVM-NEXT: %[[IMAG_TO_BOOL:.*]] = fcmp une double %[[IMAG]], 0.000000e+00
+// LLVM-NEXT: %[[OR:.*]] = or i1 %[[REAL_TO_BOOL]], %[[IMAG_TO_BOOL]]
+// LLVM-NEXT: %[[RESULT:.*]] = zext i1 %[[OR]] to i8
+// LLVM-NEXT: store i8 %[[RESULT]], ptr {{.*}}, align 1
+
+// OGCG: %[[REAL:.*]] = load double, ptr {{.*}}, align 8
+// OGCG: %[[IMAG:.*]] = load double, ptr getelementptr inbounds nuw ({ double, double }, ptr {{.*}}, i32 0, i32 1), align 8
+// OGCG: %[[REAL_TO_BOOL:.*]] = fcmp une double %[[REAL]], 0.000000e+00
+// OGCG: %[[IMAG_TO_BOOL:.*]] = fcmp une double %[[IMAG]], 0.000000e+00
+// OGCG: %[[COMPLEX_TO_BOOL:.*]] = or i1 %[[REAL_TO_BOOL]], %[[IMAG_TO_BOOL]]
+// OGCG: %[[BOOL_TO_INT:.*]] = zext i1 %[[COMPLEX_TO_BOOL]] to i8
+// OGCG: store i8 %[[BOOL_TO_INT]], ptr {{.*}}, align 1
+
+// CIR-BEFORE: %[[INT_COMPLEX_TO_BOOL:.*]] = cir.cast(int_complex_to_bool, %{{.*}} : !cir.complex<!s32i>), !cir.bool
+
+// CIR-AFTER: %[[REAL:.*]] = cir.complex.real %{{.*}} : !cir.complex<!s32i> -> !s32i
+// CIR-AFTER-NEXT: %[[IMAG:.*]] = cir.complex.imag %{{.*}} : !cir.complex<!s32i> -> !s32i
+// CIR-AFTER-NEXT: %[[REAL_TO_BOOL:.*]] = cir.cast(int_to_bool, %[[REAL]] : !s32i), !cir.bool
+// CIR-AFTER-NEXT: %[[IMAG_TO_BOOL:.*]] = cir.cast(int_to_bool, %[[IMAG]] : !s32i), !cir.bool
+// CIR-AFTER-NEXT: %[[CONST_TRUE:.*]] = cir.const #true
+// CIR-AFTER-NEXT: %{{.+}} = cir.select if %[[REAL_TO_BOOL]] then %[[CONST_TRUE]] else %[[IMAG_TO_BOOL]] : (!cir.bool, !cir.bool, !cir.bool) -> !cir.bool
+
+// LLVM: %[[REAL:.*]] = extractvalue { i32, i32 } %{{.*}}, 0
+// LLVM-NEXT: %[[IMAG:.*]] = extractvalue { i32, i32 } %{{.*}}, 1
+// LLVM-NEXT: %[[REAL_TO_BOOL:.*]] = icmp ne i32 %[[REAL]], 0
+// LLVM-NEXT: %[[IMAG_TO_BOOL:.*]] = icmp ne i32 %[[IMAG]], 0
+// LLVM-NEXT: %[[OR:.*]] = or i1 %[[REAL_TO_BOOL]], %[[IMAG_TO_BOOL]]
+// LLVM-NEXT: %[[RESULT:.*]] = zext i1 %[[OR]] to i8
+// LLVM-NEXT: store i8 %[[RESULT]], ptr {{.*}}, align 1
+
+// OGCG: %[[REAL:.*]] = load i32, ptr {{.*}}, align 4
+// OGCG: %[[IMAG:.*]] = load i32, ptr getelementptr inbounds nuw ({ i32, i32 }, ptr {{.*}}, i32 0, i32 1), align 4
+// OGCG: %[[REAL_TO_BOOL:.*]] = icmp ne i32 %[[REAL]], 0
+// OGCG: %[[IMAG_TO_BOOL:.*]] = icmp ne i32 %[[IMAG]], 0
+// OGCG: %[[COMPLEX_TO_BOOL:.*]] = or i1 %[[REAL_TO_BOOL]], %[[IMAG_TO_BOOL]]
+// OGCG: %[[BOOL_TO_INT:.*]] = zext i1 %[[COMPLEX_TO_BOOL]] to i8
+// OGCG: store i8 %[[BOOL_TO_INT]], ptr {{.*}}, align 1
+
+void complex_to_complex_cast() {
+ cd = cf;
+ ci = cs;
+}
+
+// CIR-BEFORE: %[[TMP:.*]] = cir.load{{.*}} %{{.*}} : !cir.ptr<!cir.complex<!cir.float>>, !cir.complex<!cir.float>
+// CIR-BEFORE: %[[FP_COMPLEX:.*]] = cir.cast(float_complex, %[[TMP]] : !cir.complex<!cir.float>), !cir.complex<!cir.double>
+
+// CIR-AFTER: %[[REAL:.*]] = cir.complex.real %{{.*}} : !cir.complex<!cir.float> -> !cir.float
+// CIR-AFTER: %[[IMAG:.*]] = cir.complex.imag %{{.*}} : !cir.complex<!cir.float> -> !cir.float
+// CIR-AFTER: %[[REAL_FP_CAST:.*]] = cir.cast(floating, %[[REAL]] : !cir.float), !cir.double
+// CIR-AFTER: %[[IMAG_FP_CAST:.*]] = cir.cast(floating, %[[IMAG]] : !cir.float), !cir.double
+// CIR-AFTER: %{{.*}} = cir.complex.create %[[REAL_FP_CAST]], %[[IMAG_FP_CAST]] : !cir.double -> !cir.complex<!cir.double>
+
+// LLVM: %[[REAL:.*]] = extractvalue { float, float } %{{.*}}, 0
+// LLVM: %[[IMAG:.*]] = extractvalue { float, float } %{{.*}}, 1
+// LLVM: %[[REAL_FP_CAST:.*]] = fpext float %[[REAL]] to double
+// LLVM: %[[IMAG_FP_CAST:.*]] = fpext float %[[IMAG]] to double
+// LLVM: %[[TMP:.*]] = insertvalue { double, double } undef, double %[[REAL_FP_CAST]], 0
+// LLVM: %[[COMPLEX:.*]] = insertvalue { double, double } %[[TMP]], double %[[IMAG_FP_CAST]], 1
+// LLVM: store { double, double } %[[COMPLEX]], ptr {{.*}}, align 8
+
+// OGCG: %[[REAL:.*]] = load float, ptr {{.*}}, align 4
+// OGCG: %[[IMAG:.*]] = load float, ptr getelementptr inbounds nuw ({ float, float }, ptr {{.*}}, i32 0, i32 1), align 4
+// OGCG: %[[REAL_FP_CAST:.*]] = fpext float %[[REAL]] to double
+// OGCG: %[[IMAG_FP_CAST:.*]] = fpext float %[[IMAG]] to double
+// OGCG: store double %[[REAL_FP_CAST]], ptr {{.*}}, align 8
+// OGCG: store double %[[IMAG_FP_CAST]], ptr getelementptr inbounds nuw ({ double, double }, ptr {{.*}}, i32 0, i32 1), align 8
+
+// CIR-BEFORE: %[[TMP:.*]] = cir.load{{.*}} %{{.*}} : !cir.ptr<!cir.complex<!s16i>>, !cir.complex<!s16i>
+// CIR-BEFORE: %[[INT_COMPLEX:.*]] = cir.cast(int_complex, %[[TMP]] : !cir.complex<!s16i>), !cir.complex<!s32i>
+
+// CIR-AFTER: %[[REAL:.*]] = cir.complex.real %{{.*}} : !cir.complex<!s16i> -> !s16i
+// CIR-AFTER: %[[IMAG:.*]] = cir.complex.imag %{{.*}} : !cir.complex<!s16i> -> !s16i
+// CIR-AFTER: %[[REAL_INT_CAST:.*]] = cir.cast(integral, %[[REAL]] : !s16i), !s32i
+// CIR-AFTER: %[[IMAG_INT_CAST:.*]] = cir.cast(integral, %[[IMAG]] : !s16i), !s32i
+// CIR-AFTER: %{{.*}} = cir.complex.create %[[REAL_INT_CAST]], %[[IMAG_INT_CAST]] : !s32i -> !cir.complex<!s32i>
+
+// LLVM: %[[REAL:.*]] = extractvalue { i16, i16 } %{{.*}}, 0
+// LLVM: %[[IMAG:.*]] = extractvalue { i16, i16 } %{{.*}}, 1
+// LLVM: %[[REAL_INT_CAST:.*]] = sext i16 %[[REAL]] to i32
+// LLVM: %[[IMAG_INT_CAST:.*]] = sext i16 %[[IMAG]] to i32
+// LLVM: %[[TMP:.*]] = insertvalue { i32, i32 } undef, i32 %[[REAL_INT_CAST]], 0
+// LLVM: %[[COMPLEX:.*]] = insertvalue { i32, i32 } %[[TMP]], i32 %[[IMAG_INT_CAST]], 1
+// LLVM: store { i32, i32 } %[[COMPLEX]], ptr {{.*}}, align 4
+
+// OGCG: %[[REAL:.*]] = load i16, ptr {{.*}}, align 2
+// OGCG: %[[IMAG:.*]] = load i16, ptr getelementptr inbounds nuw ({ i16, i16 }, ptr {{.*}}, i32 0, i32 1), align 2
+// OGCG: %[[REAL_INT_CAST:.*]] = sext i16 %[[REAL]] to i32
+// OGCG: %[[IMAG_INT_CAST:.*]] = sext i16 %[[IMAG]] to i32
+// OGCG: store i32 %[[REAL_INT_CAST]], ptr {{.*}}, align 4
+// OGCG: store i32 %[[IMAG_INT_CAST]], ptr getelementptr inbounds nuw ({ i32, i32 }, ptr {{.*}}, i32 0, i32 1), align 4
+
+struct CX {
+ double real;
+ double imag;
+};
+
+void lvalue_to_rvalue_bitcast() {
+ CX a;
+ double _Complex b = __builtin_bit_cast(double _Complex, a);
+}
+
+
+// CIR-BEFORE: %{{.*}} = cir.cast(bitcast, %{{.*}} : !cir.ptr<!rec_CX>), !cir.ptr<!cir.complex<!cir.double>>
+
+// CIR-AFTER: %{{.*}} = cir.cast(bitcast, %{{.*}} : !cir.ptr<!rec_CX>), !cir.ptr<!cir.complex<!cir.double>>
+
+// LLVM: %[[PTR_ADDR:.*]] = alloca %struct.CX, i64 1, align 8
+// LLVM: %[[COMPLEX_ADDR:.*]] = alloca { double, double }, i64 1, align 8
+// LLVM: %[[PTR_TO_COMPLEX:.*]] = load { double, double }, ptr %[[PTR_ADDR]], align 8
+// LLVM: store { double, double } %[[PTR_TO_COMPLEX]], ptr %[[COMPLEX_ADDR]], align 8
+
+// OGCG: %[[A_ADDR:.*]] = alloca %struct.CX, align 8
+// OGCG: %[[B_ADDR:.*]] = alloca { double, double }, align 8
+// OGCG: %[[A_REAL_PTR:.*]] = getelementptr inbounds nuw { double, double }, ptr %[[A_ADDR]], i32 0, i32 0
+// OGCG: %[[A_REAL:.*]] = load double, ptr %[[A_REAL_PTR]], align 8
+// OGCG: %[[A_IMAG_PTR:.*]] = getelementptr inbounds nuw { double, double }, ptr %[[A_ADDR]], i32 0, i32 1
+// OGCG: %[[A_IMAG:.*]] = load double, ptr %[[A_IMAG_PTR]], align 8
+// OGCG: %[[B_REAL_PTR:.*]] = getelementptr inbounds nuw { double, double }, ptr %[[B_ADDR]], i32 0, i32 0
+// OGCG: %[[B_IMAG_PTR:.*]] = getelementptr inbounds nuw { double, double }, ptr %[[B_ADDR]], i32 0, i32 1
+// OGCG: store double %[[A_REAL]], ptr %[[B_REAL_PTR]], align 8
+// OGCG: store double %[[A_IMAG]], ptr %[[B_IMAG_PTR]], align 8
diff --git a/clang/test/CIR/CodeGen/destructors.cpp b/clang/test/CIR/CodeGen/destructors.cpp
index d8f9f23..de7718f 100644
--- a/clang/test/CIR/CodeGen/destructors.cpp
+++ b/clang/test/CIR/CodeGen/destructors.cpp
@@ -31,11 +31,11 @@ out_of_line_destructor::~out_of_line_destructor() {
// OGCG: ret void
// CIR: cir.func dso_local @_ZN22out_of_line_destructorD1Ev(%{{.+}}: !cir.ptr<!rec_out_of_line_destructor>
-// CIR: cir.call @_Z13some_functionv() nothrow : () -> ()
+// CIR: cir.call @_ZN22out_of_line_destructorD2Ev(%{{.*}}) nothrow : (!cir.ptr<!rec_out_of_line_destructor>)
// CIR: cir.return
// LLVM: define dso_local void @_ZN22out_of_line_destructorD1Ev(ptr %{{.+}})
-// LLVM: call void @_Z13some_functionv()
+// LLVM: call void @_ZN22out_of_line_destructorD2Ev
// LLVM: ret void
// OGCG: define dso_local void @_ZN22out_of_line_destructorD1Ev(ptr {{.*}}%{{.+}})
diff --git a/clang/test/CIR/CodeGen/dtor-alias.cpp b/clang/test/CIR/CodeGen/dtor-alias.cpp
new file mode 100644
index 0000000..e37ddab
--- /dev/null
+++ b/clang/test/CIR/CodeGen/dtor-alias.cpp
@@ -0,0 +1,75 @@
+// RUN: %clang_cc1 -std=c++17 -triple x86_64-unknown-linux-gnu -mconstructor-aliases -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s
+// RUN: %clang_cc1 -std=c++17 -triple x86_64-unknown-linux-gnu -mconstructor-aliases -fclangir -emit-llvm %s -o %t-cir.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t-cir.ll %s
+// RUN: %clang_cc1 -std=c++17 -triple x86_64-unknown-linux-gnu -mconstructor-aliases -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=OGCG --input-file=%t.ll %s
+
+struct B {
+ ~B();
+};
+B::~B() {
+}
+
+// OGCG: @_ZN1BD1Ev = unnamed_addr alias void (ptr), ptr @_ZN1BD2Ev
+
+// CHECK: cir.func{{.*}} @_ZN1BD2Ev(%arg0: !cir.ptr<!rec_B>
+// CHECK: %[[THIS_ADDR:.*]] = cir.alloca !cir.ptr<!rec_B>, !cir.ptr<!cir.ptr<!rec_B>>, ["this", init]
+// CHECK: cir.store %arg0, %[[THIS_ADDR]]
+// CHECK: %[[THIS:.*]] = cir.load %[[THIS_ADDR]] : !cir.ptr<!cir.ptr<!rec_B>>, !cir.ptr<!rec_B>
+
+// CHECK: cir.func{{.*}} private dso_local @_ZN1BD1Ev(!cir.ptr<!rec_B>) alias(@_ZN1BD2Ev)
+
+// LLVM: define{{.*}} @_ZN1BD2Ev(ptr %[[THIS_ARG:.*]])
+// LLVM: %[[THIS_ADDR:.*]] = alloca ptr
+// LLVM: store ptr %[[THIS_ARG]], ptr %[[THIS_ADDR]]
+// LLVM: %[[THIS:.*]] = load ptr, ptr %[[THIS_ADDR]]
+
+// This should be an alias, like the similar OGCG alias above, but that's not
+// implemented yet.
+// LLVM: declare dso_local void @_ZN1BD1Ev(ptr)
+
+// OGCG: define{{.*}} @_ZN1BD2Ev(ptr{{.*}} %[[THIS_ARG:.*]])
+// OGCG: %[[THIS_ADDR:.*]] = alloca ptr
+// OGCG: store ptr %[[THIS_ARG]], ptr %[[THIS_ADDR]]
+// OGCG: %[[THIS:.*]] = load ptr, ptr %[[THIS_ADDR]]
+
+// The destructor in this case is handled by RAUW rather than aliasing.
+struct Struk {
+ ~Struk() {}
+};
+
+void baz() {
+ Struk s;
+}
+
+// CHECK: cir.func{{.*}} @_ZN5StrukD2Ev(%arg0: !cir.ptr<!rec_Struk>
+// CHECK: %[[THIS_ADDR:.*]] = cir.alloca !cir.ptr<!rec_Struk>, !cir.ptr<!cir.ptr<!rec_Struk>>, ["this", init]
+// CHECK: cir.store %arg0, %[[THIS_ADDR]] : !cir.ptr<!rec_Struk>, !cir.ptr<!cir.ptr<!rec_Struk>>
+// CHECK: %[[THIS:.*]] = cir.load %[[THIS_ADDR]] : !cir.ptr<!cir.ptr<!rec_Struk>>, !cir.ptr<!rec_Struk>
+// CHECK: cir.return
+
+// CHECK-NOT: cir.func{{.*}} @_ZN5StrukD1Ev
+
+// CHECK: cir.func{{.*}} @_Z3bazv()
+// CHECK: %[[S_ADDR:.*]] = cir.alloca !rec_Struk, !cir.ptr<!rec_Struk>, ["s"]
+// CHECK: cir.call @_ZN5StrukD2Ev(%[[S_ADDR]]) nothrow : (!cir.ptr<!rec_Struk>) -> ()
+
+// LLVM: define linkonce_odr void @_ZN5StrukD2Ev(ptr %[[THIS_ARG]])
+// LLVM: %[[THIS_ADDR:.*]] = alloca ptr
+// LLVM: store ptr %[[THIS_ARG]], ptr %[[THIS_ADDR]]
+// LLVM: %[[THIS:.*]] = load ptr, ptr %[[THIS_ADDR]]
+
+// LLVM: define{{.*}} void @_Z3bazv()
+// LLVM: %[[S_ADDR:.*]] = alloca %struct.Struk
+// LLVM: call void @_ZN5StrukD2Ev(ptr{{.*}} %[[S_ADDR]])
+
+// This function gets emitted before the destructor in OGCG.
+// OGCG: define{{.*}} void @_Z3bazv()
+// OGCG: %[[S_ADDR:.*]] = alloca %struct.Struk
+// OGCG: call void @_ZN5StrukD2Ev(ptr{{.*}} %[[S_ADDR]])
+
+// OGCG: define linkonce_odr void @_ZN5StrukD2Ev(ptr{{.*}} %[[THIS_ARG]])
+// OGCG: %[[THIS_ADDR:.*]] = alloca ptr
+// OGCG: store ptr %[[THIS_ARG]], ptr %[[THIS_ADDR]]
+// OGCG: %[[THIS:.*]] = load ptr, ptr %[[THIS_ADDR]]
diff --git a/clang/test/CIR/IR/array-ctor.cir b/clang/test/CIR/IR/array-ctor.cir
new file mode 100644
index 0000000..2378992
--- /dev/null
+++ b/clang/test/CIR/IR/array-ctor.cir
@@ -0,0 +1,29 @@
+
+// RUN: cir-opt %s | FileCheck %s
+
+!u8i = !cir.int<u, 8>
+!rec_S = !cir.record<struct "S" padded {!u8i}>
+
+module {
+ cir.func private @_ZN1SC1Ev(!cir.ptr<!rec_S>)
+ cir.func dso_local @_Z3foov() {
+ %0 = cir.alloca !cir.array<!rec_S x 42>, !cir.ptr<!cir.array<!rec_S x 42>>, ["s", init] {alignment = 16 : i64}
+ cir.array.ctor %0 : !cir.ptr<!cir.array<!rec_S x 42>> {
+ ^bb0(%arg0: !cir.ptr<!rec_S>):
+ cir.call @_ZN1SC1Ev(%arg0) : (!cir.ptr<!rec_S>) -> ()
+ cir.yield
+ }
+ cir.return
+ }
+
+ // CHECK: cir.func private @_ZN1SC1Ev(!cir.ptr<!rec_S>)
+ // CHECK: cir.func dso_local @_Z3foov() {
+ // CHECK: %0 = cir.alloca !cir.array<!rec_S x 42>, !cir.ptr<!cir.array<!rec_S x 42>>, ["s", init] {alignment = 16 : i64}
+ // CHECK: cir.array.ctor %0 : !cir.ptr<!cir.array<!rec_S x 42>> {
+ // CHECK: ^bb0(%arg0: !cir.ptr<!rec_S>):
+ // CHECK: cir.call @_ZN1SC1Ev(%arg0) : (!cir.ptr<!rec_S>) -> ()
+ // CHECK: cir.yield
+ // CHECK: }
+ // CHECK: cir.return
+ // CHECK: }
+}
diff --git a/clang/test/ClangScanDeps/modules-full-named-modules.cppm b/clang/test/ClangScanDeps/modules-full-named-modules.cppm
index 5967a87..c69a215 100644
--- a/clang/test/ClangScanDeps/modules-full-named-modules.cppm
+++ b/clang/test/ClangScanDeps/modules-full-named-modules.cppm
@@ -92,14 +92,7 @@ export void Hello();
// CHECK-NEXT: ]
// CHECK: "command-line": [
// CHECK: "-o",
-// CHECK-NEXT: "{{.*}}/M-{{.*}}.pcm"
-// CHECK: ]
-// CHECK: "input-file": "[[PREFIX]]/M.cppm"
-// CHECK: },
-// CHECK-NEXT: {
-// CHECK: "command-line": [
-// CHECK: "-o",
-// CHECK-NEXT: "[[PREFIX]]/M.o"
+// CHECK-NEXT: "{{.*}}/M.o"
// CHECK: ]
// CHECK: "input-file": "[[PREFIX]]/M.cppm"
// CHECK: }
@@ -160,18 +153,7 @@ void World() {
// CHECK-NEXT: ]
// CHECK: "command-line": [
// CHECK: "-o",
-// CHECK-NEXT: "{{.*}}/impl_part-{{.*}}.pcm",
-// CHECK: ]
-// CHECK: "input-file": "[[PREFIX]]/impl_part.cppm"
-// CHECK: },
-// CHECK-NEXT: {
-// CHECK: "named-module": "M:impl_part"
-// CHECK-NEXT: "named-module-deps": [
-// CHECK-NEXT: "M:interface_part"
-// CHECK-NEXT: ]
-// CHECK: "command-line": [
-// CHECK: "-o",
-// CHECK-NEXT: "[[PREFIX]]/impl_part.o",
+// CHECK-NEXT: "{{.*}}/impl_part.o",
// CHECK: ]
// CHECK: "input-file": "[[PREFIX]]/impl_part.cppm"
// CHECK: }
@@ -194,16 +176,7 @@ export void World();
// CHECK-NOT: "named-module-deps": []
// CHECK: "command-line": [
// CHECK: "-o",
-// CHECK-NEXT: "{{.*}}/interface_part-{{.*}}.pcm",
-// CHECK: ]
-// CHECK: "input-file": "[[PREFIX]]/interface_part.cppm"
-// CHECK: },
-// CHECK-NEXT: {
-// CHECK: "named-module": "M:interface_part"
-// CHECK-NOT: "named-module-deps": []
-// CHECK: "command-line": [
-// CHECK: "-o",
-// CHECK-NEXT: "[[PREFIX]]/interface_part.o",
+// CHECK-NEXT: "{{.*}}/interface_part.o",
// CHECK: ]
// CHECK: "input-file": "[[PREFIX]]/interface_part.cppm"
// CHECK: }
@@ -259,14 +232,7 @@ int main() {
// CHECK-NEXT: ]
// CHECK: "command-line": [
// CHECK: "-o",
-// CHECK-NEXT: "{{.*}}/M-{{.*}}.pcm"
-// CHECK: ]
-// CHECK: "input-file": "[[PREFIX]]/M.cppm"
-// CHECK: },
-// CHECK-NEXT: {
-// CHECK: "command-line": [
-// CHECK: "-o",
-// CHECK-NEXT: "[[PREFIX]]/M.o"
+// CHECK-NEXT: "{{.*}}/M.o"
// CHECK: ]
// CHECK: "input-file": "[[PREFIX]]/M.cppm"
// CHECK: },
@@ -292,18 +258,7 @@ int main() {
// CHECK-NEXT: ]
// CHECK: "command-line": [
// CHECK: "-o",
-// CHECK-NEXT: "{{.*}}/impl_part-{{.*}}.pcm",
-// CHECK: ]
-// CHECK: "input-file": "[[PREFIX]]/impl_part.cppm"
-// CHECK: },
-// CHECK-NEXT: {
-// CHECK: "named-module": "M:impl_part"
-// CHECK-NEXT: "named-module-deps": [
-// CHECK-NEXT: "M:interface_part"
-// CHECK-NEXT: ]
-// CHECK: "command-line": [
-// CHECK: "-o",
-// CHECK-NEXT: "[[PREFIX]]/impl_part.o",
+// CHECK-NEXT: "{{.*}}/impl_part.o",
// CHECK: ]
// CHECK: "input-file": "[[PREFIX]]/impl_part.cppm"
// CHECK: }
@@ -316,16 +271,7 @@ int main() {
// CHECK-NOT: "named-module-deps": []
// CHECK: "command-line": [
// CHECK: "-o",
-// CHECK-NEXT: "{{.*}}/interface_part-{{.*}}.pcm",
-// CHECK: ]
-// CHECK: "input-file": "[[PREFIX]]/interface_part.cppm"
-// CHECK: },
-// CHECK-NEXT: {
-// CHECK: "named-module": "M:interface_part"
-// CHECK-NOT: "named-module-deps": []
-// CHECK: "command-line": [
-// CHECK: "-o",
-// CHECK-NEXT: "[[PREFIX]]/interface_part.o",
+// CHECK-NEXT: "{{.*}}/interface_part.o",
// CHECK: ]
// CHECK: "input-file": "[[PREFIX]]/interface_part.cppm"
// CHECK: }
diff --git a/clang/test/CodeGen/AArch64/neon-intrinsics.c b/clang/test/CodeGen/AArch64/neon-intrinsics.c
index 6304245..035e1ca 100644
--- a/clang/test/CodeGen/AArch64/neon-intrinsics.c
+++ b/clang/test/CodeGen/AArch64/neon-intrinsics.c
@@ -8585,7 +8585,7 @@ uint32x2_t test_vqshrun_n_s64(int64x2_t a) {
// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> [[VQSHRUN_N3]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK-NEXT: ret <16 x i8> [[SHUFFLE_I]]
//
-int8x16_t test_vqshrun_high_n_s16(int8x8_t a, int16x8_t b) {
+uint8x16_t test_vqshrun_high_n_s16(uint8x8_t a, int16x8_t b) {
return vqshrun_high_n_s16(a, b, 3);
}
@@ -8598,7 +8598,7 @@ int8x16_t test_vqshrun_high_n_s16(int8x8_t a, int16x8_t b) {
// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> [[A]], <4 x i16> [[VQSHRUN_N3]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK-NEXT: ret <8 x i16> [[SHUFFLE_I]]
//
-int16x8_t test_vqshrun_high_n_s32(int16x4_t a, int32x4_t b) {
+uint16x8_t test_vqshrun_high_n_s32(uint16x4_t a, int32x4_t b) {
return vqshrun_high_n_s32(a, b, 9);
}
@@ -8611,7 +8611,7 @@ int16x8_t test_vqshrun_high_n_s32(int16x4_t a, int32x4_t b) {
// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> [[A]], <2 x i32> [[VQSHRUN_N3]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK-NEXT: ret <4 x i32> [[SHUFFLE_I]]
//
-int32x4_t test_vqshrun_high_n_s64(int32x2_t a, int64x2_t b) {
+uint32x4_t test_vqshrun_high_n_s64(uint32x2_t a, int64x2_t b) {
return vqshrun_high_n_s64(a, b, 19);
}
@@ -8810,7 +8810,7 @@ uint32x2_t test_vqrshrun_n_s64(int64x2_t a) {
// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> [[VQRSHRUN_N3]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK-NEXT: ret <16 x i8> [[SHUFFLE_I]]
//
-int8x16_t test_vqrshrun_high_n_s16(int8x8_t a, int16x8_t b) {
+uint8x16_t test_vqrshrun_high_n_s16(uint8x8_t a, int16x8_t b) {
return vqrshrun_high_n_s16(a, b, 3);
}
@@ -8823,7 +8823,7 @@ int8x16_t test_vqrshrun_high_n_s16(int8x8_t a, int16x8_t b) {
// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> [[A]], <4 x i16> [[VQRSHRUN_N3]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK-NEXT: ret <8 x i16> [[SHUFFLE_I]]
//
-int16x8_t test_vqrshrun_high_n_s32(int16x4_t a, int32x4_t b) {
+uint16x8_t test_vqrshrun_high_n_s32(uint16x4_t a, int32x4_t b) {
return vqrshrun_high_n_s32(a, b, 9);
}
@@ -8836,7 +8836,7 @@ int16x8_t test_vqrshrun_high_n_s32(int16x4_t a, int32x4_t b) {
// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> [[A]], <2 x i32> [[VQRSHRUN_N3]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK-NEXT: ret <4 x i32> [[SHUFFLE_I]]
//
-int32x4_t test_vqrshrun_high_n_s64(int32x2_t a, int64x2_t b) {
+uint32x4_t test_vqrshrun_high_n_s64(uint32x2_t a, int64x2_t b) {
return vqrshrun_high_n_s64(a, b, 19);
}
diff --git a/clang/test/CodeGen/PowerPC/ppc-dmf-future-builtin-err.c b/clang/test/CodeGen/PowerPC/ppc-dmf-future-builtin-err.c
deleted file mode 100644
index 9def39f..0000000
--- a/clang/test/CodeGen/PowerPC/ppc-dmf-future-builtin-err.c
+++ /dev/null
@@ -1,15 +0,0 @@
-// RUN: not %clang_cc1 -triple powerpc64le-unknown-linux-gnu -target-cpu pwr10 \
-// RUN: %s -emit-llvm-only 2>&1 | FileCheck %s
-
-__attribute__((target("no-mma")))
-void test_mma(unsigned char *vdmrp, unsigned char *vpp, vector unsigned char vc) {
- __dmr1024 vdmr = *((__dmr1024 *)vdmrp);
- __vector_pair vp = *((__vector_pair *)vpp);
- __builtin_mma_dmsetdmrz(&vdmr);
- __builtin_mma_dmmr(&vdmr, (__dmr1024*)vpp);
- __builtin_mma_dmxor(&vdmr, (__dmr1024*)vpp);
-
-// CHECK: error: '__builtin_mma_dmsetdmrz' needs target feature mma,isa-future-instructions
-// CHECK: error: '__builtin_mma_dmmr' needs target feature mma,isa-future-instructions
-// CHECK: error: '__builtin_mma_dmxor' needs target feature mma,isa-future-instructions
-}
diff --git a/clang/test/CodeGen/PowerPC/ppc-dmf-mma-builtin-err.c b/clang/test/CodeGen/PowerPC/ppc-dmf-mma-builtin-err.c
index c022746..5a92d6e 100644
--- a/clang/test/CodeGen/PowerPC/ppc-dmf-mma-builtin-err.c
+++ b/clang/test/CodeGen/PowerPC/ppc-dmf-mma-builtin-err.c
@@ -1,3 +1,5 @@
+// RUN: not %clang_cc1 -triple powerpc64le-unknown-linux-gnu -target-cpu pwr10 \
+// RUN: %s -emit-llvm-only 2>&1 | FileCheck %s
// RUN: not %clang_cc1 -triple powerpc64le-unknown-linux-gnu -target-cpu future \
// RUN: %s -emit-llvm-only 2>&1 | FileCheck %s
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vcompress.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vcompress.c
new file mode 100644
index 0000000..6cff3df
--- /dev/null
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vcompress.c
@@ -0,0 +1,74 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4
+// REQUIRES: riscv-registered-target
+// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \
+// RUN: -target-feature +zvfbfmin -disable-O0-optnone \
+// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \
+// RUN: FileCheck --check-prefix=CHECK-RV64 %s
+
+#include <riscv_vector.h>
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 1 x bfloat> @test_vcompress_vm_bf16mf4(
+// CHECK-RV64-SAME: <vscale x 1 x bfloat> [[VS2:%.*]], <vscale x 1 x i1> [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 1 x bfloat> @llvm.riscv.vcompress.nxv1bf16.i64(<vscale x 1 x bfloat> poison, <vscale x 1 x bfloat> [[VS2]], <vscale x 1 x i1> [[VS1]], i64 [[VL]])
+// CHECK-RV64-NEXT: ret <vscale x 1 x bfloat> [[TMP0]]
+//
+vbfloat16mf4_t test_vcompress_vm_bf16mf4(vbfloat16mf4_t vs2, vbool64_t vs1,
+ size_t vl) {
+ return __riscv_vcompress_vm_bf16mf4(vs2, vs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 2 x bfloat> @test_vcompress_vm_bf16mf2(
+// CHECK-RV64-SAME: <vscale x 2 x bfloat> [[VS2:%.*]], <vscale x 2 x i1> [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 2 x bfloat> @llvm.riscv.vcompress.nxv2bf16.i64(<vscale x 2 x bfloat> poison, <vscale x 2 x bfloat> [[VS2]], <vscale x 2 x i1> [[VS1]], i64 [[VL]])
+// CHECK-RV64-NEXT: ret <vscale x 2 x bfloat> [[TMP0]]
+//
+vbfloat16mf2_t test_vcompress_vm_bf16mf2(vbfloat16mf2_t vs2, vbool32_t vs1,
+ size_t vl) {
+ return __riscv_vcompress_vm_bf16mf2(vs2, vs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 4 x bfloat> @test_vcompress_vm_bf16m1(
+// CHECK-RV64-SAME: <vscale x 4 x bfloat> [[VS2:%.*]], <vscale x 4 x i1> [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 4 x bfloat> @llvm.riscv.vcompress.nxv4bf16.i64(<vscale x 4 x bfloat> poison, <vscale x 4 x bfloat> [[VS2]], <vscale x 4 x i1> [[VS1]], i64 [[VL]])
+// CHECK-RV64-NEXT: ret <vscale x 4 x bfloat> [[TMP0]]
+//
+vbfloat16m1_t test_vcompress_vm_bf16m1(vbfloat16m1_t vs2, vbool16_t vs1,
+ size_t vl) {
+ return __riscv_vcompress_vm_bf16m1(vs2, vs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 8 x bfloat> @test_vcompress_vm_bf16m2(
+// CHECK-RV64-SAME: <vscale x 8 x bfloat> [[VS2:%.*]], <vscale x 8 x i1> [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 8 x bfloat> @llvm.riscv.vcompress.nxv8bf16.i64(<vscale x 8 x bfloat> poison, <vscale x 8 x bfloat> [[VS2]], <vscale x 8 x i1> [[VS1]], i64 [[VL]])
+// CHECK-RV64-NEXT: ret <vscale x 8 x bfloat> [[TMP0]]
+//
+vbfloat16m2_t test_vcompress_vm_bf16m2(vbfloat16m2_t vs2, vbool8_t vs1,
+ size_t vl) {
+ return __riscv_vcompress_vm_bf16m2(vs2, vs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 16 x bfloat> @test_vcompress_vm_bf16m4(
+// CHECK-RV64-SAME: <vscale x 16 x bfloat> [[VS2:%.*]], <vscale x 16 x i1> [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 16 x bfloat> @llvm.riscv.vcompress.nxv16bf16.i64(<vscale x 16 x bfloat> poison, <vscale x 16 x bfloat> [[VS2]], <vscale x 16 x i1> [[VS1]], i64 [[VL]])
+// CHECK-RV64-NEXT: ret <vscale x 16 x bfloat> [[TMP0]]
+//
+vbfloat16m4_t test_vcompress_vm_bf16m4(vbfloat16m4_t vs2, vbool4_t vs1,
+ size_t vl) {
+ return __riscv_vcompress_vm_bf16m4(vs2, vs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 32 x bfloat> @test_vcompress_vm_bf16m8(
+// CHECK-RV64-SAME: <vscale x 32 x bfloat> [[VS2:%.*]], <vscale x 32 x i1> [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 32 x bfloat> @llvm.riscv.vcompress.nxv32bf16.i64(<vscale x 32 x bfloat> poison, <vscale x 32 x bfloat> [[VS2]], <vscale x 32 x i1> [[VS1]], i64 [[VL]])
+// CHECK-RV64-NEXT: ret <vscale x 32 x bfloat> [[TMP0]]
+//
+vbfloat16m8_t test_vcompress_vm_bf16m8(vbfloat16m8_t vs2, vbool2_t vs1,
+ size_t vl) {
+ return __riscv_vcompress_vm_bf16m8(vs2, vs1, vl);
+}
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vrgather.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vrgather.c
new file mode 100644
index 0000000..cb0004f
--- /dev/null
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/non-overloaded/vrgather.c
@@ -0,0 +1,272 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4
+// REQUIRES: riscv-registered-target
+// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \
+// RUN: -target-feature +zvfbfmin -disable-O0-optnone \
+// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \
+// RUN: FileCheck --check-prefix=CHECK-RV64 %s
+
+#include <riscv_vector.h>
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 1 x bfloat> @test_vrgather_vv_bf16mf4(
+// CHECK-RV64-SAME: <vscale x 1 x bfloat> [[VS2:%.*]], <vscale x 1 x i16> [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 1 x bfloat> @llvm.riscv.vrgather.vv.nxv1bf16.i64(<vscale x 1 x bfloat> poison, <vscale x 1 x bfloat> [[VS2]], <vscale x 1 x i16> [[VS1]], i64 [[VL]])
+// CHECK-RV64-NEXT: ret <vscale x 1 x bfloat> [[TMP0]]
+//
+vbfloat16mf4_t test_vrgather_vv_bf16mf4(vbfloat16mf4_t vs2, vuint16mf4_t vs1,
+ size_t vl) {
+ return __riscv_vrgather_vv_bf16mf4(vs2, vs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 1 x bfloat> @test_vrgather_vx_bf16mf4(
+// CHECK-RV64-SAME: <vscale x 1 x bfloat> [[VS2:%.*]], i64 noundef [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 1 x bfloat> @llvm.riscv.vrgather.vx.nxv1bf16.i64(<vscale x 1 x bfloat> poison, <vscale x 1 x bfloat> [[VS2]], i64 [[VS1]], i64 [[VL]])
+// CHECK-RV64-NEXT: ret <vscale x 1 x bfloat> [[TMP0]]
+//
+vbfloat16mf4_t test_vrgather_vx_bf16mf4(vbfloat16mf4_t vs2, size_t vs1,
+ size_t vl) {
+ return __riscv_vrgather_vx_bf16mf4(vs2, vs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 2 x bfloat> @test_vrgather_vv_bf16mf2(
+// CHECK-RV64-SAME: <vscale x 2 x bfloat> [[VS2:%.*]], <vscale x 2 x i16> [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 2 x bfloat> @llvm.riscv.vrgather.vv.nxv2bf16.i64(<vscale x 2 x bfloat> poison, <vscale x 2 x bfloat> [[VS2]], <vscale x 2 x i16> [[VS1]], i64 [[VL]])
+// CHECK-RV64-NEXT: ret <vscale x 2 x bfloat> [[TMP0]]
+//
+vbfloat16mf2_t test_vrgather_vv_bf16mf2(vbfloat16mf2_t vs2, vuint16mf2_t vs1,
+ size_t vl) {
+ return __riscv_vrgather_vv_bf16mf2(vs2, vs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 2 x bfloat> @test_vrgather_vx_bf16mf2(
+// CHECK-RV64-SAME: <vscale x 2 x bfloat> [[VS2:%.*]], i64 noundef [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 2 x bfloat> @llvm.riscv.vrgather.vx.nxv2bf16.i64(<vscale x 2 x bfloat> poison, <vscale x 2 x bfloat> [[VS2]], i64 [[VS1]], i64 [[VL]])
+// CHECK-RV64-NEXT: ret <vscale x 2 x bfloat> [[TMP0]]
+//
+vbfloat16mf2_t test_vrgather_vx_bf16mf2(vbfloat16mf2_t vs2, size_t vs1,
+ size_t vl) {
+ return __riscv_vrgather_vx_bf16mf2(vs2, vs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 4 x bfloat> @test_vrgather_vv_bf16m1(
+// CHECK-RV64-SAME: <vscale x 4 x bfloat> [[VS2:%.*]], <vscale x 4 x i16> [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 4 x bfloat> @llvm.riscv.vrgather.vv.nxv4bf16.i64(<vscale x 4 x bfloat> poison, <vscale x 4 x bfloat> [[VS2]], <vscale x 4 x i16> [[VS1]], i64 [[VL]])
+// CHECK-RV64-NEXT: ret <vscale x 4 x bfloat> [[TMP0]]
+//
+vbfloat16m1_t test_vrgather_vv_bf16m1(vbfloat16m1_t vs2, vuint16m1_t vs1,
+ size_t vl) {
+ return __riscv_vrgather_vv_bf16m1(vs2, vs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 4 x bfloat> @test_vrgather_vx_bf16m1(
+// CHECK-RV64-SAME: <vscale x 4 x bfloat> [[VS2:%.*]], i64 noundef [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 4 x bfloat> @llvm.riscv.vrgather.vx.nxv4bf16.i64(<vscale x 4 x bfloat> poison, <vscale x 4 x bfloat> [[VS2]], i64 [[VS1]], i64 [[VL]])
+// CHECK-RV64-NEXT: ret <vscale x 4 x bfloat> [[TMP0]]
+//
+vbfloat16m1_t test_vrgather_vx_bf16m1(vbfloat16m1_t vs2, size_t vs1,
+ size_t vl) {
+ return __riscv_vrgather_vx_bf16m1(vs2, vs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 8 x bfloat> @test_vrgather_vv_bf16m2(
+// CHECK-RV64-SAME: <vscale x 8 x bfloat> [[VS2:%.*]], <vscale x 8 x i16> [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 8 x bfloat> @llvm.riscv.vrgather.vv.nxv8bf16.i64(<vscale x 8 x bfloat> poison, <vscale x 8 x bfloat> [[VS2]], <vscale x 8 x i16> [[VS1]], i64 [[VL]])
+// CHECK-RV64-NEXT: ret <vscale x 8 x bfloat> [[TMP0]]
+//
+vbfloat16m2_t test_vrgather_vv_bf16m2(vbfloat16m2_t vs2, vuint16m2_t vs1,
+ size_t vl) {
+ return __riscv_vrgather_vv_bf16m2(vs2, vs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 8 x bfloat> @test_vrgather_vx_bf16m2(
+// CHECK-RV64-SAME: <vscale x 8 x bfloat> [[VS2:%.*]], i64 noundef [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 8 x bfloat> @llvm.riscv.vrgather.vx.nxv8bf16.i64(<vscale x 8 x bfloat> poison, <vscale x 8 x bfloat> [[VS2]], i64 [[VS1]], i64 [[VL]])
+// CHECK-RV64-NEXT: ret <vscale x 8 x bfloat> [[TMP0]]
+//
+vbfloat16m2_t test_vrgather_vx_bf16m2(vbfloat16m2_t vs2, size_t vs1,
+ size_t vl) {
+ return __riscv_vrgather_vx_bf16m2(vs2, vs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 16 x bfloat> @test_vrgather_vv_bf16m4(
+// CHECK-RV64-SAME: <vscale x 16 x bfloat> [[VS2:%.*]], <vscale x 16 x i16> [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 16 x bfloat> @llvm.riscv.vrgather.vv.nxv16bf16.i64(<vscale x 16 x bfloat> poison, <vscale x 16 x bfloat> [[VS2]], <vscale x 16 x i16> [[VS1]], i64 [[VL]])
+// CHECK-RV64-NEXT: ret <vscale x 16 x bfloat> [[TMP0]]
+//
+vbfloat16m4_t test_vrgather_vv_bf16m4(vbfloat16m4_t vs2, vuint16m4_t vs1,
+ size_t vl) {
+ return __riscv_vrgather_vv_bf16m4(vs2, vs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 16 x bfloat> @test_vrgather_vx_bf16m4(
+// CHECK-RV64-SAME: <vscale x 16 x bfloat> [[VS2:%.*]], i64 noundef [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 16 x bfloat> @llvm.riscv.vrgather.vx.nxv16bf16.i64(<vscale x 16 x bfloat> poison, <vscale x 16 x bfloat> [[VS2]], i64 [[VS1]], i64 [[VL]])
+// CHECK-RV64-NEXT: ret <vscale x 16 x bfloat> [[TMP0]]
+//
+vbfloat16m4_t test_vrgather_vx_bf16m4(vbfloat16m4_t vs2, size_t vs1,
+ size_t vl) {
+ return __riscv_vrgather_vx_bf16m4(vs2, vs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 32 x bfloat> @test_vrgather_vv_bf16m8(
+// CHECK-RV64-SAME: <vscale x 32 x bfloat> [[VS2:%.*]], <vscale x 32 x i16> [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 32 x bfloat> @llvm.riscv.vrgather.vv.nxv32bf16.i64(<vscale x 32 x bfloat> poison, <vscale x 32 x bfloat> [[VS2]], <vscale x 32 x i16> [[VS1]], i64 [[VL]])
+// CHECK-RV64-NEXT: ret <vscale x 32 x bfloat> [[TMP0]]
+//
+vbfloat16m8_t test_vrgather_vv_bf16m8(vbfloat16m8_t vs2, vuint16m8_t vs1,
+ size_t vl) {
+ return __riscv_vrgather_vv_bf16m8(vs2, vs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 32 x bfloat> @test_vrgather_vx_bf16m8(
+// CHECK-RV64-SAME: <vscale x 32 x bfloat> [[VS2:%.*]], i64 noundef [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 32 x bfloat> @llvm.riscv.vrgather.vx.nxv32bf16.i64(<vscale x 32 x bfloat> poison, <vscale x 32 x bfloat> [[VS2]], i64 [[VS1]], i64 [[VL]])
+// CHECK-RV64-NEXT: ret <vscale x 32 x bfloat> [[TMP0]]
+//
+vbfloat16m8_t test_vrgather_vx_bf16m8(vbfloat16m8_t vs2, size_t vs1,
+ size_t vl) {
+ return __riscv_vrgather_vx_bf16m8(vs2, vs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 1 x bfloat> @test_vrgather_vv_bf16mf4_m(
+// CHECK-RV64-SAME: <vscale x 1 x i1> [[VM:%.*]], <vscale x 1 x bfloat> [[VS2:%.*]], <vscale x 1 x i16> [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 1 x bfloat> @llvm.riscv.vrgather.vv.mask.nxv1bf16.i64(<vscale x 1 x bfloat> poison, <vscale x 1 x bfloat> [[VS2]], <vscale x 1 x i16> [[VS1]], <vscale x 1 x i1> [[VM]], i64 [[VL]], i64 3)
+// CHECK-RV64-NEXT: ret <vscale x 1 x bfloat> [[TMP0]]
+//
+vbfloat16mf4_t test_vrgather_vv_bf16mf4_m(vbool64_t vm, vbfloat16mf4_t vs2,
+ vuint16mf4_t vs1, size_t vl) {
+ return __riscv_vrgather_vv_bf16mf4_m(vm, vs2, vs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 1 x bfloat> @test_vrgather_vx_bf16mf4_m(
+// CHECK-RV64-SAME: <vscale x 1 x i1> [[VM:%.*]], <vscale x 1 x bfloat> [[VS2:%.*]], i64 noundef [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 1 x bfloat> @llvm.riscv.vrgather.vx.mask.nxv1bf16.i64(<vscale x 1 x bfloat> poison, <vscale x 1 x bfloat> [[VS2]], i64 [[VS1]], <vscale x 1 x i1> [[VM]], i64 [[VL]], i64 3)
+// CHECK-RV64-NEXT: ret <vscale x 1 x bfloat> [[TMP0]]
+//
+vbfloat16mf4_t test_vrgather_vx_bf16mf4_m(vbool64_t vm, vbfloat16mf4_t vs2,
+ size_t vs1, size_t vl) {
+ return __riscv_vrgather_vx_bf16mf4_m(vm, vs2, vs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 2 x bfloat> @test_vrgather_vv_bf16mf2_m(
+// CHECK-RV64-SAME: <vscale x 2 x i1> [[VM:%.*]], <vscale x 2 x bfloat> [[VS2:%.*]], <vscale x 2 x i16> [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 2 x bfloat> @llvm.riscv.vrgather.vv.mask.nxv2bf16.i64(<vscale x 2 x bfloat> poison, <vscale x 2 x bfloat> [[VS2]], <vscale x 2 x i16> [[VS1]], <vscale x 2 x i1> [[VM]], i64 [[VL]], i64 3)
+// CHECK-RV64-NEXT: ret <vscale x 2 x bfloat> [[TMP0]]
+//
+vbfloat16mf2_t test_vrgather_vv_bf16mf2_m(vbool32_t vm, vbfloat16mf2_t vs2,
+ vuint16mf2_t vs1, size_t vl) {
+ return __riscv_vrgather_vv_bf16mf2_m(vm, vs2, vs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 2 x bfloat> @test_vrgather_vx_bf16mf2_m(
+// CHECK-RV64-SAME: <vscale x 2 x i1> [[VM:%.*]], <vscale x 2 x bfloat> [[VS2:%.*]], i64 noundef [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 2 x bfloat> @llvm.riscv.vrgather.vx.mask.nxv2bf16.i64(<vscale x 2 x bfloat> poison, <vscale x 2 x bfloat> [[VS2]], i64 [[VS1]], <vscale x 2 x i1> [[VM]], i64 [[VL]], i64 3)
+// CHECK-RV64-NEXT: ret <vscale x 2 x bfloat> [[TMP0]]
+//
+vbfloat16mf2_t test_vrgather_vx_bf16mf2_m(vbool32_t vm, vbfloat16mf2_t vs2,
+ size_t vs1, size_t vl) {
+ return __riscv_vrgather_vx_bf16mf2_m(vm, vs2, vs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 4 x bfloat> @test_vrgather_vv_bf16m1_m(
+// CHECK-RV64-SAME: <vscale x 4 x i1> [[VM:%.*]], <vscale x 4 x bfloat> [[VS2:%.*]], <vscale x 4 x i16> [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 4 x bfloat> @llvm.riscv.vrgather.vv.mask.nxv4bf16.i64(<vscale x 4 x bfloat> poison, <vscale x 4 x bfloat> [[VS2]], <vscale x 4 x i16> [[VS1]], <vscale x 4 x i1> [[VM]], i64 [[VL]], i64 3)
+// CHECK-RV64-NEXT: ret <vscale x 4 x bfloat> [[TMP0]]
+//
+vbfloat16m1_t test_vrgather_vv_bf16m1_m(vbool16_t vm, vbfloat16m1_t vs2,
+ vuint16m1_t vs1, size_t vl) {
+ return __riscv_vrgather_vv_bf16m1_m(vm, vs2, vs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 4 x bfloat> @test_vrgather_vx_bf16m1_m(
+// CHECK-RV64-SAME: <vscale x 4 x i1> [[VM:%.*]], <vscale x 4 x bfloat> [[VS2:%.*]], i64 noundef [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 4 x bfloat> @llvm.riscv.vrgather.vx.mask.nxv4bf16.i64(<vscale x 4 x bfloat> poison, <vscale x 4 x bfloat> [[VS2]], i64 [[VS1]], <vscale x 4 x i1> [[VM]], i64 [[VL]], i64 3)
+// CHECK-RV64-NEXT: ret <vscale x 4 x bfloat> [[TMP0]]
+//
+vbfloat16m1_t test_vrgather_vx_bf16m1_m(vbool16_t vm, vbfloat16m1_t vs2,
+ size_t vs1, size_t vl) {
+ return __riscv_vrgather_vx_bf16m1_m(vm, vs2, vs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 8 x bfloat> @test_vrgather_vv_bf16m2_m(
+// CHECK-RV64-SAME: <vscale x 8 x i1> [[VM:%.*]], <vscale x 8 x bfloat> [[VS2:%.*]], <vscale x 8 x i16> [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 8 x bfloat> @llvm.riscv.vrgather.vv.mask.nxv8bf16.i64(<vscale x 8 x bfloat> poison, <vscale x 8 x bfloat> [[VS2]], <vscale x 8 x i16> [[VS1]], <vscale x 8 x i1> [[VM]], i64 [[VL]], i64 3)
+// CHECK-RV64-NEXT: ret <vscale x 8 x bfloat> [[TMP0]]
+//
+vbfloat16m2_t test_vrgather_vv_bf16m2_m(vbool8_t vm, vbfloat16m2_t vs2,
+ vuint16m2_t vs1, size_t vl) {
+ return __riscv_vrgather_vv_bf16m2_m(vm, vs2, vs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 8 x bfloat> @test_vrgather_vx_bf16m2_m(
+// CHECK-RV64-SAME: <vscale x 8 x i1> [[VM:%.*]], <vscale x 8 x bfloat> [[VS2:%.*]], i64 noundef [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 8 x bfloat> @llvm.riscv.vrgather.vx.mask.nxv8bf16.i64(<vscale x 8 x bfloat> poison, <vscale x 8 x bfloat> [[VS2]], i64 [[VS1]], <vscale x 8 x i1> [[VM]], i64 [[VL]], i64 3)
+// CHECK-RV64-NEXT: ret <vscale x 8 x bfloat> [[TMP0]]
+//
+vbfloat16m2_t test_vrgather_vx_bf16m2_m(vbool8_t vm, vbfloat16m2_t vs2,
+ size_t vs1, size_t vl) {
+ return __riscv_vrgather_vx_bf16m2_m(vm, vs2, vs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 16 x bfloat> @test_vrgather_vv_bf16m4_m(
+// CHECK-RV64-SAME: <vscale x 16 x i1> [[VM:%.*]], <vscale x 16 x bfloat> [[VS2:%.*]], <vscale x 16 x i16> [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 16 x bfloat> @llvm.riscv.vrgather.vv.mask.nxv16bf16.i64(<vscale x 16 x bfloat> poison, <vscale x 16 x bfloat> [[VS2]], <vscale x 16 x i16> [[VS1]], <vscale x 16 x i1> [[VM]], i64 [[VL]], i64 3)
+// CHECK-RV64-NEXT: ret <vscale x 16 x bfloat> [[TMP0]]
+//
+vbfloat16m4_t test_vrgather_vv_bf16m4_m(vbool4_t vm, vbfloat16m4_t vs2,
+ vuint16m4_t vs1, size_t vl) {
+ return __riscv_vrgather_vv_bf16m4_m(vm, vs2, vs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 16 x bfloat> @test_vrgather_vx_bf16m4_m(
+// CHECK-RV64-SAME: <vscale x 16 x i1> [[VM:%.*]], <vscale x 16 x bfloat> [[VS2:%.*]], i64 noundef [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 16 x bfloat> @llvm.riscv.vrgather.vx.mask.nxv16bf16.i64(<vscale x 16 x bfloat> poison, <vscale x 16 x bfloat> [[VS2]], i64 [[VS1]], <vscale x 16 x i1> [[VM]], i64 [[VL]], i64 3)
+// CHECK-RV64-NEXT: ret <vscale x 16 x bfloat> [[TMP0]]
+//
+vbfloat16m4_t test_vrgather_vx_bf16m4_m(vbool4_t vm, vbfloat16m4_t vs2,
+ size_t vs1, size_t vl) {
+ return __riscv_vrgather_vx_bf16m4_m(vm, vs2, vs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 32 x bfloat> @test_vrgather_vv_bf16m8_m(
+// CHECK-RV64-SAME: <vscale x 32 x i1> [[VM:%.*]], <vscale x 32 x bfloat> [[VS2:%.*]], <vscale x 32 x i16> [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 32 x bfloat> @llvm.riscv.vrgather.vv.mask.nxv32bf16.i64(<vscale x 32 x bfloat> poison, <vscale x 32 x bfloat> [[VS2]], <vscale x 32 x i16> [[VS1]], <vscale x 32 x i1> [[VM]], i64 [[VL]], i64 3)
+// CHECK-RV64-NEXT: ret <vscale x 32 x bfloat> [[TMP0]]
+//
+vbfloat16m8_t test_vrgather_vv_bf16m8_m(vbool2_t vm, vbfloat16m8_t vs2,
+ vuint16m8_t vs1, size_t vl) {
+ return __riscv_vrgather_vv_bf16m8_m(vm, vs2, vs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 32 x bfloat> @test_vrgather_vx_bf16m8_m(
+// CHECK-RV64-SAME: <vscale x 32 x i1> [[VM:%.*]], <vscale x 32 x bfloat> [[VS2:%.*]], i64 noundef [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 32 x bfloat> @llvm.riscv.vrgather.vx.mask.nxv32bf16.i64(<vscale x 32 x bfloat> poison, <vscale x 32 x bfloat> [[VS2]], i64 [[VS1]], <vscale x 32 x i1> [[VM]], i64 [[VL]], i64 3)
+// CHECK-RV64-NEXT: ret <vscale x 32 x bfloat> [[TMP0]]
+//
+vbfloat16m8_t test_vrgather_vx_bf16m8_m(vbool2_t vm, vbfloat16m8_t vs2,
+ size_t vs1, size_t vl) {
+ return __riscv_vrgather_vx_bf16m8_m(vm, vs2, vs1, vl);
+}
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vcompress.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vcompress.c
new file mode 100644
index 0000000..40de6fd
--- /dev/null
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vcompress.c
@@ -0,0 +1,74 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4
+// REQUIRES: riscv-registered-target
+// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \
+// RUN: -target-feature +zvfbfmin -disable-O0-optnone \
+// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \
+// RUN: FileCheck --check-prefix=CHECK-RV64 %s
+
+#include <riscv_vector.h>
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 1 x bfloat> @test_vcompress_vm_bf16mf4(
+// CHECK-RV64-SAME: <vscale x 1 x bfloat> [[VS2:%.*]], <vscale x 1 x i1> [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 1 x bfloat> @llvm.riscv.vcompress.nxv1bf16.i64(<vscale x 1 x bfloat> poison, <vscale x 1 x bfloat> [[VS2]], <vscale x 1 x i1> [[VS1]], i64 [[VL]])
+// CHECK-RV64-NEXT: ret <vscale x 1 x bfloat> [[TMP0]]
+//
+vbfloat16mf4_t test_vcompress_vm_bf16mf4(vbfloat16mf4_t vs2, vbool64_t vs1,
+ size_t vl) {
+ return __riscv_vcompress(vs2, vs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 2 x bfloat> @test_vcompress_vm_bf16mf2(
+// CHECK-RV64-SAME: <vscale x 2 x bfloat> [[VS2:%.*]], <vscale x 2 x i1> [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 2 x bfloat> @llvm.riscv.vcompress.nxv2bf16.i64(<vscale x 2 x bfloat> poison, <vscale x 2 x bfloat> [[VS2]], <vscale x 2 x i1> [[VS1]], i64 [[VL]])
+// CHECK-RV64-NEXT: ret <vscale x 2 x bfloat> [[TMP0]]
+//
+vbfloat16mf2_t test_vcompress_vm_bf16mf2(vbfloat16mf2_t vs2, vbool32_t vs1,
+ size_t vl) {
+ return __riscv_vcompress(vs2, vs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 4 x bfloat> @test_vcompress_vm_bf16m1(
+// CHECK-RV64-SAME: <vscale x 4 x bfloat> [[VS2:%.*]], <vscale x 4 x i1> [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 4 x bfloat> @llvm.riscv.vcompress.nxv4bf16.i64(<vscale x 4 x bfloat> poison, <vscale x 4 x bfloat> [[VS2]], <vscale x 4 x i1> [[VS1]], i64 [[VL]])
+// CHECK-RV64-NEXT: ret <vscale x 4 x bfloat> [[TMP0]]
+//
+vbfloat16m1_t test_vcompress_vm_bf16m1(vbfloat16m1_t vs2, vbool16_t vs1,
+ size_t vl) {
+ return __riscv_vcompress(vs2, vs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 8 x bfloat> @test_vcompress_vm_bf16m2(
+// CHECK-RV64-SAME: <vscale x 8 x bfloat> [[VS2:%.*]], <vscale x 8 x i1> [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 8 x bfloat> @llvm.riscv.vcompress.nxv8bf16.i64(<vscale x 8 x bfloat> poison, <vscale x 8 x bfloat> [[VS2]], <vscale x 8 x i1> [[VS1]], i64 [[VL]])
+// CHECK-RV64-NEXT: ret <vscale x 8 x bfloat> [[TMP0]]
+//
+vbfloat16m2_t test_vcompress_vm_bf16m2(vbfloat16m2_t vs2, vbool8_t vs1,
+ size_t vl) {
+ return __riscv_vcompress(vs2, vs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 16 x bfloat> @test_vcompress_vm_bf16m4(
+// CHECK-RV64-SAME: <vscale x 16 x bfloat> [[VS2:%.*]], <vscale x 16 x i1> [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 16 x bfloat> @llvm.riscv.vcompress.nxv16bf16.i64(<vscale x 16 x bfloat> poison, <vscale x 16 x bfloat> [[VS2]], <vscale x 16 x i1> [[VS1]], i64 [[VL]])
+// CHECK-RV64-NEXT: ret <vscale x 16 x bfloat> [[TMP0]]
+//
+vbfloat16m4_t test_vcompress_vm_bf16m4(vbfloat16m4_t vs2, vbool4_t vs1,
+ size_t vl) {
+ return __riscv_vcompress(vs2, vs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 32 x bfloat> @test_vcompress_vm_bf16m8(
+// CHECK-RV64-SAME: <vscale x 32 x bfloat> [[VS2:%.*]], <vscale x 32 x i1> [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 32 x bfloat> @llvm.riscv.vcompress.nxv32bf16.i64(<vscale x 32 x bfloat> poison, <vscale x 32 x bfloat> [[VS2]], <vscale x 32 x i1> [[VS1]], i64 [[VL]])
+// CHECK-RV64-NEXT: ret <vscale x 32 x bfloat> [[TMP0]]
+//
+vbfloat16m8_t test_vcompress_vm_bf16m8(vbfloat16m8_t vs2, vbool2_t vs1,
+ size_t vl) {
+ return __riscv_vcompress(vs2, vs1, vl);
+}
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vrgather.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vrgather.c
new file mode 100644
index 0000000..068d849
--- /dev/null
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/non-policy/overloaded/vrgather.c
@@ -0,0 +1,272 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4
+// REQUIRES: riscv-registered-target
+// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \
+// RUN: -target-feature +zvfbfmin -disable-O0-optnone \
+// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \
+// RUN: FileCheck --check-prefix=CHECK-RV64 %s
+
+#include <riscv_vector.h>
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 1 x bfloat> @test_vrgather_vv_bf16mf4(
+// CHECK-RV64-SAME: <vscale x 1 x bfloat> [[VS2:%.*]], <vscale x 1 x i16> [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 1 x bfloat> @llvm.riscv.vrgather.vv.nxv1bf16.i64(<vscale x 1 x bfloat> poison, <vscale x 1 x bfloat> [[VS2]], <vscale x 1 x i16> [[VS1]], i64 [[VL]])
+// CHECK-RV64-NEXT: ret <vscale x 1 x bfloat> [[TMP0]]
+//
+vbfloat16mf4_t test_vrgather_vv_bf16mf4(vbfloat16mf4_t vs2, vuint16mf4_t vs1,
+ size_t vl) {
+ return __riscv_vrgather(vs2, vs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 1 x bfloat> @test_vrgather_vx_bf16mf4(
+// CHECK-RV64-SAME: <vscale x 1 x bfloat> [[VS2:%.*]], i64 noundef [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 1 x bfloat> @llvm.riscv.vrgather.vx.nxv1bf16.i64(<vscale x 1 x bfloat> poison, <vscale x 1 x bfloat> [[VS2]], i64 [[VS1]], i64 [[VL]])
+// CHECK-RV64-NEXT: ret <vscale x 1 x bfloat> [[TMP0]]
+//
+vbfloat16mf4_t test_vrgather_vx_bf16mf4(vbfloat16mf4_t vs2, size_t vs1,
+ size_t vl) {
+ return __riscv_vrgather(vs2, vs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 2 x bfloat> @test_vrgather_vv_bf16mf2(
+// CHECK-RV64-SAME: <vscale x 2 x bfloat> [[VS2:%.*]], <vscale x 2 x i16> [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 2 x bfloat> @llvm.riscv.vrgather.vv.nxv2bf16.i64(<vscale x 2 x bfloat> poison, <vscale x 2 x bfloat> [[VS2]], <vscale x 2 x i16> [[VS1]], i64 [[VL]])
+// CHECK-RV64-NEXT: ret <vscale x 2 x bfloat> [[TMP0]]
+//
+vbfloat16mf2_t test_vrgather_vv_bf16mf2(vbfloat16mf2_t vs2, vuint16mf2_t vs1,
+ size_t vl) {
+ return __riscv_vrgather(vs2, vs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 2 x bfloat> @test_vrgather_vx_bf16mf2(
+// CHECK-RV64-SAME: <vscale x 2 x bfloat> [[VS2:%.*]], i64 noundef [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 2 x bfloat> @llvm.riscv.vrgather.vx.nxv2bf16.i64(<vscale x 2 x bfloat> poison, <vscale x 2 x bfloat> [[VS2]], i64 [[VS1]], i64 [[VL]])
+// CHECK-RV64-NEXT: ret <vscale x 2 x bfloat> [[TMP0]]
+//
+vbfloat16mf2_t test_vrgather_vx_bf16mf2(vbfloat16mf2_t vs2, size_t vs1,
+ size_t vl) {
+ return __riscv_vrgather(vs2, vs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 4 x bfloat> @test_vrgather_vv_bf16m1(
+// CHECK-RV64-SAME: <vscale x 4 x bfloat> [[VS2:%.*]], <vscale x 4 x i16> [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 4 x bfloat> @llvm.riscv.vrgather.vv.nxv4bf16.i64(<vscale x 4 x bfloat> poison, <vscale x 4 x bfloat> [[VS2]], <vscale x 4 x i16> [[VS1]], i64 [[VL]])
+// CHECK-RV64-NEXT: ret <vscale x 4 x bfloat> [[TMP0]]
+//
+vbfloat16m1_t test_vrgather_vv_bf16m1(vbfloat16m1_t vs2, vuint16m1_t vs1,
+ size_t vl) {
+ return __riscv_vrgather(vs2, vs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 4 x bfloat> @test_vrgather_vx_bf16m1(
+// CHECK-RV64-SAME: <vscale x 4 x bfloat> [[VS2:%.*]], i64 noundef [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 4 x bfloat> @llvm.riscv.vrgather.vx.nxv4bf16.i64(<vscale x 4 x bfloat> poison, <vscale x 4 x bfloat> [[VS2]], i64 [[VS1]], i64 [[VL]])
+// CHECK-RV64-NEXT: ret <vscale x 4 x bfloat> [[TMP0]]
+//
+vbfloat16m1_t test_vrgather_vx_bf16m1(vbfloat16m1_t vs2, size_t vs1,
+ size_t vl) {
+ return __riscv_vrgather(vs2, vs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 8 x bfloat> @test_vrgather_vv_bf16m2(
+// CHECK-RV64-SAME: <vscale x 8 x bfloat> [[VS2:%.*]], <vscale x 8 x i16> [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 8 x bfloat> @llvm.riscv.vrgather.vv.nxv8bf16.i64(<vscale x 8 x bfloat> poison, <vscale x 8 x bfloat> [[VS2]], <vscale x 8 x i16> [[VS1]], i64 [[VL]])
+// CHECK-RV64-NEXT: ret <vscale x 8 x bfloat> [[TMP0]]
+//
+vbfloat16m2_t test_vrgather_vv_bf16m2(vbfloat16m2_t vs2, vuint16m2_t vs1,
+ size_t vl) {
+ return __riscv_vrgather(vs2, vs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 8 x bfloat> @test_vrgather_vx_bf16m2(
+// CHECK-RV64-SAME: <vscale x 8 x bfloat> [[VS2:%.*]], i64 noundef [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 8 x bfloat> @llvm.riscv.vrgather.vx.nxv8bf16.i64(<vscale x 8 x bfloat> poison, <vscale x 8 x bfloat> [[VS2]], i64 [[VS1]], i64 [[VL]])
+// CHECK-RV64-NEXT: ret <vscale x 8 x bfloat> [[TMP0]]
+//
+vbfloat16m2_t test_vrgather_vx_bf16m2(vbfloat16m2_t vs2, size_t vs1,
+ size_t vl) {
+ return __riscv_vrgather(vs2, vs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 16 x bfloat> @test_vrgather_vv_bf16m4(
+// CHECK-RV64-SAME: <vscale x 16 x bfloat> [[VS2:%.*]], <vscale x 16 x i16> [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 16 x bfloat> @llvm.riscv.vrgather.vv.nxv16bf16.i64(<vscale x 16 x bfloat> poison, <vscale x 16 x bfloat> [[VS2]], <vscale x 16 x i16> [[VS1]], i64 [[VL]])
+// CHECK-RV64-NEXT: ret <vscale x 16 x bfloat> [[TMP0]]
+//
+vbfloat16m4_t test_vrgather_vv_bf16m4(vbfloat16m4_t vs2, vuint16m4_t vs1,
+ size_t vl) {
+ return __riscv_vrgather(vs2, vs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 16 x bfloat> @test_vrgather_vx_bf16m4(
+// CHECK-RV64-SAME: <vscale x 16 x bfloat> [[VS2:%.*]], i64 noundef [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 16 x bfloat> @llvm.riscv.vrgather.vx.nxv16bf16.i64(<vscale x 16 x bfloat> poison, <vscale x 16 x bfloat> [[VS2]], i64 [[VS1]], i64 [[VL]])
+// CHECK-RV64-NEXT: ret <vscale x 16 x bfloat> [[TMP0]]
+//
+vbfloat16m4_t test_vrgather_vx_bf16m4(vbfloat16m4_t vs2, size_t vs1,
+ size_t vl) {
+ return __riscv_vrgather(vs2, vs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 32 x bfloat> @test_vrgather_vv_bf16m8(
+// CHECK-RV64-SAME: <vscale x 32 x bfloat> [[VS2:%.*]], <vscale x 32 x i16> [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 32 x bfloat> @llvm.riscv.vrgather.vv.nxv32bf16.i64(<vscale x 32 x bfloat> poison, <vscale x 32 x bfloat> [[VS2]], <vscale x 32 x i16> [[VS1]], i64 [[VL]])
+// CHECK-RV64-NEXT: ret <vscale x 32 x bfloat> [[TMP0]]
+//
+vbfloat16m8_t test_vrgather_vv_bf16m8(vbfloat16m8_t vs2, vuint16m8_t vs1,
+ size_t vl) {
+ return __riscv_vrgather(vs2, vs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 32 x bfloat> @test_vrgather_vx_bf16m8(
+// CHECK-RV64-SAME: <vscale x 32 x bfloat> [[VS2:%.*]], i64 noundef [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 32 x bfloat> @llvm.riscv.vrgather.vx.nxv32bf16.i64(<vscale x 32 x bfloat> poison, <vscale x 32 x bfloat> [[VS2]], i64 [[VS1]], i64 [[VL]])
+// CHECK-RV64-NEXT: ret <vscale x 32 x bfloat> [[TMP0]]
+//
+vbfloat16m8_t test_vrgather_vx_bf16m8(vbfloat16m8_t vs2, size_t vs1,
+ size_t vl) {
+ return __riscv_vrgather(vs2, vs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 1 x bfloat> @test_vrgather_vv_bf16mf4_m(
+// CHECK-RV64-SAME: <vscale x 1 x i1> [[VM:%.*]], <vscale x 1 x bfloat> [[VS2:%.*]], <vscale x 1 x i16> [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 1 x bfloat> @llvm.riscv.vrgather.vv.mask.nxv1bf16.i64(<vscale x 1 x bfloat> poison, <vscale x 1 x bfloat> [[VS2]], <vscale x 1 x i16> [[VS1]], <vscale x 1 x i1> [[VM]], i64 [[VL]], i64 3)
+// CHECK-RV64-NEXT: ret <vscale x 1 x bfloat> [[TMP0]]
+//
+vbfloat16mf4_t test_vrgather_vv_bf16mf4_m(vbool64_t vm, vbfloat16mf4_t vs2,
+ vuint16mf4_t vs1, size_t vl) {
+ return __riscv_vrgather(vm, vs2, vs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 1 x bfloat> @test_vrgather_vx_bf16mf4_m(
+// CHECK-RV64-SAME: <vscale x 1 x i1> [[VM:%.*]], <vscale x 1 x bfloat> [[VS2:%.*]], i64 noundef [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 1 x bfloat> @llvm.riscv.vrgather.vx.mask.nxv1bf16.i64(<vscale x 1 x bfloat> poison, <vscale x 1 x bfloat> [[VS2]], i64 [[VS1]], <vscale x 1 x i1> [[VM]], i64 [[VL]], i64 3)
+// CHECK-RV64-NEXT: ret <vscale x 1 x bfloat> [[TMP0]]
+//
+vbfloat16mf4_t test_vrgather_vx_bf16mf4_m(vbool64_t vm, vbfloat16mf4_t vs2,
+ size_t vs1, size_t vl) {
+ return __riscv_vrgather(vm, vs2, vs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 2 x bfloat> @test_vrgather_vv_bf16mf2_m(
+// CHECK-RV64-SAME: <vscale x 2 x i1> [[VM:%.*]], <vscale x 2 x bfloat> [[VS2:%.*]], <vscale x 2 x i16> [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 2 x bfloat> @llvm.riscv.vrgather.vv.mask.nxv2bf16.i64(<vscale x 2 x bfloat> poison, <vscale x 2 x bfloat> [[VS2]], <vscale x 2 x i16> [[VS1]], <vscale x 2 x i1> [[VM]], i64 [[VL]], i64 3)
+// CHECK-RV64-NEXT: ret <vscale x 2 x bfloat> [[TMP0]]
+//
+vbfloat16mf2_t test_vrgather_vv_bf16mf2_m(vbool32_t vm, vbfloat16mf2_t vs2,
+ vuint16mf2_t vs1, size_t vl) {
+ return __riscv_vrgather(vm, vs2, vs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 2 x bfloat> @test_vrgather_vx_bf16mf2_m(
+// CHECK-RV64-SAME: <vscale x 2 x i1> [[VM:%.*]], <vscale x 2 x bfloat> [[VS2:%.*]], i64 noundef [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 2 x bfloat> @llvm.riscv.vrgather.vx.mask.nxv2bf16.i64(<vscale x 2 x bfloat> poison, <vscale x 2 x bfloat> [[VS2]], i64 [[VS1]], <vscale x 2 x i1> [[VM]], i64 [[VL]], i64 3)
+// CHECK-RV64-NEXT: ret <vscale x 2 x bfloat> [[TMP0]]
+//
+vbfloat16mf2_t test_vrgather_vx_bf16mf2_m(vbool32_t vm, vbfloat16mf2_t vs2,
+ size_t vs1, size_t vl) {
+ return __riscv_vrgather(vm, vs2, vs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 4 x bfloat> @test_vrgather_vv_bf16m1_m(
+// CHECK-RV64-SAME: <vscale x 4 x i1> [[VM:%.*]], <vscale x 4 x bfloat> [[VS2:%.*]], <vscale x 4 x i16> [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 4 x bfloat> @llvm.riscv.vrgather.vv.mask.nxv4bf16.i64(<vscale x 4 x bfloat> poison, <vscale x 4 x bfloat> [[VS2]], <vscale x 4 x i16> [[VS1]], <vscale x 4 x i1> [[VM]], i64 [[VL]], i64 3)
+// CHECK-RV64-NEXT: ret <vscale x 4 x bfloat> [[TMP0]]
+//
+vbfloat16m1_t test_vrgather_vv_bf16m1_m(vbool16_t vm, vbfloat16m1_t vs2,
+ vuint16m1_t vs1, size_t vl) {
+ return __riscv_vrgather(vm, vs2, vs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 4 x bfloat> @test_vrgather_vx_bf16m1_m(
+// CHECK-RV64-SAME: <vscale x 4 x i1> [[VM:%.*]], <vscale x 4 x bfloat> [[VS2:%.*]], i64 noundef [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 4 x bfloat> @llvm.riscv.vrgather.vx.mask.nxv4bf16.i64(<vscale x 4 x bfloat> poison, <vscale x 4 x bfloat> [[VS2]], i64 [[VS1]], <vscale x 4 x i1> [[VM]], i64 [[VL]], i64 3)
+// CHECK-RV64-NEXT: ret <vscale x 4 x bfloat> [[TMP0]]
+//
+vbfloat16m1_t test_vrgather_vx_bf16m1_m(vbool16_t vm, vbfloat16m1_t vs2,
+ size_t vs1, size_t vl) {
+ return __riscv_vrgather(vm, vs2, vs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 8 x bfloat> @test_vrgather_vv_bf16m2_m(
+// CHECK-RV64-SAME: <vscale x 8 x i1> [[VM:%.*]], <vscale x 8 x bfloat> [[VS2:%.*]], <vscale x 8 x i16> [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 8 x bfloat> @llvm.riscv.vrgather.vv.mask.nxv8bf16.i64(<vscale x 8 x bfloat> poison, <vscale x 8 x bfloat> [[VS2]], <vscale x 8 x i16> [[VS1]], <vscale x 8 x i1> [[VM]], i64 [[VL]], i64 3)
+// CHECK-RV64-NEXT: ret <vscale x 8 x bfloat> [[TMP0]]
+//
+vbfloat16m2_t test_vrgather_vv_bf16m2_m(vbool8_t vm, vbfloat16m2_t vs2,
+ vuint16m2_t vs1, size_t vl) {
+ return __riscv_vrgather(vm, vs2, vs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 8 x bfloat> @test_vrgather_vx_bf16m2_m(
+// CHECK-RV64-SAME: <vscale x 8 x i1> [[VM:%.*]], <vscale x 8 x bfloat> [[VS2:%.*]], i64 noundef [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 8 x bfloat> @llvm.riscv.vrgather.vx.mask.nxv8bf16.i64(<vscale x 8 x bfloat> poison, <vscale x 8 x bfloat> [[VS2]], i64 [[VS1]], <vscale x 8 x i1> [[VM]], i64 [[VL]], i64 3)
+// CHECK-RV64-NEXT: ret <vscale x 8 x bfloat> [[TMP0]]
+//
+vbfloat16m2_t test_vrgather_vx_bf16m2_m(vbool8_t vm, vbfloat16m2_t vs2,
+ size_t vs1, size_t vl) {
+ return __riscv_vrgather(vm, vs2, vs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 16 x bfloat> @test_vrgather_vv_bf16m4_m(
+// CHECK-RV64-SAME: <vscale x 16 x i1> [[VM:%.*]], <vscale x 16 x bfloat> [[VS2:%.*]], <vscale x 16 x i16> [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 16 x bfloat> @llvm.riscv.vrgather.vv.mask.nxv16bf16.i64(<vscale x 16 x bfloat> poison, <vscale x 16 x bfloat> [[VS2]], <vscale x 16 x i16> [[VS1]], <vscale x 16 x i1> [[VM]], i64 [[VL]], i64 3)
+// CHECK-RV64-NEXT: ret <vscale x 16 x bfloat> [[TMP0]]
+//
+vbfloat16m4_t test_vrgather_vv_bf16m4_m(vbool4_t vm, vbfloat16m4_t vs2,
+ vuint16m4_t vs1, size_t vl) {
+ return __riscv_vrgather(vm, vs2, vs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 16 x bfloat> @test_vrgather_vx_bf16m4_m(
+// CHECK-RV64-SAME: <vscale x 16 x i1> [[VM:%.*]], <vscale x 16 x bfloat> [[VS2:%.*]], i64 noundef [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 16 x bfloat> @llvm.riscv.vrgather.vx.mask.nxv16bf16.i64(<vscale x 16 x bfloat> poison, <vscale x 16 x bfloat> [[VS2]], i64 [[VS1]], <vscale x 16 x i1> [[VM]], i64 [[VL]], i64 3)
+// CHECK-RV64-NEXT: ret <vscale x 16 x bfloat> [[TMP0]]
+//
+vbfloat16m4_t test_vrgather_vx_bf16m4_m(vbool4_t vm, vbfloat16m4_t vs2,
+ size_t vs1, size_t vl) {
+ return __riscv_vrgather(vm, vs2, vs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 32 x bfloat> @test_vrgather_vv_bf16m8_m(
+// CHECK-RV64-SAME: <vscale x 32 x i1> [[VM:%.*]], <vscale x 32 x bfloat> [[VS2:%.*]], <vscale x 32 x i16> [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 32 x bfloat> @llvm.riscv.vrgather.vv.mask.nxv32bf16.i64(<vscale x 32 x bfloat> poison, <vscale x 32 x bfloat> [[VS2]], <vscale x 32 x i16> [[VS1]], <vscale x 32 x i1> [[VM]], i64 [[VL]], i64 3)
+// CHECK-RV64-NEXT: ret <vscale x 32 x bfloat> [[TMP0]]
+//
+vbfloat16m8_t test_vrgather_vv_bf16m8_m(vbool2_t vm, vbfloat16m8_t vs2,
+ vuint16m8_t vs1, size_t vl) {
+ return __riscv_vrgather(vm, vs2, vs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 32 x bfloat> @test_vrgather_vx_bf16m8_m(
+// CHECK-RV64-SAME: <vscale x 32 x i1> [[VM:%.*]], <vscale x 32 x bfloat> [[VS2:%.*]], i64 noundef [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 32 x bfloat> @llvm.riscv.vrgather.vx.mask.nxv32bf16.i64(<vscale x 32 x bfloat> poison, <vscale x 32 x bfloat> [[VS2]], i64 [[VS1]], <vscale x 32 x i1> [[VM]], i64 [[VL]], i64 3)
+// CHECK-RV64-NEXT: ret <vscale x 32 x bfloat> [[TMP0]]
+//
+vbfloat16m8_t test_vrgather_vx_bf16m8_m(vbool2_t vm, vbfloat16m8_t vs2,
+ size_t vs1, size_t vl) {
+ return __riscv_vrgather(vm, vs2, vs1, vl);
+}
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/non-overloaded/vcompress.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/non-overloaded/vcompress.c
new file mode 100644
index 0000000..90160c8
--- /dev/null
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/non-overloaded/vcompress.c
@@ -0,0 +1,68 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4
+// REQUIRES: riscv-registered-target
+// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \
+// RUN: -target-feature +zvfbfmin -disable-O0-optnone \
+// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \
+// RUN: FileCheck --check-prefix=CHECK-RV64 %s
+
+#include <riscv_vector.h>
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 1 x bfloat> @test_vcompress_vm_bf16mf4_tu(
+// CHECK-RV64-SAME: <vscale x 1 x bfloat> [[VD:%.*]], <vscale x 1 x bfloat> [[VS2:%.*]], <vscale x 1 x i1> [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 1 x bfloat> @llvm.riscv.vcompress.nxv1bf16.i64(<vscale x 1 x bfloat> [[VD]], <vscale x 1 x bfloat> [[VS2]], <vscale x 1 x i1> [[VS1]], i64 [[VL]])
+// CHECK-RV64-NEXT: ret <vscale x 1 x bfloat> [[TMP0]]
+//
+vbfloat16mf4_t test_vcompress_vm_bf16mf4_tu(vbfloat16mf4_t vd, vbfloat16mf4_t vs2, vbool64_t vs1, size_t vl) {
+ return __riscv_vcompress_vm_bf16mf4_tu(vd, vs2, vs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 2 x bfloat> @test_vcompress_vm_bf16mf2_tu(
+// CHECK-RV64-SAME: <vscale x 2 x bfloat> [[VD:%.*]], <vscale x 2 x bfloat> [[VS2:%.*]], <vscale x 2 x i1> [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 2 x bfloat> @llvm.riscv.vcompress.nxv2bf16.i64(<vscale x 2 x bfloat> [[VD]], <vscale x 2 x bfloat> [[VS2]], <vscale x 2 x i1> [[VS1]], i64 [[VL]])
+// CHECK-RV64-NEXT: ret <vscale x 2 x bfloat> [[TMP0]]
+//
+vbfloat16mf2_t test_vcompress_vm_bf16mf2_tu(vbfloat16mf2_t vd, vbfloat16mf2_t vs2, vbool32_t vs1, size_t vl) {
+ return __riscv_vcompress_vm_bf16mf2_tu(vd, vs2, vs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 4 x bfloat> @test_vcompress_vm_bf16m1_tu(
+// CHECK-RV64-SAME: <vscale x 4 x bfloat> [[VD:%.*]], <vscale x 4 x bfloat> [[VS2:%.*]], <vscale x 4 x i1> [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 4 x bfloat> @llvm.riscv.vcompress.nxv4bf16.i64(<vscale x 4 x bfloat> [[VD]], <vscale x 4 x bfloat> [[VS2]], <vscale x 4 x i1> [[VS1]], i64 [[VL]])
+// CHECK-RV64-NEXT: ret <vscale x 4 x bfloat> [[TMP0]]
+//
+vbfloat16m1_t test_vcompress_vm_bf16m1_tu(vbfloat16m1_t vd, vbfloat16m1_t vs2, vbool16_t vs1, size_t vl) {
+ return __riscv_vcompress_vm_bf16m1_tu(vd, vs2, vs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 8 x bfloat> @test_vcompress_vm_bf16m2_tu(
+// CHECK-RV64-SAME: <vscale x 8 x bfloat> [[VD:%.*]], <vscale x 8 x bfloat> [[VS2:%.*]], <vscale x 8 x i1> [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 8 x bfloat> @llvm.riscv.vcompress.nxv8bf16.i64(<vscale x 8 x bfloat> [[VD]], <vscale x 8 x bfloat> [[VS2]], <vscale x 8 x i1> [[VS1]], i64 [[VL]])
+// CHECK-RV64-NEXT: ret <vscale x 8 x bfloat> [[TMP0]]
+//
+vbfloat16m2_t test_vcompress_vm_bf16m2_tu(vbfloat16m2_t vd, vbfloat16m2_t vs2, vbool8_t vs1, size_t vl) {
+ return __riscv_vcompress_vm_bf16m2_tu(vd, vs2, vs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 16 x bfloat> @test_vcompress_vm_bf16m4_tu(
+// CHECK-RV64-SAME: <vscale x 16 x bfloat> [[VD:%.*]], <vscale x 16 x bfloat> [[VS2:%.*]], <vscale x 16 x i1> [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 16 x bfloat> @llvm.riscv.vcompress.nxv16bf16.i64(<vscale x 16 x bfloat> [[VD]], <vscale x 16 x bfloat> [[VS2]], <vscale x 16 x i1> [[VS1]], i64 [[VL]])
+// CHECK-RV64-NEXT: ret <vscale x 16 x bfloat> [[TMP0]]
+//
+vbfloat16m4_t test_vcompress_vm_bf16m4_tu(vbfloat16m4_t vd, vbfloat16m4_t vs2, vbool4_t vs1, size_t vl) {
+ return __riscv_vcompress_vm_bf16m4_tu(vd, vs2, vs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 32 x bfloat> @test_vcompress_vm_bf16m8_tu(
+// CHECK-RV64-SAME: <vscale x 32 x bfloat> [[VD:%.*]], <vscale x 32 x bfloat> [[VS2:%.*]], <vscale x 32 x i1> [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 32 x bfloat> @llvm.riscv.vcompress.nxv32bf16.i64(<vscale x 32 x bfloat> [[VD]], <vscale x 32 x bfloat> [[VS2]], <vscale x 32 x i1> [[VS1]], i64 [[VL]])
+// CHECK-RV64-NEXT: ret <vscale x 32 x bfloat> [[TMP0]]
+//
+vbfloat16m8_t test_vcompress_vm_bf16m8_tu(vbfloat16m8_t vd, vbfloat16m8_t vs2, vbool2_t vs1, size_t vl) {
+ return __riscv_vcompress_vm_bf16m8_tu(vd, vs2, vs1, vl);
+}
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/non-overloaded/vrgather.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/non-overloaded/vrgather.c
new file mode 100644
index 0000000..137ab17
--- /dev/null
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/non-overloaded/vrgather.c
@@ -0,0 +1,488 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4
+// REQUIRES: riscv-registered-target
+// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \
+// RUN: -target-feature +zvfbfmin -disable-O0-optnone \
+// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \
+// RUN: FileCheck --check-prefix=CHECK-RV64 %s
+
+#include <riscv_vector.h>
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 1 x bfloat> @test_vrgather_vv_bf16mf4_tu(
+// CHECK-RV64-SAME: <vscale x 1 x bfloat> [[VD:%.*]], <vscale x 1 x bfloat> [[VS2:%.*]], <vscale x 1 x i16> [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 1 x bfloat> @llvm.riscv.vrgather.vv.nxv1bf16.i64(<vscale x 1 x bfloat> [[VD]], <vscale x 1 x bfloat> [[VS2]], <vscale x 1 x i16> [[VS1]], i64 [[VL]])
+// CHECK-RV64-NEXT: ret <vscale x 1 x bfloat> [[TMP0]]
+//
+vbfloat16mf4_t test_vrgather_vv_bf16mf4_tu(vbfloat16mf4_t vd, vbfloat16mf4_t vs2, vuint16mf4_t vs1, size_t vl) {
+ return __riscv_vrgather_vv_bf16mf4_tu(vd, vs2, vs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 1 x bfloat> @test_vrgather_vx_bf16mf4_tu(
+// CHECK-RV64-SAME: <vscale x 1 x bfloat> [[VD:%.*]], <vscale x 1 x bfloat> [[VS2:%.*]], i64 noundef [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 1 x bfloat> @llvm.riscv.vrgather.vx.nxv1bf16.i64(<vscale x 1 x bfloat> [[VD]], <vscale x 1 x bfloat> [[VS2]], i64 [[VS1]], i64 [[VL]])
+// CHECK-RV64-NEXT: ret <vscale x 1 x bfloat> [[TMP0]]
+//
+vbfloat16mf4_t test_vrgather_vx_bf16mf4_tu(vbfloat16mf4_t vd, vbfloat16mf4_t vs2, size_t vs1, size_t vl) {
+ return __riscv_vrgather_vx_bf16mf4_tu(vd, vs2, vs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 2 x bfloat> @test_vrgather_vv_bf16mf2_tu(
+// CHECK-RV64-SAME: <vscale x 2 x bfloat> [[VD:%.*]], <vscale x 2 x bfloat> [[VS2:%.*]], <vscale x 2 x i16> [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 2 x bfloat> @llvm.riscv.vrgather.vv.nxv2bf16.i64(<vscale x 2 x bfloat> [[VD]], <vscale x 2 x bfloat> [[VS2]], <vscale x 2 x i16> [[VS1]], i64 [[VL]])
+// CHECK-RV64-NEXT: ret <vscale x 2 x bfloat> [[TMP0]]
+//
+vbfloat16mf2_t test_vrgather_vv_bf16mf2_tu(vbfloat16mf2_t vd, vbfloat16mf2_t vs2, vuint16mf2_t vs1, size_t vl) {
+ return __riscv_vrgather_vv_bf16mf2_tu(vd, vs2, vs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 2 x bfloat> @test_vrgather_vx_bf16mf2_tu(
+// CHECK-RV64-SAME: <vscale x 2 x bfloat> [[VD:%.*]], <vscale x 2 x bfloat> [[VS2:%.*]], i64 noundef [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 2 x bfloat> @llvm.riscv.vrgather.vx.nxv2bf16.i64(<vscale x 2 x bfloat> [[VD]], <vscale x 2 x bfloat> [[VS2]], i64 [[VS1]], i64 [[VL]])
+// CHECK-RV64-NEXT: ret <vscale x 2 x bfloat> [[TMP0]]
+//
+vbfloat16mf2_t test_vrgather_vx_bf16mf2_tu(vbfloat16mf2_t vd, vbfloat16mf2_t vs2, size_t vs1, size_t vl) {
+ return __riscv_vrgather_vx_bf16mf2_tu(vd, vs2, vs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 4 x bfloat> @test_vrgather_vv_bf16m1_tu(
+// CHECK-RV64-SAME: <vscale x 4 x bfloat> [[VD:%.*]], <vscale x 4 x bfloat> [[VS2:%.*]], <vscale x 4 x i16> [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 4 x bfloat> @llvm.riscv.vrgather.vv.nxv4bf16.i64(<vscale x 4 x bfloat> [[VD]], <vscale x 4 x bfloat> [[VS2]], <vscale x 4 x i16> [[VS1]], i64 [[VL]])
+// CHECK-RV64-NEXT: ret <vscale x 4 x bfloat> [[TMP0]]
+//
+vbfloat16m1_t test_vrgather_vv_bf16m1_tu(vbfloat16m1_t vd, vbfloat16m1_t vs2, vuint16m1_t vs1, size_t vl) {
+ return __riscv_vrgather_vv_bf16m1_tu(vd, vs2, vs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 4 x bfloat> @test_vrgather_vx_bf16m1_tu(
+// CHECK-RV64-SAME: <vscale x 4 x bfloat> [[VD:%.*]], <vscale x 4 x bfloat> [[VS2:%.*]], i64 noundef [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 4 x bfloat> @llvm.riscv.vrgather.vx.nxv4bf16.i64(<vscale x 4 x bfloat> [[VD]], <vscale x 4 x bfloat> [[VS2]], i64 [[VS1]], i64 [[VL]])
+// CHECK-RV64-NEXT: ret <vscale x 4 x bfloat> [[TMP0]]
+//
+vbfloat16m1_t test_vrgather_vx_bf16m1_tu(vbfloat16m1_t vd, vbfloat16m1_t vs2, size_t vs1, size_t vl) {
+ return __riscv_vrgather_vx_bf16m1_tu(vd, vs2, vs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 8 x bfloat> @test_vrgather_vv_bf16m2_tu(
+// CHECK-RV64-SAME: <vscale x 8 x bfloat> [[VD:%.*]], <vscale x 8 x bfloat> [[VS2:%.*]], <vscale x 8 x i16> [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 8 x bfloat> @llvm.riscv.vrgather.vv.nxv8bf16.i64(<vscale x 8 x bfloat> [[VD]], <vscale x 8 x bfloat> [[VS2]], <vscale x 8 x i16> [[VS1]], i64 [[VL]])
+// CHECK-RV64-NEXT: ret <vscale x 8 x bfloat> [[TMP0]]
+//
+vbfloat16m2_t test_vrgather_vv_bf16m2_tu(vbfloat16m2_t vd, vbfloat16m2_t vs2, vuint16m2_t vs1, size_t vl) {
+ return __riscv_vrgather_vv_bf16m2_tu(vd, vs2, vs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 8 x bfloat> @test_vrgather_vx_bf16m2_tu(
+// CHECK-RV64-SAME: <vscale x 8 x bfloat> [[VD:%.*]], <vscale x 8 x bfloat> [[VS2:%.*]], i64 noundef [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 8 x bfloat> @llvm.riscv.vrgather.vx.nxv8bf16.i64(<vscale x 8 x bfloat> [[VD]], <vscale x 8 x bfloat> [[VS2]], i64 [[VS1]], i64 [[VL]])
+// CHECK-RV64-NEXT: ret <vscale x 8 x bfloat> [[TMP0]]
+//
+vbfloat16m2_t test_vrgather_vx_bf16m2_tu(vbfloat16m2_t vd, vbfloat16m2_t vs2, size_t vs1, size_t vl) {
+ return __riscv_vrgather_vx_bf16m2_tu(vd, vs2, vs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 16 x bfloat> @test_vrgather_vv_bf16m4_tu(
+// CHECK-RV64-SAME: <vscale x 16 x bfloat> [[VD:%.*]], <vscale x 16 x bfloat> [[VS2:%.*]], <vscale x 16 x i16> [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 16 x bfloat> @llvm.riscv.vrgather.vv.nxv16bf16.i64(<vscale x 16 x bfloat> [[VD]], <vscale x 16 x bfloat> [[VS2]], <vscale x 16 x i16> [[VS1]], i64 [[VL]])
+// CHECK-RV64-NEXT: ret <vscale x 16 x bfloat> [[TMP0]]
+//
+vbfloat16m4_t test_vrgather_vv_bf16m4_tu(vbfloat16m4_t vd, vbfloat16m4_t vs2, vuint16m4_t vs1, size_t vl) {
+ return __riscv_vrgather_vv_bf16m4_tu(vd, vs2, vs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 16 x bfloat> @test_vrgather_vx_bf16m4_tu(
+// CHECK-RV64-SAME: <vscale x 16 x bfloat> [[VD:%.*]], <vscale x 16 x bfloat> [[VS2:%.*]], i64 noundef [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 16 x bfloat> @llvm.riscv.vrgather.vx.nxv16bf16.i64(<vscale x 16 x bfloat> [[VD]], <vscale x 16 x bfloat> [[VS2]], i64 [[VS1]], i64 [[VL]])
+// CHECK-RV64-NEXT: ret <vscale x 16 x bfloat> [[TMP0]]
+//
+vbfloat16m4_t test_vrgather_vx_bf16m4_tu(vbfloat16m4_t vd, vbfloat16m4_t vs2, size_t vs1, size_t vl) {
+ return __riscv_vrgather_vx_bf16m4_tu(vd, vs2, vs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 32 x bfloat> @test_vrgather_vv_bf16m8_tu(
+// CHECK-RV64-SAME: <vscale x 32 x bfloat> [[VD:%.*]], <vscale x 32 x bfloat> [[VS2:%.*]], <vscale x 32 x i16> [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 32 x bfloat> @llvm.riscv.vrgather.vv.nxv32bf16.i64(<vscale x 32 x bfloat> [[VD]], <vscale x 32 x bfloat> [[VS2]], <vscale x 32 x i16> [[VS1]], i64 [[VL]])
+// CHECK-RV64-NEXT: ret <vscale x 32 x bfloat> [[TMP0]]
+//
+vbfloat16m8_t test_vrgather_vv_bf16m8_tu(vbfloat16m8_t vd, vbfloat16m8_t vs2, vuint16m8_t vs1, size_t vl) {
+ return __riscv_vrgather_vv_bf16m8_tu(vd, vs2, vs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 32 x bfloat> @test_vrgather_vx_bf16m8_tu(
+// CHECK-RV64-SAME: <vscale x 32 x bfloat> [[VD:%.*]], <vscale x 32 x bfloat> [[VS2:%.*]], i64 noundef [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 32 x bfloat> @llvm.riscv.vrgather.vx.nxv32bf16.i64(<vscale x 32 x bfloat> [[VD]], <vscale x 32 x bfloat> [[VS2]], i64 [[VS1]], i64 [[VL]])
+// CHECK-RV64-NEXT: ret <vscale x 32 x bfloat> [[TMP0]]
+//
+vbfloat16m8_t test_vrgather_vx_bf16m8_tu(vbfloat16m8_t vd, vbfloat16m8_t vs2, size_t vs1, size_t vl) {
+ return __riscv_vrgather_vx_bf16m8_tu(vd, vs2, vs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 1 x bfloat> @test_vrgather_vv_bf16mf4_tum(
+// CHECK-RV64-SAME: <vscale x 1 x i1> [[VM:%.*]], <vscale x 1 x bfloat> [[VD:%.*]], <vscale x 1 x bfloat> [[VS2:%.*]], <vscale x 1 x i16> [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 1 x bfloat> @llvm.riscv.vrgather.vv.mask.nxv1bf16.i64(<vscale x 1 x bfloat> [[VD]], <vscale x 1 x bfloat> [[VS2]], <vscale x 1 x i16> [[VS1]], <vscale x 1 x i1> [[VM]], i64 [[VL]], i64 2)
+// CHECK-RV64-NEXT: ret <vscale x 1 x bfloat> [[TMP0]]
+//
+vbfloat16mf4_t test_vrgather_vv_bf16mf4_tum(vbool64_t vm, vbfloat16mf4_t vd, vbfloat16mf4_t vs2, vuint16mf4_t vs1, size_t vl) {
+ return __riscv_vrgather_vv_bf16mf4_tum(vm, vd, vs2, vs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 1 x bfloat> @test_vrgather_vx_bf16mf4_tum(
+// CHECK-RV64-SAME: <vscale x 1 x i1> [[VM:%.*]], <vscale x 1 x bfloat> [[VD:%.*]], <vscale x 1 x bfloat> [[VS2:%.*]], i64 noundef [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 1 x bfloat> @llvm.riscv.vrgather.vx.mask.nxv1bf16.i64(<vscale x 1 x bfloat> [[VD]], <vscale x 1 x bfloat> [[VS2]], i64 [[VS1]], <vscale x 1 x i1> [[VM]], i64 [[VL]], i64 2)
+// CHECK-RV64-NEXT: ret <vscale x 1 x bfloat> [[TMP0]]
+//
+vbfloat16mf4_t test_vrgather_vx_bf16mf4_tum(vbool64_t vm, vbfloat16mf4_t vd, vbfloat16mf4_t vs2, size_t vs1, size_t vl) {
+ return __riscv_vrgather_vx_bf16mf4_tum(vm, vd, vs2, vs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 2 x bfloat> @test_vrgather_vv_bf16mf2_tum(
+// CHECK-RV64-SAME: <vscale x 2 x i1> [[VM:%.*]], <vscale x 2 x bfloat> [[VD:%.*]], <vscale x 2 x bfloat> [[VS2:%.*]], <vscale x 2 x i16> [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 2 x bfloat> @llvm.riscv.vrgather.vv.mask.nxv2bf16.i64(<vscale x 2 x bfloat> [[VD]], <vscale x 2 x bfloat> [[VS2]], <vscale x 2 x i16> [[VS1]], <vscale x 2 x i1> [[VM]], i64 [[VL]], i64 2)
+// CHECK-RV64-NEXT: ret <vscale x 2 x bfloat> [[TMP0]]
+//
+vbfloat16mf2_t test_vrgather_vv_bf16mf2_tum(vbool32_t vm, vbfloat16mf2_t vd, vbfloat16mf2_t vs2, vuint16mf2_t vs1, size_t vl) {
+ return __riscv_vrgather_vv_bf16mf2_tum(vm, vd, vs2, vs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 2 x bfloat> @test_vrgather_vx_bf16mf2_tum(
+// CHECK-RV64-SAME: <vscale x 2 x i1> [[VM:%.*]], <vscale x 2 x bfloat> [[VD:%.*]], <vscale x 2 x bfloat> [[VS2:%.*]], i64 noundef [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 2 x bfloat> @llvm.riscv.vrgather.vx.mask.nxv2bf16.i64(<vscale x 2 x bfloat> [[VD]], <vscale x 2 x bfloat> [[VS2]], i64 [[VS1]], <vscale x 2 x i1> [[VM]], i64 [[VL]], i64 2)
+// CHECK-RV64-NEXT: ret <vscale x 2 x bfloat> [[TMP0]]
+//
+vbfloat16mf2_t test_vrgather_vx_bf16mf2_tum(vbool32_t vm, vbfloat16mf2_t vd, vbfloat16mf2_t vs2, size_t vs1, size_t vl) {
+ return __riscv_vrgather_vx_bf16mf2_tum(vm, vd, vs2, vs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 4 x bfloat> @test_vrgather_vv_bf16m1_tum(
+// CHECK-RV64-SAME: <vscale x 4 x i1> [[VM:%.*]], <vscale x 4 x bfloat> [[VD:%.*]], <vscale x 4 x bfloat> [[VS2:%.*]], <vscale x 4 x i16> [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 4 x bfloat> @llvm.riscv.vrgather.vv.mask.nxv4bf16.i64(<vscale x 4 x bfloat> [[VD]], <vscale x 4 x bfloat> [[VS2]], <vscale x 4 x i16> [[VS1]], <vscale x 4 x i1> [[VM]], i64 [[VL]], i64 2)
+// CHECK-RV64-NEXT: ret <vscale x 4 x bfloat> [[TMP0]]
+//
+vbfloat16m1_t test_vrgather_vv_bf16m1_tum(vbool16_t vm, vbfloat16m1_t vd, vbfloat16m1_t vs2, vuint16m1_t vs1, size_t vl) {
+ return __riscv_vrgather_vv_bf16m1_tum(vm, vd, vs2, vs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 4 x bfloat> @test_vrgather_vx_bf16m1_tum(
+// CHECK-RV64-SAME: <vscale x 4 x i1> [[VM:%.*]], <vscale x 4 x bfloat> [[VD:%.*]], <vscale x 4 x bfloat> [[VS2:%.*]], i64 noundef [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 4 x bfloat> @llvm.riscv.vrgather.vx.mask.nxv4bf16.i64(<vscale x 4 x bfloat> [[VD]], <vscale x 4 x bfloat> [[VS2]], i64 [[VS1]], <vscale x 4 x i1> [[VM]], i64 [[VL]], i64 2)
+// CHECK-RV64-NEXT: ret <vscale x 4 x bfloat> [[TMP0]]
+//
+vbfloat16m1_t test_vrgather_vx_bf16m1_tum(vbool16_t vm, vbfloat16m1_t vd, vbfloat16m1_t vs2, size_t vs1, size_t vl) {
+ return __riscv_vrgather_vx_bf16m1_tum(vm, vd, vs2, vs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 8 x bfloat> @test_vrgather_vv_bf16m2_tum(
+// CHECK-RV64-SAME: <vscale x 8 x i1> [[VM:%.*]], <vscale x 8 x bfloat> [[VD:%.*]], <vscale x 8 x bfloat> [[VS2:%.*]], <vscale x 8 x i16> [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 8 x bfloat> @llvm.riscv.vrgather.vv.mask.nxv8bf16.i64(<vscale x 8 x bfloat> [[VD]], <vscale x 8 x bfloat> [[VS2]], <vscale x 8 x i16> [[VS1]], <vscale x 8 x i1> [[VM]], i64 [[VL]], i64 2)
+// CHECK-RV64-NEXT: ret <vscale x 8 x bfloat> [[TMP0]]
+//
+vbfloat16m2_t test_vrgather_vv_bf16m2_tum(vbool8_t vm, vbfloat16m2_t vd, vbfloat16m2_t vs2, vuint16m2_t vs1, size_t vl) {
+ return __riscv_vrgather_vv_bf16m2_tum(vm, vd, vs2, vs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 8 x bfloat> @test_vrgather_vx_bf16m2_tum(
+// CHECK-RV64-SAME: <vscale x 8 x i1> [[VM:%.*]], <vscale x 8 x bfloat> [[VD:%.*]], <vscale x 8 x bfloat> [[VS2:%.*]], i64 noundef [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 8 x bfloat> @llvm.riscv.vrgather.vx.mask.nxv8bf16.i64(<vscale x 8 x bfloat> [[VD]], <vscale x 8 x bfloat> [[VS2]], i64 [[VS1]], <vscale x 8 x i1> [[VM]], i64 [[VL]], i64 2)
+// CHECK-RV64-NEXT: ret <vscale x 8 x bfloat> [[TMP0]]
+//
+vbfloat16m2_t test_vrgather_vx_bf16m2_tum(vbool8_t vm, vbfloat16m2_t vd, vbfloat16m2_t vs2, size_t vs1, size_t vl) {
+ return __riscv_vrgather_vx_bf16m2_tum(vm, vd, vs2, vs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 16 x bfloat> @test_vrgather_vv_bf16m4_tum(
+// CHECK-RV64-SAME: <vscale x 16 x i1> [[VM:%.*]], <vscale x 16 x bfloat> [[VD:%.*]], <vscale x 16 x bfloat> [[VS2:%.*]], <vscale x 16 x i16> [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 16 x bfloat> @llvm.riscv.vrgather.vv.mask.nxv16bf16.i64(<vscale x 16 x bfloat> [[VD]], <vscale x 16 x bfloat> [[VS2]], <vscale x 16 x i16> [[VS1]], <vscale x 16 x i1> [[VM]], i64 [[VL]], i64 2)
+// CHECK-RV64-NEXT: ret <vscale x 16 x bfloat> [[TMP0]]
+//
+vbfloat16m4_t test_vrgather_vv_bf16m4_tum(vbool4_t vm, vbfloat16m4_t vd, vbfloat16m4_t vs2, vuint16m4_t vs1, size_t vl) {
+ return __riscv_vrgather_vv_bf16m4_tum(vm, vd, vs2, vs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 16 x bfloat> @test_vrgather_vx_bf16m4_tum(
+// CHECK-RV64-SAME: <vscale x 16 x i1> [[VM:%.*]], <vscale x 16 x bfloat> [[VD:%.*]], <vscale x 16 x bfloat> [[VS2:%.*]], i64 noundef [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 16 x bfloat> @llvm.riscv.vrgather.vx.mask.nxv16bf16.i64(<vscale x 16 x bfloat> [[VD]], <vscale x 16 x bfloat> [[VS2]], i64 [[VS1]], <vscale x 16 x i1> [[VM]], i64 [[VL]], i64 2)
+// CHECK-RV64-NEXT: ret <vscale x 16 x bfloat> [[TMP0]]
+//
+vbfloat16m4_t test_vrgather_vx_bf16m4_tum(vbool4_t vm, vbfloat16m4_t vd, vbfloat16m4_t vs2, size_t vs1, size_t vl) {
+ return __riscv_vrgather_vx_bf16m4_tum(vm, vd, vs2, vs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 32 x bfloat> @test_vrgather_vv_bf16m8_tum(
+// CHECK-RV64-SAME: <vscale x 32 x i1> [[VM:%.*]], <vscale x 32 x bfloat> [[VD:%.*]], <vscale x 32 x bfloat> [[VS2:%.*]], <vscale x 32 x i16> [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 32 x bfloat> @llvm.riscv.vrgather.vv.mask.nxv32bf16.i64(<vscale x 32 x bfloat> [[VD]], <vscale x 32 x bfloat> [[VS2]], <vscale x 32 x i16> [[VS1]], <vscale x 32 x i1> [[VM]], i64 [[VL]], i64 2)
+// CHECK-RV64-NEXT: ret <vscale x 32 x bfloat> [[TMP0]]
+//
+vbfloat16m8_t test_vrgather_vv_bf16m8_tum(vbool2_t vm, vbfloat16m8_t vd, vbfloat16m8_t vs2, vuint16m8_t vs1, size_t vl) {
+ return __riscv_vrgather_vv_bf16m8_tum(vm, vd, vs2, vs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 32 x bfloat> @test_vrgather_vx_bf16m8_tum(
+// CHECK-RV64-SAME: <vscale x 32 x i1> [[VM:%.*]], <vscale x 32 x bfloat> [[VD:%.*]], <vscale x 32 x bfloat> [[VS2:%.*]], i64 noundef [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 32 x bfloat> @llvm.riscv.vrgather.vx.mask.nxv32bf16.i64(<vscale x 32 x bfloat> [[VD]], <vscale x 32 x bfloat> [[VS2]], i64 [[VS1]], <vscale x 32 x i1> [[VM]], i64 [[VL]], i64 2)
+// CHECK-RV64-NEXT: ret <vscale x 32 x bfloat> [[TMP0]]
+//
+vbfloat16m8_t test_vrgather_vx_bf16m8_tum(vbool2_t vm, vbfloat16m8_t vd, vbfloat16m8_t vs2, size_t vs1, size_t vl) {
+ return __riscv_vrgather_vx_bf16m8_tum(vm, vd, vs2, vs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 1 x bfloat> @test_vrgather_vv_bf16mf4_tumu(
+// CHECK-RV64-SAME: <vscale x 1 x i1> [[VM:%.*]], <vscale x 1 x bfloat> [[VD:%.*]], <vscale x 1 x bfloat> [[VS2:%.*]], <vscale x 1 x i16> [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 1 x bfloat> @llvm.riscv.vrgather.vv.mask.nxv1bf16.i64(<vscale x 1 x bfloat> [[VD]], <vscale x 1 x bfloat> [[VS2]], <vscale x 1 x i16> [[VS1]], <vscale x 1 x i1> [[VM]], i64 [[VL]], i64 0)
+// CHECK-RV64-NEXT: ret <vscale x 1 x bfloat> [[TMP0]]
+//
+vbfloat16mf4_t test_vrgather_vv_bf16mf4_tumu(vbool64_t vm, vbfloat16mf4_t vd, vbfloat16mf4_t vs2, vuint16mf4_t vs1, size_t vl) {
+ return __riscv_vrgather_vv_bf16mf4_tumu(vm, vd, vs2, vs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 1 x bfloat> @test_vrgather_vx_bf16mf4_tumu(
+// CHECK-RV64-SAME: <vscale x 1 x i1> [[VM:%.*]], <vscale x 1 x bfloat> [[VD:%.*]], <vscale x 1 x bfloat> [[VS2:%.*]], i64 noundef [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 1 x bfloat> @llvm.riscv.vrgather.vx.mask.nxv1bf16.i64(<vscale x 1 x bfloat> [[VD]], <vscale x 1 x bfloat> [[VS2]], i64 [[VS1]], <vscale x 1 x i1> [[VM]], i64 [[VL]], i64 0)
+// CHECK-RV64-NEXT: ret <vscale x 1 x bfloat> [[TMP0]]
+//
+vbfloat16mf4_t test_vrgather_vx_bf16mf4_tumu(vbool64_t vm, vbfloat16mf4_t vd, vbfloat16mf4_t vs2, size_t vs1, size_t vl) {
+ return __riscv_vrgather_vx_bf16mf4_tumu(vm, vd, vs2, vs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 2 x bfloat> @test_vrgather_vv_bf16mf2_tumu(
+// CHECK-RV64-SAME: <vscale x 2 x i1> [[VM:%.*]], <vscale x 2 x bfloat> [[VD:%.*]], <vscale x 2 x bfloat> [[VS2:%.*]], <vscale x 2 x i16> [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 2 x bfloat> @llvm.riscv.vrgather.vv.mask.nxv2bf16.i64(<vscale x 2 x bfloat> [[VD]], <vscale x 2 x bfloat> [[VS2]], <vscale x 2 x i16> [[VS1]], <vscale x 2 x i1> [[VM]], i64 [[VL]], i64 0)
+// CHECK-RV64-NEXT: ret <vscale x 2 x bfloat> [[TMP0]]
+//
+vbfloat16mf2_t test_vrgather_vv_bf16mf2_tumu(vbool32_t vm, vbfloat16mf2_t vd, vbfloat16mf2_t vs2, vuint16mf2_t vs1, size_t vl) {
+ return __riscv_vrgather_vv_bf16mf2_tumu(vm, vd, vs2, vs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 2 x bfloat> @test_vrgather_vx_bf16mf2_tumu(
+// CHECK-RV64-SAME: <vscale x 2 x i1> [[VM:%.*]], <vscale x 2 x bfloat> [[VD:%.*]], <vscale x 2 x bfloat> [[VS2:%.*]], i64 noundef [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 2 x bfloat> @llvm.riscv.vrgather.vx.mask.nxv2bf16.i64(<vscale x 2 x bfloat> [[VD]], <vscale x 2 x bfloat> [[VS2]], i64 [[VS1]], <vscale x 2 x i1> [[VM]], i64 [[VL]], i64 0)
+// CHECK-RV64-NEXT: ret <vscale x 2 x bfloat> [[TMP0]]
+//
+vbfloat16mf2_t test_vrgather_vx_bf16mf2_tumu(vbool32_t vm, vbfloat16mf2_t vd, vbfloat16mf2_t vs2, size_t vs1, size_t vl) {
+ return __riscv_vrgather_vx_bf16mf2_tumu(vm, vd, vs2, vs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 4 x bfloat> @test_vrgather_vv_bf16m1_tumu(
+// CHECK-RV64-SAME: <vscale x 4 x i1> [[VM:%.*]], <vscale x 4 x bfloat> [[VD:%.*]], <vscale x 4 x bfloat> [[VS2:%.*]], <vscale x 4 x i16> [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 4 x bfloat> @llvm.riscv.vrgather.vv.mask.nxv4bf16.i64(<vscale x 4 x bfloat> [[VD]], <vscale x 4 x bfloat> [[VS2]], <vscale x 4 x i16> [[VS1]], <vscale x 4 x i1> [[VM]], i64 [[VL]], i64 0)
+// CHECK-RV64-NEXT: ret <vscale x 4 x bfloat> [[TMP0]]
+//
+vbfloat16m1_t test_vrgather_vv_bf16m1_tumu(vbool16_t vm, vbfloat16m1_t vd, vbfloat16m1_t vs2, vuint16m1_t vs1, size_t vl) {
+ return __riscv_vrgather_vv_bf16m1_tumu(vm, vd, vs2, vs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 4 x bfloat> @test_vrgather_vx_bf16m1_tumu(
+// CHECK-RV64-SAME: <vscale x 4 x i1> [[VM:%.*]], <vscale x 4 x bfloat> [[VD:%.*]], <vscale x 4 x bfloat> [[VS2:%.*]], i64 noundef [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 4 x bfloat> @llvm.riscv.vrgather.vx.mask.nxv4bf16.i64(<vscale x 4 x bfloat> [[VD]], <vscale x 4 x bfloat> [[VS2]], i64 [[VS1]], <vscale x 4 x i1> [[VM]], i64 [[VL]], i64 0)
+// CHECK-RV64-NEXT: ret <vscale x 4 x bfloat> [[TMP0]]
+//
+vbfloat16m1_t test_vrgather_vx_bf16m1_tumu(vbool16_t vm, vbfloat16m1_t vd, vbfloat16m1_t vs2, size_t vs1, size_t vl) {
+ return __riscv_vrgather_vx_bf16m1_tumu(vm, vd, vs2, vs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 8 x bfloat> @test_vrgather_vv_bf16m2_tumu(
+// CHECK-RV64-SAME: <vscale x 8 x i1> [[VM:%.*]], <vscale x 8 x bfloat> [[VD:%.*]], <vscale x 8 x bfloat> [[VS2:%.*]], <vscale x 8 x i16> [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 8 x bfloat> @llvm.riscv.vrgather.vv.mask.nxv8bf16.i64(<vscale x 8 x bfloat> [[VD]], <vscale x 8 x bfloat> [[VS2]], <vscale x 8 x i16> [[VS1]], <vscale x 8 x i1> [[VM]], i64 [[VL]], i64 0)
+// CHECK-RV64-NEXT: ret <vscale x 8 x bfloat> [[TMP0]]
+//
+vbfloat16m2_t test_vrgather_vv_bf16m2_tumu(vbool8_t vm, vbfloat16m2_t vd, vbfloat16m2_t vs2, vuint16m2_t vs1, size_t vl) {
+ return __riscv_vrgather_vv_bf16m2_tumu(vm, vd, vs2, vs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 8 x bfloat> @test_vrgather_vx_bf16m2_tumu(
+// CHECK-RV64-SAME: <vscale x 8 x i1> [[VM:%.*]], <vscale x 8 x bfloat> [[VD:%.*]], <vscale x 8 x bfloat> [[VS2:%.*]], i64 noundef [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 8 x bfloat> @llvm.riscv.vrgather.vx.mask.nxv8bf16.i64(<vscale x 8 x bfloat> [[VD]], <vscale x 8 x bfloat> [[VS2]], i64 [[VS1]], <vscale x 8 x i1> [[VM]], i64 [[VL]], i64 0)
+// CHECK-RV64-NEXT: ret <vscale x 8 x bfloat> [[TMP0]]
+//
+vbfloat16m2_t test_vrgather_vx_bf16m2_tumu(vbool8_t vm, vbfloat16m2_t vd, vbfloat16m2_t vs2, size_t vs1, size_t vl) {
+ return __riscv_vrgather_vx_bf16m2_tumu(vm, vd, vs2, vs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 16 x bfloat> @test_vrgather_vv_bf16m4_tumu(
+// CHECK-RV64-SAME: <vscale x 16 x i1> [[VM:%.*]], <vscale x 16 x bfloat> [[VD:%.*]], <vscale x 16 x bfloat> [[VS2:%.*]], <vscale x 16 x i16> [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 16 x bfloat> @llvm.riscv.vrgather.vv.mask.nxv16bf16.i64(<vscale x 16 x bfloat> [[VD]], <vscale x 16 x bfloat> [[VS2]], <vscale x 16 x i16> [[VS1]], <vscale x 16 x i1> [[VM]], i64 [[VL]], i64 0)
+// CHECK-RV64-NEXT: ret <vscale x 16 x bfloat> [[TMP0]]
+//
+vbfloat16m4_t test_vrgather_vv_bf16m4_tumu(vbool4_t vm, vbfloat16m4_t vd, vbfloat16m4_t vs2, vuint16m4_t vs1, size_t vl) {
+ return __riscv_vrgather_vv_bf16m4_tumu(vm, vd, vs2, vs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 16 x bfloat> @test_vrgather_vx_bf16m4_tumu(
+// CHECK-RV64-SAME: <vscale x 16 x i1> [[VM:%.*]], <vscale x 16 x bfloat> [[VD:%.*]], <vscale x 16 x bfloat> [[VS2:%.*]], i64 noundef [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 16 x bfloat> @llvm.riscv.vrgather.vx.mask.nxv16bf16.i64(<vscale x 16 x bfloat> [[VD]], <vscale x 16 x bfloat> [[VS2]], i64 [[VS1]], <vscale x 16 x i1> [[VM]], i64 [[VL]], i64 0)
+// CHECK-RV64-NEXT: ret <vscale x 16 x bfloat> [[TMP0]]
+//
+vbfloat16m4_t test_vrgather_vx_bf16m4_tumu(vbool4_t vm, vbfloat16m4_t vd, vbfloat16m4_t vs2, size_t vs1, size_t vl) {
+ return __riscv_vrgather_vx_bf16m4_tumu(vm, vd, vs2, vs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 32 x bfloat> @test_vrgather_vv_bf16m8_tumu(
+// CHECK-RV64-SAME: <vscale x 32 x i1> [[VM:%.*]], <vscale x 32 x bfloat> [[VD:%.*]], <vscale x 32 x bfloat> [[VS2:%.*]], <vscale x 32 x i16> [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 32 x bfloat> @llvm.riscv.vrgather.vv.mask.nxv32bf16.i64(<vscale x 32 x bfloat> [[VD]], <vscale x 32 x bfloat> [[VS2]], <vscale x 32 x i16> [[VS1]], <vscale x 32 x i1> [[VM]], i64 [[VL]], i64 0)
+// CHECK-RV64-NEXT: ret <vscale x 32 x bfloat> [[TMP0]]
+//
+vbfloat16m8_t test_vrgather_vv_bf16m8_tumu(vbool2_t vm, vbfloat16m8_t vd, vbfloat16m8_t vs2, vuint16m8_t vs1, size_t vl) {
+ return __riscv_vrgather_vv_bf16m8_tumu(vm, vd, vs2, vs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 32 x bfloat> @test_vrgather_vx_bf16m8_tumu(
+// CHECK-RV64-SAME: <vscale x 32 x i1> [[VM:%.*]], <vscale x 32 x bfloat> [[VD:%.*]], <vscale x 32 x bfloat> [[VS2:%.*]], i64 noundef [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 32 x bfloat> @llvm.riscv.vrgather.vx.mask.nxv32bf16.i64(<vscale x 32 x bfloat> [[VD]], <vscale x 32 x bfloat> [[VS2]], i64 [[VS1]], <vscale x 32 x i1> [[VM]], i64 [[VL]], i64 0)
+// CHECK-RV64-NEXT: ret <vscale x 32 x bfloat> [[TMP0]]
+//
+vbfloat16m8_t test_vrgather_vx_bf16m8_tumu(vbool2_t vm, vbfloat16m8_t vd, vbfloat16m8_t vs2, size_t vs1, size_t vl) {
+ return __riscv_vrgather_vx_bf16m8_tumu(vm, vd, vs2, vs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 1 x bfloat> @test_vrgather_vv_bf16mf4_mu(
+// CHECK-RV64-SAME: <vscale x 1 x i1> [[VM:%.*]], <vscale x 1 x bfloat> [[VD:%.*]], <vscale x 1 x bfloat> [[VS2:%.*]], <vscale x 1 x i16> [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 1 x bfloat> @llvm.riscv.vrgather.vv.mask.nxv1bf16.i64(<vscale x 1 x bfloat> [[VD]], <vscale x 1 x bfloat> [[VS2]], <vscale x 1 x i16> [[VS1]], <vscale x 1 x i1> [[VM]], i64 [[VL]], i64 1)
+// CHECK-RV64-NEXT: ret <vscale x 1 x bfloat> [[TMP0]]
+//
+vbfloat16mf4_t test_vrgather_vv_bf16mf4_mu(vbool64_t vm, vbfloat16mf4_t vd, vbfloat16mf4_t vs2, vuint16mf4_t vs1, size_t vl) {
+ return __riscv_vrgather_vv_bf16mf4_mu(vm, vd, vs2, vs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 1 x bfloat> @test_vrgather_vx_bf16mf4_mu(
+// CHECK-RV64-SAME: <vscale x 1 x i1> [[VM:%.*]], <vscale x 1 x bfloat> [[VD:%.*]], <vscale x 1 x bfloat> [[VS2:%.*]], i64 noundef [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 1 x bfloat> @llvm.riscv.vrgather.vx.mask.nxv1bf16.i64(<vscale x 1 x bfloat> [[VD]], <vscale x 1 x bfloat> [[VS2]], i64 [[VS1]], <vscale x 1 x i1> [[VM]], i64 [[VL]], i64 1)
+// CHECK-RV64-NEXT: ret <vscale x 1 x bfloat> [[TMP0]]
+//
+vbfloat16mf4_t test_vrgather_vx_bf16mf4_mu(vbool64_t vm, vbfloat16mf4_t vd, vbfloat16mf4_t vs2, size_t vs1, size_t vl) {
+ return __riscv_vrgather_vx_bf16mf4_mu(vm, vd, vs2, vs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 2 x bfloat> @test_vrgather_vv_bf16mf2_mu(
+// CHECK-RV64-SAME: <vscale x 2 x i1> [[VM:%.*]], <vscale x 2 x bfloat> [[VD:%.*]], <vscale x 2 x bfloat> [[VS2:%.*]], <vscale x 2 x i16> [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 2 x bfloat> @llvm.riscv.vrgather.vv.mask.nxv2bf16.i64(<vscale x 2 x bfloat> [[VD]], <vscale x 2 x bfloat> [[VS2]], <vscale x 2 x i16> [[VS1]], <vscale x 2 x i1> [[VM]], i64 [[VL]], i64 1)
+// CHECK-RV64-NEXT: ret <vscale x 2 x bfloat> [[TMP0]]
+//
+vbfloat16mf2_t test_vrgather_vv_bf16mf2_mu(vbool32_t vm, vbfloat16mf2_t vd, vbfloat16mf2_t vs2, vuint16mf2_t vs1, size_t vl) {
+ return __riscv_vrgather_vv_bf16mf2_mu(vm, vd, vs2, vs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 2 x bfloat> @test_vrgather_vx_bf16mf2_mu(
+// CHECK-RV64-SAME: <vscale x 2 x i1> [[VM:%.*]], <vscale x 2 x bfloat> [[VD:%.*]], <vscale x 2 x bfloat> [[VS2:%.*]], i64 noundef [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 2 x bfloat> @llvm.riscv.vrgather.vx.mask.nxv2bf16.i64(<vscale x 2 x bfloat> [[VD]], <vscale x 2 x bfloat> [[VS2]], i64 [[VS1]], <vscale x 2 x i1> [[VM]], i64 [[VL]], i64 1)
+// CHECK-RV64-NEXT: ret <vscale x 2 x bfloat> [[TMP0]]
+//
+vbfloat16mf2_t test_vrgather_vx_bf16mf2_mu(vbool32_t vm, vbfloat16mf2_t vd, vbfloat16mf2_t vs2, size_t vs1, size_t vl) {
+ return __riscv_vrgather_vx_bf16mf2_mu(vm, vd, vs2, vs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 4 x bfloat> @test_vrgather_vv_bf16m1_mu(
+// CHECK-RV64-SAME: <vscale x 4 x i1> [[VM:%.*]], <vscale x 4 x bfloat> [[VD:%.*]], <vscale x 4 x bfloat> [[VS2:%.*]], <vscale x 4 x i16> [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 4 x bfloat> @llvm.riscv.vrgather.vv.mask.nxv4bf16.i64(<vscale x 4 x bfloat> [[VD]], <vscale x 4 x bfloat> [[VS2]], <vscale x 4 x i16> [[VS1]], <vscale x 4 x i1> [[VM]], i64 [[VL]], i64 1)
+// CHECK-RV64-NEXT: ret <vscale x 4 x bfloat> [[TMP0]]
+//
+vbfloat16m1_t test_vrgather_vv_bf16m1_mu(vbool16_t vm, vbfloat16m1_t vd, vbfloat16m1_t vs2, vuint16m1_t vs1, size_t vl) {
+ return __riscv_vrgather_vv_bf16m1_mu(vm, vd, vs2, vs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 4 x bfloat> @test_vrgather_vx_bf16m1_mu(
+// CHECK-RV64-SAME: <vscale x 4 x i1> [[VM:%.*]], <vscale x 4 x bfloat> [[VD:%.*]], <vscale x 4 x bfloat> [[VS2:%.*]], i64 noundef [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 4 x bfloat> @llvm.riscv.vrgather.vx.mask.nxv4bf16.i64(<vscale x 4 x bfloat> [[VD]], <vscale x 4 x bfloat> [[VS2]], i64 [[VS1]], <vscale x 4 x i1> [[VM]], i64 [[VL]], i64 1)
+// CHECK-RV64-NEXT: ret <vscale x 4 x bfloat> [[TMP0]]
+//
+vbfloat16m1_t test_vrgather_vx_bf16m1_mu(vbool16_t vm, vbfloat16m1_t vd, vbfloat16m1_t vs2, size_t vs1, size_t vl) {
+ return __riscv_vrgather_vx_bf16m1_mu(vm, vd, vs2, vs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 8 x bfloat> @test_vrgather_vv_bf16m2_mu(
+// CHECK-RV64-SAME: <vscale x 8 x i1> [[VM:%.*]], <vscale x 8 x bfloat> [[VD:%.*]], <vscale x 8 x bfloat> [[VS2:%.*]], <vscale x 8 x i16> [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 8 x bfloat> @llvm.riscv.vrgather.vv.mask.nxv8bf16.i64(<vscale x 8 x bfloat> [[VD]], <vscale x 8 x bfloat> [[VS2]], <vscale x 8 x i16> [[VS1]], <vscale x 8 x i1> [[VM]], i64 [[VL]], i64 1)
+// CHECK-RV64-NEXT: ret <vscale x 8 x bfloat> [[TMP0]]
+//
+vbfloat16m2_t test_vrgather_vv_bf16m2_mu(vbool8_t vm, vbfloat16m2_t vd, vbfloat16m2_t vs2, vuint16m2_t vs1, size_t vl) {
+ return __riscv_vrgather_vv_bf16m2_mu(vm, vd, vs2, vs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 8 x bfloat> @test_vrgather_vx_bf16m2_mu(
+// CHECK-RV64-SAME: <vscale x 8 x i1> [[VM:%.*]], <vscale x 8 x bfloat> [[VD:%.*]], <vscale x 8 x bfloat> [[VS2:%.*]], i64 noundef [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 8 x bfloat> @llvm.riscv.vrgather.vx.mask.nxv8bf16.i64(<vscale x 8 x bfloat> [[VD]], <vscale x 8 x bfloat> [[VS2]], i64 [[VS1]], <vscale x 8 x i1> [[VM]], i64 [[VL]], i64 1)
+// CHECK-RV64-NEXT: ret <vscale x 8 x bfloat> [[TMP0]]
+//
+vbfloat16m2_t test_vrgather_vx_bf16m2_mu(vbool8_t vm, vbfloat16m2_t vd, vbfloat16m2_t vs2, size_t vs1, size_t vl) {
+ return __riscv_vrgather_vx_bf16m2_mu(vm, vd, vs2, vs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 16 x bfloat> @test_vrgather_vv_bf16m4_mu(
+// CHECK-RV64-SAME: <vscale x 16 x i1> [[VM:%.*]], <vscale x 16 x bfloat> [[VD:%.*]], <vscale x 16 x bfloat> [[VS2:%.*]], <vscale x 16 x i16> [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 16 x bfloat> @llvm.riscv.vrgather.vv.mask.nxv16bf16.i64(<vscale x 16 x bfloat> [[VD]], <vscale x 16 x bfloat> [[VS2]], <vscale x 16 x i16> [[VS1]], <vscale x 16 x i1> [[VM]], i64 [[VL]], i64 1)
+// CHECK-RV64-NEXT: ret <vscale x 16 x bfloat> [[TMP0]]
+//
+vbfloat16m4_t test_vrgather_vv_bf16m4_mu(vbool4_t vm, vbfloat16m4_t vd, vbfloat16m4_t vs2, vuint16m4_t vs1, size_t vl) {
+ return __riscv_vrgather_vv_bf16m4_mu(vm, vd, vs2, vs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 16 x bfloat> @test_vrgather_vx_bf16m4_mu(
+// CHECK-RV64-SAME: <vscale x 16 x i1> [[VM:%.*]], <vscale x 16 x bfloat> [[VD:%.*]], <vscale x 16 x bfloat> [[VS2:%.*]], i64 noundef [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 16 x bfloat> @llvm.riscv.vrgather.vx.mask.nxv16bf16.i64(<vscale x 16 x bfloat> [[VD]], <vscale x 16 x bfloat> [[VS2]], i64 [[VS1]], <vscale x 16 x i1> [[VM]], i64 [[VL]], i64 1)
+// CHECK-RV64-NEXT: ret <vscale x 16 x bfloat> [[TMP0]]
+//
+vbfloat16m4_t test_vrgather_vx_bf16m4_mu(vbool4_t vm, vbfloat16m4_t vd, vbfloat16m4_t vs2, size_t vs1, size_t vl) {
+ return __riscv_vrgather_vx_bf16m4_mu(vm, vd, vs2, vs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 32 x bfloat> @test_vrgather_vv_bf16m8_mu(
+// CHECK-RV64-SAME: <vscale x 32 x i1> [[VM:%.*]], <vscale x 32 x bfloat> [[VD:%.*]], <vscale x 32 x bfloat> [[VS2:%.*]], <vscale x 32 x i16> [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 32 x bfloat> @llvm.riscv.vrgather.vv.mask.nxv32bf16.i64(<vscale x 32 x bfloat> [[VD]], <vscale x 32 x bfloat> [[VS2]], <vscale x 32 x i16> [[VS1]], <vscale x 32 x i1> [[VM]], i64 [[VL]], i64 1)
+// CHECK-RV64-NEXT: ret <vscale x 32 x bfloat> [[TMP0]]
+//
+vbfloat16m8_t test_vrgather_vv_bf16m8_mu(vbool2_t vm, vbfloat16m8_t vd, vbfloat16m8_t vs2, vuint16m8_t vs1, size_t vl) {
+ return __riscv_vrgather_vv_bf16m8_mu(vm, vd, vs2, vs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 32 x bfloat> @test_vrgather_vx_bf16m8_mu(
+// CHECK-RV64-SAME: <vscale x 32 x i1> [[VM:%.*]], <vscale x 32 x bfloat> [[VD:%.*]], <vscale x 32 x bfloat> [[VS2:%.*]], i64 noundef [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 32 x bfloat> @llvm.riscv.vrgather.vx.mask.nxv32bf16.i64(<vscale x 32 x bfloat> [[VD]], <vscale x 32 x bfloat> [[VS2]], i64 [[VS1]], <vscale x 32 x i1> [[VM]], i64 [[VL]], i64 1)
+// CHECK-RV64-NEXT: ret <vscale x 32 x bfloat> [[TMP0]]
+//
+vbfloat16m8_t test_vrgather_vx_bf16m8_mu(vbool2_t vm, vbfloat16m8_t vd, vbfloat16m8_t vs2, size_t vs1, size_t vl) {
+ return __riscv_vrgather_vx_bf16m8_mu(vm, vd, vs2, vs1, vl);
+}
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/overloaded/vcompress.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/overloaded/vcompress.c
new file mode 100644
index 0000000..079977a
--- /dev/null
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/overloaded/vcompress.c
@@ -0,0 +1,76 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4
+// REQUIRES: riscv-registered-target
+// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \
+// RUN: -target-feature +zvfbfmin -disable-O0-optnone \
+// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \
+// RUN: FileCheck --check-prefix=CHECK-RV64 %s
+
+#include <riscv_vector.h>
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 1 x bfloat> @test_vcompress_vm_bf16mf4_tu(
+// CHECK-RV64-SAME: <vscale x 1 x bfloat> [[VD:%.*]], <vscale x 1 x bfloat> [[VS2:%.*]], <vscale x 1 x i1> [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 1 x bfloat> @llvm.riscv.vcompress.nxv1bf16.i64(<vscale x 1 x bfloat> [[VD]], <vscale x 1 x bfloat> [[VS2]], <vscale x 1 x i1> [[VS1]], i64 [[VL]])
+// CHECK-RV64-NEXT: ret <vscale x 1 x bfloat> [[TMP0]]
+//
+vbfloat16mf4_t test_vcompress_vm_bf16mf4_tu(vbfloat16mf4_t vd,
+ vbfloat16mf4_t vs2, vbool64_t vs1,
+ size_t vl) {
+ return __riscv_vcompress_tu(vd, vs2, vs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 2 x bfloat> @test_vcompress_vm_bf16mf2_tu(
+// CHECK-RV64-SAME: <vscale x 2 x bfloat> [[VD:%.*]], <vscale x 2 x bfloat> [[VS2:%.*]], <vscale x 2 x i1> [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 2 x bfloat> @llvm.riscv.vcompress.nxv2bf16.i64(<vscale x 2 x bfloat> [[VD]], <vscale x 2 x bfloat> [[VS2]], <vscale x 2 x i1> [[VS1]], i64 [[VL]])
+// CHECK-RV64-NEXT: ret <vscale x 2 x bfloat> [[TMP0]]
+//
+vbfloat16mf2_t test_vcompress_vm_bf16mf2_tu(vbfloat16mf2_t vd,
+ vbfloat16mf2_t vs2, vbool32_t vs1,
+ size_t vl) {
+ return __riscv_vcompress_tu(vd, vs2, vs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 4 x bfloat> @test_vcompress_vm_bf16m1_tu(
+// CHECK-RV64-SAME: <vscale x 4 x bfloat> [[VD:%.*]], <vscale x 4 x bfloat> [[VS2:%.*]], <vscale x 4 x i1> [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 4 x bfloat> @llvm.riscv.vcompress.nxv4bf16.i64(<vscale x 4 x bfloat> [[VD]], <vscale x 4 x bfloat> [[VS2]], <vscale x 4 x i1> [[VS1]], i64 [[VL]])
+// CHECK-RV64-NEXT: ret <vscale x 4 x bfloat> [[TMP0]]
+//
+vbfloat16m1_t test_vcompress_vm_bf16m1_tu(vbfloat16m1_t vd, vbfloat16m1_t vs2,
+ vbool16_t vs1, size_t vl) {
+ return __riscv_vcompress_tu(vd, vs2, vs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 8 x bfloat> @test_vcompress_vm_bf16m2_tu(
+// CHECK-RV64-SAME: <vscale x 8 x bfloat> [[VD:%.*]], <vscale x 8 x bfloat> [[VS2:%.*]], <vscale x 8 x i1> [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 8 x bfloat> @llvm.riscv.vcompress.nxv8bf16.i64(<vscale x 8 x bfloat> [[VD]], <vscale x 8 x bfloat> [[VS2]], <vscale x 8 x i1> [[VS1]], i64 [[VL]])
+// CHECK-RV64-NEXT: ret <vscale x 8 x bfloat> [[TMP0]]
+//
+vbfloat16m2_t test_vcompress_vm_bf16m2_tu(vbfloat16m2_t vd, vbfloat16m2_t vs2,
+ vbool8_t vs1, size_t vl) {
+ return __riscv_vcompress_tu(vd, vs2, vs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 16 x bfloat> @test_vcompress_vm_bf16m4_tu(
+// CHECK-RV64-SAME: <vscale x 16 x bfloat> [[VD:%.*]], <vscale x 16 x bfloat> [[VS2:%.*]], <vscale x 16 x i1> [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 16 x bfloat> @llvm.riscv.vcompress.nxv16bf16.i64(<vscale x 16 x bfloat> [[VD]], <vscale x 16 x bfloat> [[VS2]], <vscale x 16 x i1> [[VS1]], i64 [[VL]])
+// CHECK-RV64-NEXT: ret <vscale x 16 x bfloat> [[TMP0]]
+//
+vbfloat16m4_t test_vcompress_vm_bf16m4_tu(vbfloat16m4_t vd, vbfloat16m4_t vs2,
+ vbool4_t vs1, size_t vl) {
+ return __riscv_vcompress_tu(vd, vs2, vs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 32 x bfloat> @test_vcompress_vm_bf16m8_tu(
+// CHECK-RV64-SAME: <vscale x 32 x bfloat> [[VD:%.*]], <vscale x 32 x bfloat> [[VS2:%.*]], <vscale x 32 x i1> [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 32 x bfloat> @llvm.riscv.vcompress.nxv32bf16.i64(<vscale x 32 x bfloat> [[VD]], <vscale x 32 x bfloat> [[VS2]], <vscale x 32 x i1> [[VS1]], i64 [[VL]])
+// CHECK-RV64-NEXT: ret <vscale x 32 x bfloat> [[TMP0]]
+//
+vbfloat16m8_t test_vcompress_vm_bf16m8_tu(vbfloat16m8_t vd, vbfloat16m8_t vs2,
+ vbool2_t vs1, size_t vl) {
+ return __riscv_vcompress_tu(vd, vs2, vs1, vl);
+}
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/overloaded/vrgather.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/overloaded/vrgather.c
new file mode 100644
index 0000000..7a5624a
--- /dev/null
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/zvfbfmin/policy/overloaded/vrgather.c
@@ -0,0 +1,576 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4
+// REQUIRES: riscv-registered-target
+// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x \
+// RUN: -target-feature +zvfbfmin -disable-O0-optnone \
+// RUN: -emit-llvm %s -o - | opt -S -passes=mem2reg | \
+// RUN: FileCheck --check-prefix=CHECK-RV64 %s
+
+#include <riscv_vector.h>
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 1 x bfloat> @test_vrgather_vv_bf16mf4_tu(
+// CHECK-RV64-SAME: <vscale x 1 x bfloat> [[VD:%.*]], <vscale x 1 x bfloat> [[VS2:%.*]], <vscale x 1 x i16> [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 1 x bfloat> @llvm.riscv.vrgather.vv.nxv1bf16.i64(<vscale x 1 x bfloat> [[VD]], <vscale x 1 x bfloat> [[VS2]], <vscale x 1 x i16> [[VS1]], i64 [[VL]])
+// CHECK-RV64-NEXT: ret <vscale x 1 x bfloat> [[TMP0]]
+//
+vbfloat16mf4_t test_vrgather_vv_bf16mf4_tu(vbfloat16mf4_t vd,
+ vbfloat16mf4_t vs2, vuint16mf4_t vs1,
+ size_t vl) {
+ return __riscv_vrgather_tu(vd, vs2, vs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 1 x bfloat> @test_vrgather_vx_bf16mf4_tu(
+// CHECK-RV64-SAME: <vscale x 1 x bfloat> [[VD:%.*]], <vscale x 1 x bfloat> [[VS2:%.*]], i64 noundef [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 1 x bfloat> @llvm.riscv.vrgather.vx.nxv1bf16.i64(<vscale x 1 x bfloat> [[VD]], <vscale x 1 x bfloat> [[VS2]], i64 [[VS1]], i64 [[VL]])
+// CHECK-RV64-NEXT: ret <vscale x 1 x bfloat> [[TMP0]]
+//
+vbfloat16mf4_t test_vrgather_vx_bf16mf4_tu(vbfloat16mf4_t vd,
+ vbfloat16mf4_t vs2, size_t vs1,
+ size_t vl) {
+ return __riscv_vrgather_tu(vd, vs2, vs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 2 x bfloat> @test_vrgather_vv_bf16mf2_tu(
+// CHECK-RV64-SAME: <vscale x 2 x bfloat> [[VD:%.*]], <vscale x 2 x bfloat> [[VS2:%.*]], <vscale x 2 x i16> [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 2 x bfloat> @llvm.riscv.vrgather.vv.nxv2bf16.i64(<vscale x 2 x bfloat> [[VD]], <vscale x 2 x bfloat> [[VS2]], <vscale x 2 x i16> [[VS1]], i64 [[VL]])
+// CHECK-RV64-NEXT: ret <vscale x 2 x bfloat> [[TMP0]]
+//
+vbfloat16mf2_t test_vrgather_vv_bf16mf2_tu(vbfloat16mf2_t vd,
+ vbfloat16mf2_t vs2, vuint16mf2_t vs1,
+ size_t vl) {
+ return __riscv_vrgather_tu(vd, vs2, vs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 2 x bfloat> @test_vrgather_vx_bf16mf2_tu(
+// CHECK-RV64-SAME: <vscale x 2 x bfloat> [[VD:%.*]], <vscale x 2 x bfloat> [[VS2:%.*]], i64 noundef [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 2 x bfloat> @llvm.riscv.vrgather.vx.nxv2bf16.i64(<vscale x 2 x bfloat> [[VD]], <vscale x 2 x bfloat> [[VS2]], i64 [[VS1]], i64 [[VL]])
+// CHECK-RV64-NEXT: ret <vscale x 2 x bfloat> [[TMP0]]
+//
+vbfloat16mf2_t test_vrgather_vx_bf16mf2_tu(vbfloat16mf2_t vd,
+ vbfloat16mf2_t vs2, size_t vs1,
+ size_t vl) {
+ return __riscv_vrgather_tu(vd, vs2, vs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 4 x bfloat> @test_vrgather_vv_bf16m1_tu(
+// CHECK-RV64-SAME: <vscale x 4 x bfloat> [[VD:%.*]], <vscale x 4 x bfloat> [[VS2:%.*]], <vscale x 4 x i16> [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 4 x bfloat> @llvm.riscv.vrgather.vv.nxv4bf16.i64(<vscale x 4 x bfloat> [[VD]], <vscale x 4 x bfloat> [[VS2]], <vscale x 4 x i16> [[VS1]], i64 [[VL]])
+// CHECK-RV64-NEXT: ret <vscale x 4 x bfloat> [[TMP0]]
+//
+vbfloat16m1_t test_vrgather_vv_bf16m1_tu(vbfloat16m1_t vd, vbfloat16m1_t vs2,
+ vuint16m1_t vs1, size_t vl) {
+ return __riscv_vrgather_tu(vd, vs2, vs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 4 x bfloat> @test_vrgather_vx_bf16m1_tu(
+// CHECK-RV64-SAME: <vscale x 4 x bfloat> [[VD:%.*]], <vscale x 4 x bfloat> [[VS2:%.*]], i64 noundef [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 4 x bfloat> @llvm.riscv.vrgather.vx.nxv4bf16.i64(<vscale x 4 x bfloat> [[VD]], <vscale x 4 x bfloat> [[VS2]], i64 [[VS1]], i64 [[VL]])
+// CHECK-RV64-NEXT: ret <vscale x 4 x bfloat> [[TMP0]]
+//
+vbfloat16m1_t test_vrgather_vx_bf16m1_tu(vbfloat16m1_t vd, vbfloat16m1_t vs2,
+ size_t vs1, size_t vl) {
+ return __riscv_vrgather_tu(vd, vs2, vs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 8 x bfloat> @test_vrgather_vv_bf16m2_tu(
+// CHECK-RV64-SAME: <vscale x 8 x bfloat> [[VD:%.*]], <vscale x 8 x bfloat> [[VS2:%.*]], <vscale x 8 x i16> [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 8 x bfloat> @llvm.riscv.vrgather.vv.nxv8bf16.i64(<vscale x 8 x bfloat> [[VD]], <vscale x 8 x bfloat> [[VS2]], <vscale x 8 x i16> [[VS1]], i64 [[VL]])
+// CHECK-RV64-NEXT: ret <vscale x 8 x bfloat> [[TMP0]]
+//
+vbfloat16m2_t test_vrgather_vv_bf16m2_tu(vbfloat16m2_t vd, vbfloat16m2_t vs2,
+ vuint16m2_t vs1, size_t vl) {
+ return __riscv_vrgather_tu(vd, vs2, vs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 8 x bfloat> @test_vrgather_vx_bf16m2_tu(
+// CHECK-RV64-SAME: <vscale x 8 x bfloat> [[VD:%.*]], <vscale x 8 x bfloat> [[VS2:%.*]], i64 noundef [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 8 x bfloat> @llvm.riscv.vrgather.vx.nxv8bf16.i64(<vscale x 8 x bfloat> [[VD]], <vscale x 8 x bfloat> [[VS2]], i64 [[VS1]], i64 [[VL]])
+// CHECK-RV64-NEXT: ret <vscale x 8 x bfloat> [[TMP0]]
+//
+vbfloat16m2_t test_vrgather_vx_bf16m2_tu(vbfloat16m2_t vd, vbfloat16m2_t vs2,
+ size_t vs1, size_t vl) {
+ return __riscv_vrgather_tu(vd, vs2, vs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 16 x bfloat> @test_vrgather_vv_bf16m4_tu(
+// CHECK-RV64-SAME: <vscale x 16 x bfloat> [[VD:%.*]], <vscale x 16 x bfloat> [[VS2:%.*]], <vscale x 16 x i16> [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 16 x bfloat> @llvm.riscv.vrgather.vv.nxv16bf16.i64(<vscale x 16 x bfloat> [[VD]], <vscale x 16 x bfloat> [[VS2]], <vscale x 16 x i16> [[VS1]], i64 [[VL]])
+// CHECK-RV64-NEXT: ret <vscale x 16 x bfloat> [[TMP0]]
+//
+vbfloat16m4_t test_vrgather_vv_bf16m4_tu(vbfloat16m4_t vd, vbfloat16m4_t vs2,
+ vuint16m4_t vs1, size_t vl) {
+ return __riscv_vrgather_tu(vd, vs2, vs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 16 x bfloat> @test_vrgather_vx_bf16m4_tu(
+// CHECK-RV64-SAME: <vscale x 16 x bfloat> [[VD:%.*]], <vscale x 16 x bfloat> [[VS2:%.*]], i64 noundef [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 16 x bfloat> @llvm.riscv.vrgather.vx.nxv16bf16.i64(<vscale x 16 x bfloat> [[VD]], <vscale x 16 x bfloat> [[VS2]], i64 [[VS1]], i64 [[VL]])
+// CHECK-RV64-NEXT: ret <vscale x 16 x bfloat> [[TMP0]]
+//
+vbfloat16m4_t test_vrgather_vx_bf16m4_tu(vbfloat16m4_t vd, vbfloat16m4_t vs2,
+ size_t vs1, size_t vl) {
+ return __riscv_vrgather_tu(vd, vs2, vs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 32 x bfloat> @test_vrgather_vv_bf16m8_tu(
+// CHECK-RV64-SAME: <vscale x 32 x bfloat> [[VD:%.*]], <vscale x 32 x bfloat> [[VS2:%.*]], <vscale x 32 x i16> [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 32 x bfloat> @llvm.riscv.vrgather.vv.nxv32bf16.i64(<vscale x 32 x bfloat> [[VD]], <vscale x 32 x bfloat> [[VS2]], <vscale x 32 x i16> [[VS1]], i64 [[VL]])
+// CHECK-RV64-NEXT: ret <vscale x 32 x bfloat> [[TMP0]]
+//
+vbfloat16m8_t test_vrgather_vv_bf16m8_tu(vbfloat16m8_t vd, vbfloat16m8_t vs2,
+ vuint16m8_t vs1, size_t vl) {
+ return __riscv_vrgather_tu(vd, vs2, vs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 32 x bfloat> @test_vrgather_vx_bf16m8_tu(
+// CHECK-RV64-SAME: <vscale x 32 x bfloat> [[VD:%.*]], <vscale x 32 x bfloat> [[VS2:%.*]], i64 noundef [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 32 x bfloat> @llvm.riscv.vrgather.vx.nxv32bf16.i64(<vscale x 32 x bfloat> [[VD]], <vscale x 32 x bfloat> [[VS2]], i64 [[VS1]], i64 [[VL]])
+// CHECK-RV64-NEXT: ret <vscale x 32 x bfloat> [[TMP0]]
+//
+vbfloat16m8_t test_vrgather_vx_bf16m8_tu(vbfloat16m8_t vd, vbfloat16m8_t vs2,
+ size_t vs1, size_t vl) {
+ return __riscv_vrgather_tu(vd, vs2, vs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 1 x bfloat> @test_vrgather_vv_bf16mf4_tum(
+// CHECK-RV64-SAME: <vscale x 1 x i1> [[VM:%.*]], <vscale x 1 x bfloat> [[VD:%.*]], <vscale x 1 x bfloat> [[VS2:%.*]], <vscale x 1 x i16> [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 1 x bfloat> @llvm.riscv.vrgather.vv.mask.nxv1bf16.i64(<vscale x 1 x bfloat> [[VD]], <vscale x 1 x bfloat> [[VS2]], <vscale x 1 x i16> [[VS1]], <vscale x 1 x i1> [[VM]], i64 [[VL]], i64 2)
+// CHECK-RV64-NEXT: ret <vscale x 1 x bfloat> [[TMP0]]
+//
+vbfloat16mf4_t test_vrgather_vv_bf16mf4_tum(vbool64_t vm, vbfloat16mf4_t vd,
+ vbfloat16mf4_t vs2,
+ vuint16mf4_t vs1, size_t vl) {
+ return __riscv_vrgather_tum(vm, vd, vs2, vs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 1 x bfloat> @test_vrgather_vx_bf16mf4_tum(
+// CHECK-RV64-SAME: <vscale x 1 x i1> [[VM:%.*]], <vscale x 1 x bfloat> [[VD:%.*]], <vscale x 1 x bfloat> [[VS2:%.*]], i64 noundef [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 1 x bfloat> @llvm.riscv.vrgather.vx.mask.nxv1bf16.i64(<vscale x 1 x bfloat> [[VD]], <vscale x 1 x bfloat> [[VS2]], i64 [[VS1]], <vscale x 1 x i1> [[VM]], i64 [[VL]], i64 2)
+// CHECK-RV64-NEXT: ret <vscale x 1 x bfloat> [[TMP0]]
+//
+vbfloat16mf4_t test_vrgather_vx_bf16mf4_tum(vbool64_t vm, vbfloat16mf4_t vd,
+ vbfloat16mf4_t vs2, size_t vs1,
+ size_t vl) {
+ return __riscv_vrgather_tum(vm, vd, vs2, vs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 2 x bfloat> @test_vrgather_vv_bf16mf2_tum(
+// CHECK-RV64-SAME: <vscale x 2 x i1> [[VM:%.*]], <vscale x 2 x bfloat> [[VD:%.*]], <vscale x 2 x bfloat> [[VS2:%.*]], <vscale x 2 x i16> [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 2 x bfloat> @llvm.riscv.vrgather.vv.mask.nxv2bf16.i64(<vscale x 2 x bfloat> [[VD]], <vscale x 2 x bfloat> [[VS2]], <vscale x 2 x i16> [[VS1]], <vscale x 2 x i1> [[VM]], i64 [[VL]], i64 2)
+// CHECK-RV64-NEXT: ret <vscale x 2 x bfloat> [[TMP0]]
+//
+vbfloat16mf2_t test_vrgather_vv_bf16mf2_tum(vbool32_t vm, vbfloat16mf2_t vd,
+ vbfloat16mf2_t vs2,
+ vuint16mf2_t vs1, size_t vl) {
+ return __riscv_vrgather_tum(vm, vd, vs2, vs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 2 x bfloat> @test_vrgather_vx_bf16mf2_tum(
+// CHECK-RV64-SAME: <vscale x 2 x i1> [[VM:%.*]], <vscale x 2 x bfloat> [[VD:%.*]], <vscale x 2 x bfloat> [[VS2:%.*]], i64 noundef [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 2 x bfloat> @llvm.riscv.vrgather.vx.mask.nxv2bf16.i64(<vscale x 2 x bfloat> [[VD]], <vscale x 2 x bfloat> [[VS2]], i64 [[VS1]], <vscale x 2 x i1> [[VM]], i64 [[VL]], i64 2)
+// CHECK-RV64-NEXT: ret <vscale x 2 x bfloat> [[TMP0]]
+//
+vbfloat16mf2_t test_vrgather_vx_bf16mf2_tum(vbool32_t vm, vbfloat16mf2_t vd,
+ vbfloat16mf2_t vs2, size_t vs1,
+ size_t vl) {
+ return __riscv_vrgather_tum(vm, vd, vs2, vs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 4 x bfloat> @test_vrgather_vv_bf16m1_tum(
+// CHECK-RV64-SAME: <vscale x 4 x i1> [[VM:%.*]], <vscale x 4 x bfloat> [[VD:%.*]], <vscale x 4 x bfloat> [[VS2:%.*]], <vscale x 4 x i16> [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 4 x bfloat> @llvm.riscv.vrgather.vv.mask.nxv4bf16.i64(<vscale x 4 x bfloat> [[VD]], <vscale x 4 x bfloat> [[VS2]], <vscale x 4 x i16> [[VS1]], <vscale x 4 x i1> [[VM]], i64 [[VL]], i64 2)
+// CHECK-RV64-NEXT: ret <vscale x 4 x bfloat> [[TMP0]]
+//
+vbfloat16m1_t test_vrgather_vv_bf16m1_tum(vbool16_t vm, vbfloat16m1_t vd,
+ vbfloat16m1_t vs2, vuint16m1_t vs1,
+ size_t vl) {
+ return __riscv_vrgather_tum(vm, vd, vs2, vs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 4 x bfloat> @test_vrgather_vx_bf16m1_tum(
+// CHECK-RV64-SAME: <vscale x 4 x i1> [[VM:%.*]], <vscale x 4 x bfloat> [[VD:%.*]], <vscale x 4 x bfloat> [[VS2:%.*]], i64 noundef [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 4 x bfloat> @llvm.riscv.vrgather.vx.mask.nxv4bf16.i64(<vscale x 4 x bfloat> [[VD]], <vscale x 4 x bfloat> [[VS2]], i64 [[VS1]], <vscale x 4 x i1> [[VM]], i64 [[VL]], i64 2)
+// CHECK-RV64-NEXT: ret <vscale x 4 x bfloat> [[TMP0]]
+//
+vbfloat16m1_t test_vrgather_vx_bf16m1_tum(vbool16_t vm, vbfloat16m1_t vd,
+ vbfloat16m1_t vs2, size_t vs1,
+ size_t vl) {
+ return __riscv_vrgather_tum(vm, vd, vs2, vs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 8 x bfloat> @test_vrgather_vv_bf16m2_tum(
+// CHECK-RV64-SAME: <vscale x 8 x i1> [[VM:%.*]], <vscale x 8 x bfloat> [[VD:%.*]], <vscale x 8 x bfloat> [[VS2:%.*]], <vscale x 8 x i16> [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 8 x bfloat> @llvm.riscv.vrgather.vv.mask.nxv8bf16.i64(<vscale x 8 x bfloat> [[VD]], <vscale x 8 x bfloat> [[VS2]], <vscale x 8 x i16> [[VS1]], <vscale x 8 x i1> [[VM]], i64 [[VL]], i64 2)
+// CHECK-RV64-NEXT: ret <vscale x 8 x bfloat> [[TMP0]]
+//
+vbfloat16m2_t test_vrgather_vv_bf16m2_tum(vbool8_t vm, vbfloat16m2_t vd,
+ vbfloat16m2_t vs2, vuint16m2_t vs1,
+ size_t vl) {
+ return __riscv_vrgather_tum(vm, vd, vs2, vs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 8 x bfloat> @test_vrgather_vx_bf16m2_tum(
+// CHECK-RV64-SAME: <vscale x 8 x i1> [[VM:%.*]], <vscale x 8 x bfloat> [[VD:%.*]], <vscale x 8 x bfloat> [[VS2:%.*]], i64 noundef [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 8 x bfloat> @llvm.riscv.vrgather.vx.mask.nxv8bf16.i64(<vscale x 8 x bfloat> [[VD]], <vscale x 8 x bfloat> [[VS2]], i64 [[VS1]], <vscale x 8 x i1> [[VM]], i64 [[VL]], i64 2)
+// CHECK-RV64-NEXT: ret <vscale x 8 x bfloat> [[TMP0]]
+//
+vbfloat16m2_t test_vrgather_vx_bf16m2_tum(vbool8_t vm, vbfloat16m2_t vd,
+ vbfloat16m2_t vs2, size_t vs1,
+ size_t vl) {
+ return __riscv_vrgather_tum(vm, vd, vs2, vs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 16 x bfloat> @test_vrgather_vv_bf16m4_tum(
+// CHECK-RV64-SAME: <vscale x 16 x i1> [[VM:%.*]], <vscale x 16 x bfloat> [[VD:%.*]], <vscale x 16 x bfloat> [[VS2:%.*]], <vscale x 16 x i16> [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 16 x bfloat> @llvm.riscv.vrgather.vv.mask.nxv16bf16.i64(<vscale x 16 x bfloat> [[VD]], <vscale x 16 x bfloat> [[VS2]], <vscale x 16 x i16> [[VS1]], <vscale x 16 x i1> [[VM]], i64 [[VL]], i64 2)
+// CHECK-RV64-NEXT: ret <vscale x 16 x bfloat> [[TMP0]]
+//
+vbfloat16m4_t test_vrgather_vv_bf16m4_tum(vbool4_t vm, vbfloat16m4_t vd,
+ vbfloat16m4_t vs2, vuint16m4_t vs1,
+ size_t vl) {
+ return __riscv_vrgather_tum(vm, vd, vs2, vs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 16 x bfloat> @test_vrgather_vx_bf16m4_tum(
+// CHECK-RV64-SAME: <vscale x 16 x i1> [[VM:%.*]], <vscale x 16 x bfloat> [[VD:%.*]], <vscale x 16 x bfloat> [[VS2:%.*]], i64 noundef [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 16 x bfloat> @llvm.riscv.vrgather.vx.mask.nxv16bf16.i64(<vscale x 16 x bfloat> [[VD]], <vscale x 16 x bfloat> [[VS2]], i64 [[VS1]], <vscale x 16 x i1> [[VM]], i64 [[VL]], i64 2)
+// CHECK-RV64-NEXT: ret <vscale x 16 x bfloat> [[TMP0]]
+//
+vbfloat16m4_t test_vrgather_vx_bf16m4_tum(vbool4_t vm, vbfloat16m4_t vd,
+ vbfloat16m4_t vs2, size_t vs1,
+ size_t vl) {
+ return __riscv_vrgather_tum(vm, vd, vs2, vs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 32 x bfloat> @test_vrgather_vv_bf16m8_tum(
+// CHECK-RV64-SAME: <vscale x 32 x i1> [[VM:%.*]], <vscale x 32 x bfloat> [[VD:%.*]], <vscale x 32 x bfloat> [[VS2:%.*]], <vscale x 32 x i16> [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 32 x bfloat> @llvm.riscv.vrgather.vv.mask.nxv32bf16.i64(<vscale x 32 x bfloat> [[VD]], <vscale x 32 x bfloat> [[VS2]], <vscale x 32 x i16> [[VS1]], <vscale x 32 x i1> [[VM]], i64 [[VL]], i64 2)
+// CHECK-RV64-NEXT: ret <vscale x 32 x bfloat> [[TMP0]]
+//
+vbfloat16m8_t test_vrgather_vv_bf16m8_tum(vbool2_t vm, vbfloat16m8_t vd,
+ vbfloat16m8_t vs2, vuint16m8_t vs1,
+ size_t vl) {
+ return __riscv_vrgather_tum(vm, vd, vs2, vs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 32 x bfloat> @test_vrgather_vx_bf16m8_tum(
+// CHECK-RV64-SAME: <vscale x 32 x i1> [[VM:%.*]], <vscale x 32 x bfloat> [[VD:%.*]], <vscale x 32 x bfloat> [[VS2:%.*]], i64 noundef [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 32 x bfloat> @llvm.riscv.vrgather.vx.mask.nxv32bf16.i64(<vscale x 32 x bfloat> [[VD]], <vscale x 32 x bfloat> [[VS2]], i64 [[VS1]], <vscale x 32 x i1> [[VM]], i64 [[VL]], i64 2)
+// CHECK-RV64-NEXT: ret <vscale x 32 x bfloat> [[TMP0]]
+//
+vbfloat16m8_t test_vrgather_vx_bf16m8_tum(vbool2_t vm, vbfloat16m8_t vd,
+ vbfloat16m8_t vs2, size_t vs1,
+ size_t vl) {
+ return __riscv_vrgather_tum(vm, vd, vs2, vs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 1 x bfloat> @test_vrgather_vv_bf16mf4_tumu(
+// CHECK-RV64-SAME: <vscale x 1 x i1> [[VM:%.*]], <vscale x 1 x bfloat> [[VD:%.*]], <vscale x 1 x bfloat> [[VS2:%.*]], <vscale x 1 x i16> [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 1 x bfloat> @llvm.riscv.vrgather.vv.mask.nxv1bf16.i64(<vscale x 1 x bfloat> [[VD]], <vscale x 1 x bfloat> [[VS2]], <vscale x 1 x i16> [[VS1]], <vscale x 1 x i1> [[VM]], i64 [[VL]], i64 0)
+// CHECK-RV64-NEXT: ret <vscale x 1 x bfloat> [[TMP0]]
+//
+vbfloat16mf4_t test_vrgather_vv_bf16mf4_tumu(vbool64_t vm, vbfloat16mf4_t vd,
+ vbfloat16mf4_t vs2,
+ vuint16mf4_t vs1, size_t vl) {
+ return __riscv_vrgather_tumu(vm, vd, vs2, vs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 1 x bfloat> @test_vrgather_vx_bf16mf4_tumu(
+// CHECK-RV64-SAME: <vscale x 1 x i1> [[VM:%.*]], <vscale x 1 x bfloat> [[VD:%.*]], <vscale x 1 x bfloat> [[VS2:%.*]], i64 noundef [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 1 x bfloat> @llvm.riscv.vrgather.vx.mask.nxv1bf16.i64(<vscale x 1 x bfloat> [[VD]], <vscale x 1 x bfloat> [[VS2]], i64 [[VS1]], <vscale x 1 x i1> [[VM]], i64 [[VL]], i64 0)
+// CHECK-RV64-NEXT: ret <vscale x 1 x bfloat> [[TMP0]]
+//
+vbfloat16mf4_t test_vrgather_vx_bf16mf4_tumu(vbool64_t vm, vbfloat16mf4_t vd,
+ vbfloat16mf4_t vs2, size_t vs1,
+ size_t vl) {
+ return __riscv_vrgather_tumu(vm, vd, vs2, vs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 2 x bfloat> @test_vrgather_vv_bf16mf2_tumu(
+// CHECK-RV64-SAME: <vscale x 2 x i1> [[VM:%.*]], <vscale x 2 x bfloat> [[VD:%.*]], <vscale x 2 x bfloat> [[VS2:%.*]], <vscale x 2 x i16> [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 2 x bfloat> @llvm.riscv.vrgather.vv.mask.nxv2bf16.i64(<vscale x 2 x bfloat> [[VD]], <vscale x 2 x bfloat> [[VS2]], <vscale x 2 x i16> [[VS1]], <vscale x 2 x i1> [[VM]], i64 [[VL]], i64 0)
+// CHECK-RV64-NEXT: ret <vscale x 2 x bfloat> [[TMP0]]
+//
+vbfloat16mf2_t test_vrgather_vv_bf16mf2_tumu(vbool32_t vm, vbfloat16mf2_t vd,
+ vbfloat16mf2_t vs2,
+ vuint16mf2_t vs1, size_t vl) {
+ return __riscv_vrgather_tumu(vm, vd, vs2, vs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 2 x bfloat> @test_vrgather_vx_bf16mf2_tumu(
+// CHECK-RV64-SAME: <vscale x 2 x i1> [[VM:%.*]], <vscale x 2 x bfloat> [[VD:%.*]], <vscale x 2 x bfloat> [[VS2:%.*]], i64 noundef [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 2 x bfloat> @llvm.riscv.vrgather.vx.mask.nxv2bf16.i64(<vscale x 2 x bfloat> [[VD]], <vscale x 2 x bfloat> [[VS2]], i64 [[VS1]], <vscale x 2 x i1> [[VM]], i64 [[VL]], i64 0)
+// CHECK-RV64-NEXT: ret <vscale x 2 x bfloat> [[TMP0]]
+//
+vbfloat16mf2_t test_vrgather_vx_bf16mf2_tumu(vbool32_t vm, vbfloat16mf2_t vd,
+ vbfloat16mf2_t vs2, size_t vs1,
+ size_t vl) {
+ return __riscv_vrgather_tumu(vm, vd, vs2, vs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 4 x bfloat> @test_vrgather_vv_bf16m1_tumu(
+// CHECK-RV64-SAME: <vscale x 4 x i1> [[VM:%.*]], <vscale x 4 x bfloat> [[VD:%.*]], <vscale x 4 x bfloat> [[VS2:%.*]], <vscale x 4 x i16> [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 4 x bfloat> @llvm.riscv.vrgather.vv.mask.nxv4bf16.i64(<vscale x 4 x bfloat> [[VD]], <vscale x 4 x bfloat> [[VS2]], <vscale x 4 x i16> [[VS1]], <vscale x 4 x i1> [[VM]], i64 [[VL]], i64 0)
+// CHECK-RV64-NEXT: ret <vscale x 4 x bfloat> [[TMP0]]
+//
+vbfloat16m1_t test_vrgather_vv_bf16m1_tumu(vbool16_t vm, vbfloat16m1_t vd,
+ vbfloat16m1_t vs2, vuint16m1_t vs1,
+ size_t vl) {
+ return __riscv_vrgather_tumu(vm, vd, vs2, vs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 4 x bfloat> @test_vrgather_vx_bf16m1_tumu(
+// CHECK-RV64-SAME: <vscale x 4 x i1> [[VM:%.*]], <vscale x 4 x bfloat> [[VD:%.*]], <vscale x 4 x bfloat> [[VS2:%.*]], i64 noundef [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 4 x bfloat> @llvm.riscv.vrgather.vx.mask.nxv4bf16.i64(<vscale x 4 x bfloat> [[VD]], <vscale x 4 x bfloat> [[VS2]], i64 [[VS1]], <vscale x 4 x i1> [[VM]], i64 [[VL]], i64 0)
+// CHECK-RV64-NEXT: ret <vscale x 4 x bfloat> [[TMP0]]
+//
+vbfloat16m1_t test_vrgather_vx_bf16m1_tumu(vbool16_t vm, vbfloat16m1_t vd,
+ vbfloat16m1_t vs2, size_t vs1,
+ size_t vl) {
+ return __riscv_vrgather_tumu(vm, vd, vs2, vs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 8 x bfloat> @test_vrgather_vv_bf16m2_tumu(
+// CHECK-RV64-SAME: <vscale x 8 x i1> [[VM:%.*]], <vscale x 8 x bfloat> [[VD:%.*]], <vscale x 8 x bfloat> [[VS2:%.*]], <vscale x 8 x i16> [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 8 x bfloat> @llvm.riscv.vrgather.vv.mask.nxv8bf16.i64(<vscale x 8 x bfloat> [[VD]], <vscale x 8 x bfloat> [[VS2]], <vscale x 8 x i16> [[VS1]], <vscale x 8 x i1> [[VM]], i64 [[VL]], i64 0)
+// CHECK-RV64-NEXT: ret <vscale x 8 x bfloat> [[TMP0]]
+//
+vbfloat16m2_t test_vrgather_vv_bf16m2_tumu(vbool8_t vm, vbfloat16m2_t vd,
+ vbfloat16m2_t vs2, vuint16m2_t vs1,
+ size_t vl) {
+ return __riscv_vrgather_tumu(vm, vd, vs2, vs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 8 x bfloat> @test_vrgather_vx_bf16m2_tumu(
+// CHECK-RV64-SAME: <vscale x 8 x i1> [[VM:%.*]], <vscale x 8 x bfloat> [[VD:%.*]], <vscale x 8 x bfloat> [[VS2:%.*]], i64 noundef [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 8 x bfloat> @llvm.riscv.vrgather.vx.mask.nxv8bf16.i64(<vscale x 8 x bfloat> [[VD]], <vscale x 8 x bfloat> [[VS2]], i64 [[VS1]], <vscale x 8 x i1> [[VM]], i64 [[VL]], i64 0)
+// CHECK-RV64-NEXT: ret <vscale x 8 x bfloat> [[TMP0]]
+//
+vbfloat16m2_t test_vrgather_vx_bf16m2_tumu(vbool8_t vm, vbfloat16m2_t vd,
+ vbfloat16m2_t vs2, size_t vs1,
+ size_t vl) {
+ return __riscv_vrgather_tumu(vm, vd, vs2, vs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 16 x bfloat> @test_vrgather_vv_bf16m4_tumu(
+// CHECK-RV64-SAME: <vscale x 16 x i1> [[VM:%.*]], <vscale x 16 x bfloat> [[VD:%.*]], <vscale x 16 x bfloat> [[VS2:%.*]], <vscale x 16 x i16> [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 16 x bfloat> @llvm.riscv.vrgather.vv.mask.nxv16bf16.i64(<vscale x 16 x bfloat> [[VD]], <vscale x 16 x bfloat> [[VS2]], <vscale x 16 x i16> [[VS1]], <vscale x 16 x i1> [[VM]], i64 [[VL]], i64 0)
+// CHECK-RV64-NEXT: ret <vscale x 16 x bfloat> [[TMP0]]
+//
+vbfloat16m4_t test_vrgather_vv_bf16m4_tumu(vbool4_t vm, vbfloat16m4_t vd,
+ vbfloat16m4_t vs2, vuint16m4_t vs1,
+ size_t vl) {
+ return __riscv_vrgather_tumu(vm, vd, vs2, vs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 16 x bfloat> @test_vrgather_vx_bf16m4_tumu(
+// CHECK-RV64-SAME: <vscale x 16 x i1> [[VM:%.*]], <vscale x 16 x bfloat> [[VD:%.*]], <vscale x 16 x bfloat> [[VS2:%.*]], i64 noundef [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 16 x bfloat> @llvm.riscv.vrgather.vx.mask.nxv16bf16.i64(<vscale x 16 x bfloat> [[VD]], <vscale x 16 x bfloat> [[VS2]], i64 [[VS1]], <vscale x 16 x i1> [[VM]], i64 [[VL]], i64 0)
+// CHECK-RV64-NEXT: ret <vscale x 16 x bfloat> [[TMP0]]
+//
+vbfloat16m4_t test_vrgather_vx_bf16m4_tumu(vbool4_t vm, vbfloat16m4_t vd,
+ vbfloat16m4_t vs2, size_t vs1,
+ size_t vl) {
+ return __riscv_vrgather_tumu(vm, vd, vs2, vs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 32 x bfloat> @test_vrgather_vv_bf16m8_tumu(
+// CHECK-RV64-SAME: <vscale x 32 x i1> [[VM:%.*]], <vscale x 32 x bfloat> [[VD:%.*]], <vscale x 32 x bfloat> [[VS2:%.*]], <vscale x 32 x i16> [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 32 x bfloat> @llvm.riscv.vrgather.vv.mask.nxv32bf16.i64(<vscale x 32 x bfloat> [[VD]], <vscale x 32 x bfloat> [[VS2]], <vscale x 32 x i16> [[VS1]], <vscale x 32 x i1> [[VM]], i64 [[VL]], i64 0)
+// CHECK-RV64-NEXT: ret <vscale x 32 x bfloat> [[TMP0]]
+//
+vbfloat16m8_t test_vrgather_vv_bf16m8_tumu(vbool2_t vm, vbfloat16m8_t vd,
+ vbfloat16m8_t vs2, vuint16m8_t vs1,
+ size_t vl) {
+ return __riscv_vrgather_tumu(vm, vd, vs2, vs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 32 x bfloat> @test_vrgather_vx_bf16m8_tumu(
+// CHECK-RV64-SAME: <vscale x 32 x i1> [[VM:%.*]], <vscale x 32 x bfloat> [[VD:%.*]], <vscale x 32 x bfloat> [[VS2:%.*]], i64 noundef [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 32 x bfloat> @llvm.riscv.vrgather.vx.mask.nxv32bf16.i64(<vscale x 32 x bfloat> [[VD]], <vscale x 32 x bfloat> [[VS2]], i64 [[VS1]], <vscale x 32 x i1> [[VM]], i64 [[VL]], i64 0)
+// CHECK-RV64-NEXT: ret <vscale x 32 x bfloat> [[TMP0]]
+//
+vbfloat16m8_t test_vrgather_vx_bf16m8_tumu(vbool2_t vm, vbfloat16m8_t vd,
+ vbfloat16m8_t vs2, size_t vs1,
+ size_t vl) {
+ return __riscv_vrgather_tumu(vm, vd, vs2, vs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 1 x bfloat> @test_vrgather_vv_bf16mf4_mu(
+// CHECK-RV64-SAME: <vscale x 1 x i1> [[VM:%.*]], <vscale x 1 x bfloat> [[VD:%.*]], <vscale x 1 x bfloat> [[VS2:%.*]], <vscale x 1 x i16> [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 1 x bfloat> @llvm.riscv.vrgather.vv.mask.nxv1bf16.i64(<vscale x 1 x bfloat> [[VD]], <vscale x 1 x bfloat> [[VS2]], <vscale x 1 x i16> [[VS1]], <vscale x 1 x i1> [[VM]], i64 [[VL]], i64 1)
+// CHECK-RV64-NEXT: ret <vscale x 1 x bfloat> [[TMP0]]
+//
+vbfloat16mf4_t test_vrgather_vv_bf16mf4_mu(vbool64_t vm, vbfloat16mf4_t vd,
+ vbfloat16mf4_t vs2, vuint16mf4_t vs1,
+ size_t vl) {
+ return __riscv_vrgather_mu(vm, vd, vs2, vs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 1 x bfloat> @test_vrgather_vx_bf16mf4_mu(
+// CHECK-RV64-SAME: <vscale x 1 x i1> [[VM:%.*]], <vscale x 1 x bfloat> [[VD:%.*]], <vscale x 1 x bfloat> [[VS2:%.*]], i64 noundef [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 1 x bfloat> @llvm.riscv.vrgather.vx.mask.nxv1bf16.i64(<vscale x 1 x bfloat> [[VD]], <vscale x 1 x bfloat> [[VS2]], i64 [[VS1]], <vscale x 1 x i1> [[VM]], i64 [[VL]], i64 1)
+// CHECK-RV64-NEXT: ret <vscale x 1 x bfloat> [[TMP0]]
+//
+vbfloat16mf4_t test_vrgather_vx_bf16mf4_mu(vbool64_t vm, vbfloat16mf4_t vd,
+ vbfloat16mf4_t vs2, size_t vs1,
+ size_t vl) {
+ return __riscv_vrgather_mu(vm, vd, vs2, vs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 2 x bfloat> @test_vrgather_vv_bf16mf2_mu(
+// CHECK-RV64-SAME: <vscale x 2 x i1> [[VM:%.*]], <vscale x 2 x bfloat> [[VD:%.*]], <vscale x 2 x bfloat> [[VS2:%.*]], <vscale x 2 x i16> [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 2 x bfloat> @llvm.riscv.vrgather.vv.mask.nxv2bf16.i64(<vscale x 2 x bfloat> [[VD]], <vscale x 2 x bfloat> [[VS2]], <vscale x 2 x i16> [[VS1]], <vscale x 2 x i1> [[VM]], i64 [[VL]], i64 1)
+// CHECK-RV64-NEXT: ret <vscale x 2 x bfloat> [[TMP0]]
+//
+vbfloat16mf2_t test_vrgather_vv_bf16mf2_mu(vbool32_t vm, vbfloat16mf2_t vd,
+ vbfloat16mf2_t vs2, vuint16mf2_t vs1,
+ size_t vl) {
+ return __riscv_vrgather_mu(vm, vd, vs2, vs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 2 x bfloat> @test_vrgather_vx_bf16mf2_mu(
+// CHECK-RV64-SAME: <vscale x 2 x i1> [[VM:%.*]], <vscale x 2 x bfloat> [[VD:%.*]], <vscale x 2 x bfloat> [[VS2:%.*]], i64 noundef [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 2 x bfloat> @llvm.riscv.vrgather.vx.mask.nxv2bf16.i64(<vscale x 2 x bfloat> [[VD]], <vscale x 2 x bfloat> [[VS2]], i64 [[VS1]], <vscale x 2 x i1> [[VM]], i64 [[VL]], i64 1)
+// CHECK-RV64-NEXT: ret <vscale x 2 x bfloat> [[TMP0]]
+//
+vbfloat16mf2_t test_vrgather_vx_bf16mf2_mu(vbool32_t vm, vbfloat16mf2_t vd,
+ vbfloat16mf2_t vs2, size_t vs1,
+ size_t vl) {
+ return __riscv_vrgather_mu(vm, vd, vs2, vs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 4 x bfloat> @test_vrgather_vv_bf16m1_mu(
+// CHECK-RV64-SAME: <vscale x 4 x i1> [[VM:%.*]], <vscale x 4 x bfloat> [[VD:%.*]], <vscale x 4 x bfloat> [[VS2:%.*]], <vscale x 4 x i16> [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 4 x bfloat> @llvm.riscv.vrgather.vv.mask.nxv4bf16.i64(<vscale x 4 x bfloat> [[VD]], <vscale x 4 x bfloat> [[VS2]], <vscale x 4 x i16> [[VS1]], <vscale x 4 x i1> [[VM]], i64 [[VL]], i64 1)
+// CHECK-RV64-NEXT: ret <vscale x 4 x bfloat> [[TMP0]]
+//
+vbfloat16m1_t test_vrgather_vv_bf16m1_mu(vbool16_t vm, vbfloat16m1_t vd,
+ vbfloat16m1_t vs2, vuint16m1_t vs1,
+ size_t vl) {
+ return __riscv_vrgather_mu(vm, vd, vs2, vs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 4 x bfloat> @test_vrgather_vx_bf16m1_mu(
+// CHECK-RV64-SAME: <vscale x 4 x i1> [[VM:%.*]], <vscale x 4 x bfloat> [[VD:%.*]], <vscale x 4 x bfloat> [[VS2:%.*]], i64 noundef [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 4 x bfloat> @llvm.riscv.vrgather.vx.mask.nxv4bf16.i64(<vscale x 4 x bfloat> [[VD]], <vscale x 4 x bfloat> [[VS2]], i64 [[VS1]], <vscale x 4 x i1> [[VM]], i64 [[VL]], i64 1)
+// CHECK-RV64-NEXT: ret <vscale x 4 x bfloat> [[TMP0]]
+//
+vbfloat16m1_t test_vrgather_vx_bf16m1_mu(vbool16_t vm, vbfloat16m1_t vd,
+ vbfloat16m1_t vs2, size_t vs1,
+ size_t vl) {
+ return __riscv_vrgather_mu(vm, vd, vs2, vs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 8 x bfloat> @test_vrgather_vv_bf16m2_mu(
+// CHECK-RV64-SAME: <vscale x 8 x i1> [[VM:%.*]], <vscale x 8 x bfloat> [[VD:%.*]], <vscale x 8 x bfloat> [[VS2:%.*]], <vscale x 8 x i16> [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 8 x bfloat> @llvm.riscv.vrgather.vv.mask.nxv8bf16.i64(<vscale x 8 x bfloat> [[VD]], <vscale x 8 x bfloat> [[VS2]], <vscale x 8 x i16> [[VS1]], <vscale x 8 x i1> [[VM]], i64 [[VL]], i64 1)
+// CHECK-RV64-NEXT: ret <vscale x 8 x bfloat> [[TMP0]]
+//
+vbfloat16m2_t test_vrgather_vv_bf16m2_mu(vbool8_t vm, vbfloat16m2_t vd,
+ vbfloat16m2_t vs2, vuint16m2_t vs1,
+ size_t vl) {
+ return __riscv_vrgather_mu(vm, vd, vs2, vs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 8 x bfloat> @test_vrgather_vx_bf16m2_mu(
+// CHECK-RV64-SAME: <vscale x 8 x i1> [[VM:%.*]], <vscale x 8 x bfloat> [[VD:%.*]], <vscale x 8 x bfloat> [[VS2:%.*]], i64 noundef [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 8 x bfloat> @llvm.riscv.vrgather.vx.mask.nxv8bf16.i64(<vscale x 8 x bfloat> [[VD]], <vscale x 8 x bfloat> [[VS2]], i64 [[VS1]], <vscale x 8 x i1> [[VM]], i64 [[VL]], i64 1)
+// CHECK-RV64-NEXT: ret <vscale x 8 x bfloat> [[TMP0]]
+//
+vbfloat16m2_t test_vrgather_vx_bf16m2_mu(vbool8_t vm, vbfloat16m2_t vd,
+ vbfloat16m2_t vs2, size_t vs1,
+ size_t vl) {
+ return __riscv_vrgather_mu(vm, vd, vs2, vs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 16 x bfloat> @test_vrgather_vv_bf16m4_mu(
+// CHECK-RV64-SAME: <vscale x 16 x i1> [[VM:%.*]], <vscale x 16 x bfloat> [[VD:%.*]], <vscale x 16 x bfloat> [[VS2:%.*]], <vscale x 16 x i16> [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 16 x bfloat> @llvm.riscv.vrgather.vv.mask.nxv16bf16.i64(<vscale x 16 x bfloat> [[VD]], <vscale x 16 x bfloat> [[VS2]], <vscale x 16 x i16> [[VS1]], <vscale x 16 x i1> [[VM]], i64 [[VL]], i64 1)
+// CHECK-RV64-NEXT: ret <vscale x 16 x bfloat> [[TMP0]]
+//
+vbfloat16m4_t test_vrgather_vv_bf16m4_mu(vbool4_t vm, vbfloat16m4_t vd,
+ vbfloat16m4_t vs2, vuint16m4_t vs1,
+ size_t vl) {
+ return __riscv_vrgather_mu(vm, vd, vs2, vs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 16 x bfloat> @test_vrgather_vx_bf16m4_mu(
+// CHECK-RV64-SAME: <vscale x 16 x i1> [[VM:%.*]], <vscale x 16 x bfloat> [[VD:%.*]], <vscale x 16 x bfloat> [[VS2:%.*]], i64 noundef [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 16 x bfloat> @llvm.riscv.vrgather.vx.mask.nxv16bf16.i64(<vscale x 16 x bfloat> [[VD]], <vscale x 16 x bfloat> [[VS2]], i64 [[VS1]], <vscale x 16 x i1> [[VM]], i64 [[VL]], i64 1)
+// CHECK-RV64-NEXT: ret <vscale x 16 x bfloat> [[TMP0]]
+//
+vbfloat16m4_t test_vrgather_vx_bf16m4_mu(vbool4_t vm, vbfloat16m4_t vd,
+ vbfloat16m4_t vs2, size_t vs1,
+ size_t vl) {
+ return __riscv_vrgather_mu(vm, vd, vs2, vs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 32 x bfloat> @test_vrgather_vv_bf16m8_mu(
+// CHECK-RV64-SAME: <vscale x 32 x i1> [[VM:%.*]], <vscale x 32 x bfloat> [[VD:%.*]], <vscale x 32 x bfloat> [[VS2:%.*]], <vscale x 32 x i16> [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 32 x bfloat> @llvm.riscv.vrgather.vv.mask.nxv32bf16.i64(<vscale x 32 x bfloat> [[VD]], <vscale x 32 x bfloat> [[VS2]], <vscale x 32 x i16> [[VS1]], <vscale x 32 x i1> [[VM]], i64 [[VL]], i64 1)
+// CHECK-RV64-NEXT: ret <vscale x 32 x bfloat> [[TMP0]]
+//
+vbfloat16m8_t test_vrgather_vv_bf16m8_mu(vbool2_t vm, vbfloat16m8_t vd,
+ vbfloat16m8_t vs2, vuint16m8_t vs1,
+ size_t vl) {
+ return __riscv_vrgather_mu(vm, vd, vs2, vs1, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 32 x bfloat> @test_vrgather_vx_bf16m8_mu(
+// CHECK-RV64-SAME: <vscale x 32 x i1> [[VM:%.*]], <vscale x 32 x bfloat> [[VD:%.*]], <vscale x 32 x bfloat> [[VS2:%.*]], i64 noundef [[VS1:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT: entry:
+// CHECK-RV64-NEXT: [[TMP0:%.*]] = call <vscale x 32 x bfloat> @llvm.riscv.vrgather.vx.mask.nxv32bf16.i64(<vscale x 32 x bfloat> [[VD]], <vscale x 32 x bfloat> [[VS2]], i64 [[VS1]], <vscale x 32 x i1> [[VM]], i64 [[VL]], i64 1)
+// CHECK-RV64-NEXT: ret <vscale x 32 x bfloat> [[TMP0]]
+//
+vbfloat16m8_t test_vrgather_vx_bf16m8_mu(vbool2_t vm, vbfloat16m8_t vd,
+ vbfloat16m8_t vs2, size_t vs1,
+ size_t vl) {
+ return __riscv_vrgather_mu(vm, vd, vs2, vs1, vl);
+}
diff --git a/clang/test/CodeGen/X86/prefetchi-error.c b/clang/test/CodeGen/X86/prefetchi-error.c
new file mode 100644
index 0000000..31494f7
--- /dev/null
+++ b/clang/test/CodeGen/X86/prefetchi-error.c
@@ -0,0 +1,7 @@
+// RUN: %clang_cc1 %s -ffreestanding -triple=x86_64-unknown-unknown -target-feature +prefetchi -fsyntax-only -verify
+
+#include <immintrin.h>
+
+void test_invalid_prefetchi(void* p) {
+ __builtin_ia32_prefetchi(p, 1); // expected-error {{argument value 1 is outside the valid range [2, 3]}}
+}
diff --git a/clang/test/CodeGen/builtin-maximumnum-minimumnum.c b/clang/test/CodeGen/builtin-maximumnum-minimumnum.c
new file mode 100644
index 0000000..ea9d2e7
--- /dev/null
+++ b/clang/test/CodeGen/builtin-maximumnum-minimumnum.c
@@ -0,0 +1,171 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
+// RUN: %clang_cc1 -x c++ -std=c++20 -disable-llvm-passes -O3 -triple x86_64 %s -emit-llvm -o - | FileCheck %s --check-prefix=CHECK
+
+typedef _Float16 half8 __attribute__((ext_vector_type(8)));
+typedef __bf16 bf16x8 __attribute__((ext_vector_type(8)));
+typedef float float4 __attribute__((ext_vector_type(4)));
+typedef double double2 __attribute__((ext_vector_type(2)));
+typedef long double ldouble2 __attribute__((ext_vector_type(2)));
+
+// CHECK-LABEL: define dso_local noundef <8 x half> @_Z7pfmin16Dv8_DF16_S_(
+// CHECK-SAME: <8 x half> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <8 x half>, align 16
+// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <8 x half>, align 16
+// CHECK-NEXT: store <8 x half> [[A]], ptr [[A_ADDR]], align 16, !tbaa [[TBAA2:![0-9]+]]
+// CHECK-NEXT: store <8 x half> [[B]], ptr [[B_ADDR]], align 16, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <8 x half>, ptr [[A_ADDR]], align 16, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = load <8 x half>, ptr [[B_ADDR]], align 16, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[ELT_MINIMUMNUM:%.*]] = call <8 x half> @llvm.minimumnum.v8f16(<8 x half> [[TMP0]], <8 x half> [[TMP1]])
+// CHECK-NEXT: ret <8 x half> [[ELT_MINIMUMNUM]]
+//
+half8 pfmin16(half8 a, half8 b) {
+ return __builtin_elementwise_minimumnum(a, b);
+}
+// CHECK-LABEL: define dso_local noundef <8 x bfloat> @_Z8pfmin16bDv8_DF16bS_(
+// CHECK-SAME: <8 x bfloat> noundef [[A:%.*]], <8 x bfloat> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <8 x bfloat>, align 16
+// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <8 x bfloat>, align 16
+// CHECK-NEXT: store <8 x bfloat> [[A]], ptr [[A_ADDR]], align 16, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x bfloat> [[B]], ptr [[B_ADDR]], align 16, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <8 x bfloat>, ptr [[A_ADDR]], align 16, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = load <8 x bfloat>, ptr [[B_ADDR]], align 16, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[ELT_MINIMUMNUM:%.*]] = call <8 x bfloat> @llvm.minimumnum.v8bf16(<8 x bfloat> [[TMP0]], <8 x bfloat> [[TMP1]])
+// CHECK-NEXT: ret <8 x bfloat> [[ELT_MINIMUMNUM]]
+//
+bf16x8 pfmin16b(bf16x8 a, bf16x8 b) {
+ return __builtin_elementwise_minimumnum(a, b);
+}
+// CHECK-LABEL: define dso_local noundef <4 x float> @_Z7pfmin32Dv4_fS_(
+// CHECK-SAME: <4 x float> noundef [[A:%.*]], <4 x float> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <4 x float>, align 16
+// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <4 x float>, align 16
+// CHECK-NEXT: store <4 x float> [[A]], ptr [[A_ADDR]], align 16, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x float> [[B]], ptr [[B_ADDR]], align 16, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A_ADDR]], align 16, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[B_ADDR]], align 16, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[ELT_MINIMUMNUM:%.*]] = call <4 x float> @llvm.minimumnum.v4f32(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
+// CHECK-NEXT: ret <4 x float> [[ELT_MINIMUMNUM]]
+//
+float4 pfmin32(float4 a, float4 b) {
+ return __builtin_elementwise_minimumnum(a, b);
+}
+// CHECK-LABEL: define dso_local noundef <2 x double> @_Z7pfmin64Dv2_dS_(
+// CHECK-SAME: <2 x double> noundef [[A:%.*]], <2 x double> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <2 x double>, align 16
+// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <2 x double>, align 16
+// CHECK-NEXT: store <2 x double> [[A]], ptr [[A_ADDR]], align 16, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <2 x double> [[B]], ptr [[B_ADDR]], align 16, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <2 x double>, ptr [[A_ADDR]], align 16, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, ptr [[B_ADDR]], align 16, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[ELT_MINIMUMNUM:%.*]] = call <2 x double> @llvm.minimumnum.v2f64(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
+// CHECK-NEXT: ret <2 x double> [[ELT_MINIMUMNUM]]
+//
+double2 pfmin64(double2 a, double2 b) {
+ return __builtin_elementwise_minimumnum(a, b);
+}
+// CHECK-LABEL: define dso_local noundef <2 x x86_fp80> @_Z7pfmin80Dv2_eS_(
+// CHECK-SAME: ptr noundef byval(<2 x x86_fp80>) align 32 [[TMP0:%.*]], ptr noundef byval(<2 x x86_fp80>) align 32 [[TMP1:%.*]]) #[[ATTR2:[0-9]+]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <2 x x86_fp80>, align 32
+// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <2 x x86_fp80>, align 32
+// CHECK-NEXT: [[A:%.*]] = load <2 x x86_fp80>, ptr [[TMP0]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[B:%.*]] = load <2 x x86_fp80>, ptr [[TMP1]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <2 x x86_fp80> [[A]], ptr [[A_ADDR]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <2 x x86_fp80> [[B]], ptr [[B_ADDR]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = load <2 x x86_fp80>, ptr [[A_ADDR]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP3:%.*]] = load <2 x x86_fp80>, ptr [[B_ADDR]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[ELT_MINIMUMNUM:%.*]] = call <2 x x86_fp80> @llvm.minimumnum.v2f80(<2 x x86_fp80> [[TMP2]], <2 x x86_fp80> [[TMP3]])
+// CHECK-NEXT: ret <2 x x86_fp80> [[ELT_MINIMUMNUM]]
+//
+ldouble2 pfmin80(ldouble2 a, ldouble2 b) {
+ return __builtin_elementwise_minimumnum(a, b);
+}
+
+// CHECK-LABEL: define dso_local noundef <8 x half> @_Z7pfmax16Dv8_DF16_S_(
+// CHECK-SAME: <8 x half> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <8 x half>, align 16
+// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <8 x half>, align 16
+// CHECK-NEXT: store <8 x half> [[A]], ptr [[A_ADDR]], align 16, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x half> [[B]], ptr [[B_ADDR]], align 16, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <8 x half>, ptr [[A_ADDR]], align 16, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = load <8 x half>, ptr [[B_ADDR]], align 16, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[ELT_MAXIMUMNUM:%.*]] = call <8 x half> @llvm.maximumnum.v8f16(<8 x half> [[TMP0]], <8 x half> [[TMP1]])
+// CHECK-NEXT: ret <8 x half> [[ELT_MAXIMUMNUM]]
+//
+half8 pfmax16(half8 a, half8 b) {
+ return __builtin_elementwise_maximumnum(a, b);
+}
+// CHECK-LABEL: define dso_local noundef <8 x bfloat> @_Z8pfmax16bDv8_DF16bS_(
+// CHECK-SAME: <8 x bfloat> noundef [[A:%.*]], <8 x bfloat> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <8 x bfloat>, align 16
+// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <8 x bfloat>, align 16
+// CHECK-NEXT: store <8 x bfloat> [[A]], ptr [[A_ADDR]], align 16, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <8 x bfloat> [[B]], ptr [[B_ADDR]], align 16, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <8 x bfloat>, ptr [[A_ADDR]], align 16, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = load <8 x bfloat>, ptr [[B_ADDR]], align 16, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[ELT_MAXIMUMNUM:%.*]] = call <8 x bfloat> @llvm.maximumnum.v8bf16(<8 x bfloat> [[TMP0]], <8 x bfloat> [[TMP1]])
+// CHECK-NEXT: ret <8 x bfloat> [[ELT_MAXIMUMNUM]]
+//
+bf16x8 pfmax16b(bf16x8 a, bf16x8 b) {
+ return __builtin_elementwise_maximumnum(a, b);
+}
+// CHECK-LABEL: define dso_local noundef <4 x float> @_Z7pfmax32Dv4_fS_(
+// CHECK-SAME: <4 x float> noundef [[A:%.*]], <4 x float> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <4 x float>, align 16
+// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <4 x float>, align 16
+// CHECK-NEXT: store <4 x float> [[A]], ptr [[A_ADDR]], align 16, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <4 x float> [[B]], ptr [[B_ADDR]], align 16, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A_ADDR]], align 16, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[B_ADDR]], align 16, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[ELT_MAXIMUMNUM:%.*]] = call <4 x float> @llvm.maximumnum.v4f32(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
+// CHECK-NEXT: ret <4 x float> [[ELT_MAXIMUMNUM]]
+//
+float4 pfmax32(float4 a, float4 b) {
+ return __builtin_elementwise_maximumnum(a, b);
+}
+// CHECK-LABEL: define dso_local noundef <2 x double> @_Z7pfmax64Dv2_dS_(
+// CHECK-SAME: <2 x double> noundef [[A:%.*]], <2 x double> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <2 x double>, align 16
+// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <2 x double>, align 16
+// CHECK-NEXT: store <2 x double> [[A]], ptr [[A_ADDR]], align 16, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <2 x double> [[B]], ptr [[B_ADDR]], align 16, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <2 x double>, ptr [[A_ADDR]], align 16, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, ptr [[B_ADDR]], align 16, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[ELT_MAXIMUMNUM:%.*]] = call <2 x double> @llvm.maximumnum.v2f64(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
+// CHECK-NEXT: ret <2 x double> [[ELT_MAXIMUMNUM]]
+//
+double2 pfmax64(double2 a, double2 b) {
+ return __builtin_elementwise_maximumnum(a, b);
+}
+
+// CHECK-LABEL: define dso_local noundef <2 x x86_fp80> @_Z7pfmax80Dv2_eS_(
+// CHECK-SAME: ptr noundef byval(<2 x x86_fp80>) align 32 [[TMP0:%.*]], ptr noundef byval(<2 x x86_fp80>) align 32 [[TMP1:%.*]]) #[[ATTR2]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[A_ADDR:%.*]] = alloca <2 x x86_fp80>, align 32
+// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <2 x x86_fp80>, align 32
+// CHECK-NEXT: [[A:%.*]] = load <2 x x86_fp80>, ptr [[TMP0]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[B:%.*]] = load <2 x x86_fp80>, ptr [[TMP1]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <2 x x86_fp80> [[A]], ptr [[A_ADDR]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: store <2 x x86_fp80> [[B]], ptr [[B_ADDR]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = load <2 x x86_fp80>, ptr [[A_ADDR]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP3:%.*]] = load <2 x x86_fp80>, ptr [[B_ADDR]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT:    [[ELT_MAXIMUMNUM:%.*]] = call <2 x x86_fp80> @llvm.maximumnum.v2f80(<2 x x86_fp80> [[TMP2]], <2 x x86_fp80> [[TMP3]])
+// CHECK-NEXT:    ret <2 x x86_fp80> [[ELT_MAXIMUMNUM]]
+//
+ldouble2 pfmax80(ldouble2 a, ldouble2 b) {
+  return __builtin_elementwise_maximumnum(a, b);
+}
+
+//.
+// CHECK: [[TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0}
+// CHECK: [[META3]] = !{!"omnipotent char", [[META4:![0-9]+]], i64 0}
+// CHECK: [[META4]] = !{!"Simple C++ TBAA"}
+//.
diff --git a/clang/test/CodeGenCXX/builtin-amdgcn-fence.cpp b/clang/test/CodeGenCXX/builtin-amdgcn-fence.cpp
index 3af5a21..1e977dd 100644
--- a/clang/test/CodeGenCXX/builtin-amdgcn-fence.cpp
+++ b/clang/test/CodeGenCXX/builtin-amdgcn-fence.cpp
@@ -105,7 +105,7 @@ void test_mixed() {
__builtin_amdgcn_fence( __ATOMIC_SEQ_CST, "workgroup", "local", "local", "global", "local", "local");
}
//.
-// CHECK: [[META3]] = !{!"amdgpu-as", !"local"}
-// CHECK: [[META4]] = !{!"amdgpu-as", !"global"}
+// CHECK: [[META3]] = !{!"amdgpu-synchronize-as", !"local"}
+// CHECK: [[META4]] = !{!"amdgpu-synchronize-as", !"global"}
// CHECK: [[META5]] = !{[[META4]], [[META3]]}
//.
diff --git a/clang/test/CodeGenCXX/delete.cpp b/clang/test/CodeGenCXX/delete.cpp
index d5b0dc6..21b9f8c 100644
--- a/clang/test/CodeGenCXX/delete.cpp
+++ b/clang/test/CodeGenCXX/delete.cpp
@@ -76,27 +76,45 @@ namespace test1 {
~A();
};
- // CHECK-LABEL: define{{.*}} void @_ZN5test14testEPA10_A20_NS_1AE(
- void test(A (*arr)[10][20]) {
+ // CHECK-LABEL: define{{.*}} void @_ZN5test11fEPA10_A20_NS_1AE(
+ void f(A (*arr)[10][20]) {
delete [] arr;
// CHECK: icmp eq ptr [[PTR:%.*]], null
// CHECK-NEXT: br i1
- // CHECK: [[BEGIN:%.*]] = getelementptr inbounds [10 x [20 x [[A:%.*]]]], ptr [[PTR]], i32 0, i32 0, i32 0
- // CHECK-NEXT: [[ALLOC:%.*]] = getelementptr inbounds i8, ptr [[BEGIN]], i64 -8
+ // CHECK: [[ALLOC:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 -8
// CHECK-NEXT: [[COUNT:%.*]] = load i64, ptr [[ALLOC]]
- // CHECK: [[END:%.*]] = getelementptr inbounds [[A]], ptr [[BEGIN]], i64 [[COUNT]]
- // CHECK-NEXT: [[ISEMPTY:%.*]] = icmp eq ptr [[BEGIN]], [[END]]
+ // CHECK: [[END:%.*]] = getelementptr inbounds [[A:%.*]], ptr [[PTR]], i64 [[COUNT]]
+ // CHECK-NEXT: [[ISEMPTY:%.*]] = icmp eq ptr [[PTR]], [[END]]
// CHECK-NEXT: br i1 [[ISEMPTY]],
// CHECK: [[PAST:%.*]] = phi ptr [ [[END]], {{%.*}} ], [ [[CUR:%.*]], {{%.*}} ]
// CHECK-NEXT: [[CUR:%.*]] = getelementptr inbounds [[A]], ptr [[PAST]], i64 -1
// CHECK-NEXT: call void @_ZN5test11AD1Ev(ptr {{[^,]*}} [[CUR]])
- // CHECK-NEXT: [[ISDONE:%.*]] = icmp eq ptr [[CUR]], [[BEGIN]]
+ // CHECK-NEXT: [[ISDONE:%.*]] = icmp eq ptr [[CUR]], [[PTR]]
// CHECK-NEXT: br i1 [[ISDONE]]
// CHECK: [[MUL:%.*]] = mul i64 4, [[COUNT]]
// CHECK-NEXT: [[SIZE:%.*]] = add i64 [[MUL]], 8
// CHECK-NEXT: call void @_ZdaPvm(ptr noundef [[ALLOC]], i64 noundef [[SIZE]])
}
+
+ // CHECK-LABEL: define{{.*}} void @_ZN5test11gEPA_NS_1AE(
+ void g(A (*arr)[]) {
+ delete [] arr;
+ // CHECK: icmp eq ptr [[PTR:%.*]], null
+ // CHECK-NEXT: br i1
+
+ // CHECK: [[ALLOC:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 -8
+ // CHECK-NEXT: [[COUNT:%.*]] = load i64, ptr [[ALLOC]]
+ // CHECK: [[END:%.*]] = getelementptr inbounds [[A:%.*]], ptr [[PTR]], i64 [[COUNT]]
+ // CHECK-NEXT: [[ISEMPTY:%.*]] = icmp eq ptr [[PTR]], [[END]]
+ // CHECK-NEXT: br i1 [[ISEMPTY]],
+ // CHECK: [[PAST:%.*]] = phi ptr [ [[END]], {{%.*}} ], [ [[CUR:%.*]], {{%.*}} ]
+ // CHECK-NEXT: [[CUR:%.*]] = getelementptr inbounds [[A]], ptr [[PAST]], i64 -1
+ // CHECK-NEXT: call void @_ZN5test11AD1Ev(ptr {{[^,]*}} [[CUR]])
+ // CHECK-NEXT: [[ISDONE:%.*]] = icmp eq ptr [[CUR]], [[PTR]]
+ // CHECK-NEXT: br i1 [[ISDONE]]
+ // CHECK: call void @_ZdaPv(ptr noundef [[ALLOC]])
+ }
}
namespace test2 {
diff --git a/clang/test/CodeGenCXX/microsoft-abi-eh-async.cpp b/clang/test/CodeGenCXX/microsoft-abi-eh-async.cpp
new file mode 100644
index 0000000..b831737
--- /dev/null
+++ b/clang/test/CodeGenCXX/microsoft-abi-eh-async.cpp
@@ -0,0 +1,209 @@
+// REQUIRES: x86-registered-target
+
+// RUN: %clang_cl -c --target=x86_64-windows-msvc /EHa -O2 /GS- \
+// RUN: -Xclang=-import-call-optimization \
+// RUN: /clang:-S /clang:-o- -- %s 2>&1 \
+// RUN: | FileCheck %s
+
+#ifdef __clang__
+#define NO_TAIL __attribute((disable_tail_calls))
+#else
+#define NO_TAIL
+#endif
+
+void might_throw();
+void other_func(int x);
+
+void does_not_throw() noexcept(true);
+
+extern "C" void __declspec(dllimport) some_dll_import();
+
+class HasDtor {
+ int x;
+ char foo[40];
+
+public:
+ explicit HasDtor(int x);
+ ~HasDtor();
+};
+
+class BadError {
+public:
+ int errorCode;
+};
+
+void normal_has_regions() {
+ // CHECK-LABEL: .def "?normal_has_regions@@YAXXZ"
+ // CHECK: .seh_endprologue
+
+ // <-- state -1 (none)
+ {
+ HasDtor hd{42};
+
+ // <-- state goes from -1 to 0
+ // because state changes, we expect the HasDtor::HasDtor() call to have a NOP
+ // CHECK: call "??0HasDtor@@QEAA@H@Z"
+ // CHECK-NEXT: nop
+
+ might_throw();
+ // CHECK: call "?might_throw@@YAXXZ"
+ // CHECK-NEXT: nop
+
+ // <-- state goes from 0 to -1 because we're about to call HasDtor::~HasDtor()
+ // CHECK: call "??1HasDtor@@QEAA@XZ"
+ // <-- state -1
+ }
+
+ // <-- state -1
+ other_func(10);
+ // CHECK: call "?other_func@@YAXH@Z"
+ // CHECK-NEXT: nop
+ // CHECK: .seh_startepilogue
+
+ // <-- state -1
+}
+
+// This tests a tail call to a destructor.
+void case_dtor_arg_empty_body(HasDtor x)
+{
+ // CHECK-LABEL: .def "?case_dtor_arg_empty_body@@YAXVHasDtor@@@Z"
+ // CHECK: jmp "??1HasDtor@@QEAA@XZ"
+}
+
+int case_dtor_arg_empty_with_ret(HasDtor x)
+{
+ // CHECK-LABEL: .def "?case_dtor_arg_empty_with_ret@@YAHVHasDtor@@@Z"
+ // CHECK: .seh_endprologue
+
+ // CHECK: call "??1HasDtor@@QEAA@XZ"
+ // CHECK-NOT: nop
+
+ // The call to HasDtor::~HasDtor() should NOT have a NOP because the
+ // following "mov eax, 100" instruction is in the same EH state.
+
+ return 100;
+
+ // CHECK: mov eax, 100
+ // CHECK: .seh_startepilogue
+ // CHECK: .seh_endepilogue
+ // CHECK: .seh_endproc
+}
+
+int case_noexcept_dtor(HasDtor x) noexcept(true)
+{
+ // CHECK: .def "?case_noexcept_dtor@@YAHVHasDtor@@@Z"
+ // CHECK: call "??1HasDtor@@QEAA@XZ"
+ // CHECK-NEXT: mov eax, 100
+ // CHECK: .seh_startepilogue
+ return 100;
+}
+
+void case_except_simple_call() NO_TAIL
+{
+ does_not_throw();
+}
+// CHECK-LABEL: .def "?case_except_simple_call@@YAXXZ"
+// CHECK: .seh_endprologue
+// CHECK-NEXT: call "?does_not_throw@@YAXXZ"
+// CHECK-NEXT: nop
+// CHECK-NEXT: .seh_startepilogue
+// CHECK: .seh_endproc
+
+void case_noexcept_simple_call() noexcept(true) NO_TAIL
+{
+ does_not_throw();
+}
+// CHECK-LABEL: .def "?case_noexcept_simple_call@@YAXXZ"
+// CHECK: .seh_endprologue
+// CHECK-NEXT: call "?does_not_throw@@YAXXZ"
+// CHECK-NEXT: nop
+// CHECK-NEXT: .seh_startepilogue
+// CHECK: .seh_endepilogue
+// CHECK-NEXT: ret
+// CHECK-NEXT: .seh_endproc
+
+// This tests that the destructor is called right before SEH_BeginEpilogue,
+// but in a function that has a return value. Loading the return value
+// counts as a real instruction, so there is no need for a NOP after the
+// dtor call.
+int case_dtor_arg_calls_no_throw(HasDtor x)
+{
+  does_not_throw(); // under /EHa a NOP is expected after this call
+ return 100;
+}
+// CHECK-LABEL: .def "?case_dtor_arg_calls_no_throw@@YAHVHasDtor@@@Z"
+// CHECK: .seh_endprologue
+// CHECK: "?does_not_throw@@YAXXZ"
+// CHECK-NEXT: nop
+// CHECK: "??1HasDtor@@QEAA@XZ"
+// CHECK-NEXT: mov eax, 100
+// CHECK: .seh_startepilogue
+// CHECK: .seh_endproc
+
+// Check the behavior of CALLs that are at the end of MBBs. If a CALL is within
+// a non-null EH state (state != -1) and is at the end of an MBB, then we expect
+// to find an EH_LABEL after the CALL. This causes us to insert a NOP, which
+// is the desired result.
+void case_dtor_runs_after_join(int x) {
+ // CHECK-LABEL: .def "?case_dtor_runs_after_join@@YAXH@Z"
+ // CHECK: .seh_endprologue
+
+ // <-- EH state -1
+
+  // under /EHa a NOP is expected after the ctor call, even though real instructions come next
+ HasDtor hd{42};
+ // CHECK: call "??0HasDtor@@QEAA@H@Z"
+ // CHECK-NEXT: nop
+ // CHECK: test
+
+  // <-- EH state transition from -1 to 0
+ if (x) {
+ might_throw(); // <-- NOP expected (at end of BB w/ EH_LABEL)
+ // CHECK: call "?might_throw@@YAXXZ"
+ // CHECK-NEXT: nop
+ } else {
+ other_func(10); // <-- NOP expected (at end of BB w/ EH_LABEL)
+ // CHECK: call "?other_func@@YAXH@Z"
+ // CHECK-NEXT: nop
+ }
+ does_not_throw();
+ // <-- EH state transition 0 to -1
+ // ~HasDtor() runs
+
+ // CHECK: .seh_endproc
+
+ // CHECK: "$ip2state$?case_dtor_runs_after_join@@YAXH@Z":
+ // CHECK-NEXT: .long [[func_begin:.Lfunc_begin([0-9]+)@IMGREL]]
+ // CHECK-NEXT: .long -1
+ // CHECK-NEXT: .long [[tmp1:.Ltmp([0-9]+)]]@IMGREL
+ // CHECK-NEXT: .long 0
+ // CHECK-NEXT: .long [[tmp2:.Ltmp([0-9]+)]]@IMGREL
+ // CHECK-NEXT: .long -1
+}
+
+
+// Check the behavior of NOP padding around tail calls.
+// We do not expect to insert NOPs around tail calls.
+// In this function both calls are lowered as tail calls (JMP), so no NOPs
+// are inserted at all.
+void case_tail_call_no_eh(bool b) {
+ // tail call; no NOP padding after JMP
+ if (b) {
+ does_not_throw();
+ // <-- no NOP here
+ return;
+ }
+
+ other_func(20);
+  // <-- no NOP here either; this call is also lowered as a tail call
+}
+// CHECK-LABEL: .def "?case_tail_call_no_eh@@YAX_N@Z"
+// CHECK: test
+// CHECK-NEXT: je .LBB
+// CHECK: jmp "?does_not_throw@@YAXXZ"
+// CHECK-SAME: TAILCALL
+// CHECK-NEXT: .LBB
+// CHECK-NEXT: mov ecx, 20
+// CHECK-NEXT: jmp "?other_func@@YAXH@Z"
+// CHECK-SAME: TAILCALL
+// CHECK-NEXT: # -- End function
diff --git a/clang/test/CodeGenCXX/microsoft-abi-eh-disabled.cpp b/clang/test/CodeGenCXX/microsoft-abi-eh-disabled.cpp
new file mode 100644
index 0000000..744f863
--- /dev/null
+++ b/clang/test/CodeGenCXX/microsoft-abi-eh-disabled.cpp
@@ -0,0 +1,140 @@
+// REQUIRES: x86-registered-target
+// RUN: %clang_cl -c --target=x86_64-windows-msvc -EHs-c- -O2 -GS- \
+// RUN: -Xclang=-import-call-optimization \
+// RUN: -clang:-S -clang:-o- -- %s 2>&1 \
+// RUN: | FileCheck %s
+
+#ifdef __clang__
+#define NO_TAIL __attribute((disable_tail_calls))
+#else
+#define NO_TAIL
+#endif
+
+void might_throw();
+void other_func(int x);
+
+void does_not_throw() noexcept(true);
+
+extern "C" void __declspec(dllimport) some_dll_import();
+
+class HasDtor {
+ int x;
+ char foo[40];
+
+public:
+ explicit HasDtor(int x);
+ ~HasDtor();
+};
+
+void normal_has_regions() {
+ {
+ HasDtor hd{42};
+
+    // with EH disabled there are no state transitions, so no NOP is expected after the ctor call
+ might_throw();
+ }
+
+ other_func(10);
+}
+// CHECK-LABEL: .def "?normal_has_regions@@YAXXZ"
+// CHECK: .seh_endprologue
+// CHECK: call "??0HasDtor@@QEAA@H@Z"
+// CHECK-NEXT: call "?might_throw@@YAXXZ"
+// CHECK-NEXT: mov
+// CHECK: call "??1HasDtor@@QEAA@XZ"
+// CHECK-NEXT: mov ecx, 10
+// CHECK-NEXT: call "?other_func@@YAXH@Z"
+// CHECK-NEXT: nop
+// CHECK-NEXT: .seh_startepilogue
+// CHECK-NOT: "$ip2state$?normal_has_regions@@YAXXZ"
+
+// This tests a tail call to a destructor.
+void case_dtor_arg_empty_body(HasDtor x)
+{
+}
+// CHECK-LABEL: .def "?case_dtor_arg_empty_body@@YAXVHasDtor@@@Z"
+// CHECK: jmp "??1HasDtor@@QEAA@XZ"
+
+int case_dtor_arg_empty_with_ret(HasDtor x)
+{
+ // The call to HasDtor::~HasDtor() should NOT have a NOP because the
+ // following "mov eax, 100" instruction is in the same EH state.
+ return 100;
+}
+// CHECK-LABEL: .def "?case_dtor_arg_empty_with_ret@@YAHVHasDtor@@@Z"
+// CHECK: .seh_endprologue
+// CHECK: call "??1HasDtor@@QEAA@XZ"
+// CHECK-NOT: nop
+// CHECK: mov eax, 100
+// CHECK: .seh_startepilogue
+// CHECK: .seh_endepilogue
+// CHECK: .seh_endproc
+
+void case_except_simple_call() NO_TAIL
+{
+ does_not_throw();
+}
+
+// This tests that the destructor is called right before SEH_BeginEpilogue,
+// but in a function that has a return value.
+int case_dtor_arg_calls_no_throw(HasDtor x)
+{
+ does_not_throw(); // no NOP expected
+ return 100;
+}
+
+// Check the behavior of CALLs that are at the end of MBBs. With EH disabled
+// there are no EH_LABELs after these calls, so no EH-state NOPs are expected
+// after them (only the final call before .seh_startepilogue gets a NOP).
+void case_dtor_runs_after_join(int x) {
+
+ // ctor call does not need a NOP, because it has real instructions after it
+ HasDtor hd{42};
+
+ if (x) {
+ might_throw();
+ } else {
+ other_func(10);
+ }
+ does_not_throw();
+ // ~HasDtor() runs
+}
+
+// CHECK-LABEL: .def "?case_dtor_runs_after_join@@YAXH@Z"
+// CHECK: .seh_endprologue
+// CHECK: call "??0HasDtor@@QEAA@H@Z"
+// CHECK-NEXT: test
+// CHECK: call "?might_throw@@YAXXZ"
+// CHECK-NEXT: jmp
+// CHECK: call "?other_func@@YAXH@Z"
+// CHECK-NEXT: .LBB
+// CHECK: call "?does_not_throw@@YAXXZ"
+// CHECK-NEXT: lea
+// CHECK-NEXT: call "??1HasDtor@@QEAA@XZ"
+// CHECK-NEXT: nop
+// CHECK-NEXT: .seh_startepilogue
+// CHECK-NOT: "$ip2state$?case_dtor_runs_after_join@@YAXH@Z":
+
+
+// Check the behavior of NOP padding around tail calls.
+// We do not expect to insert NOPs around tail calls.
+// However, the first call (to other_func()) does get a NOP
+// because it comes before .seh_startepilogue.
+void case_tail_call_no_eh() {
+ // ordinary call
+ other_func(10);
+
+ // tail call; no NOP padding after JMP
+ does_not_throw();
+}
+
+// CHECK-LABEL: .def "?case_tail_call_no_eh@@YAXXZ"
+// CHECK: .seh_endprologue
+// CHECK: call "?other_func@@YAXH@Z"
+// CHECK-NEXT: nop
+// CHECK-NEXT: .seh_startepilogue
+// CHECK: .seh_endepilogue
+// CHECK: jmp "?does_not_throw@@YAXXZ"
+// CHECK-NOT: nop
+// CHECK: .seh_endproc
diff --git a/clang/test/CodeGenCXX/microsoft-abi-eh-ip2state.cpp b/clang/test/CodeGenCXX/microsoft-abi-eh-ip2state.cpp
new file mode 100644
index 0000000..0b7b406
--- /dev/null
+++ b/clang/test/CodeGenCXX/microsoft-abi-eh-ip2state.cpp
@@ -0,0 +1,242 @@
+// REQUIRES: x86-registered-target
+// RUN: %clang_cl -c --target=x86_64-windows-msvc -O2 -EHsc -GS- \
+// RUN: -Xclang=-import-call-optimization \
+// RUN: -clang:-S -clang:-o- -- %s 2>&1 \
+// RUN: | FileCheck %s
+
+#ifdef __clang__
+#define NO_TAIL __attribute((disable_tail_calls))
+#else
+#define NO_TAIL
+#endif
+
+void might_throw();
+void other_func(int x);
+
+void does_not_throw() noexcept(true);
+
+extern "C" void __declspec(dllimport) some_dll_import();
+
+class HasDtor {
+ int x;
+ char foo[40];
+
+public:
+ explicit HasDtor(int x);
+ ~HasDtor();
+};
+
+class BadError {
+public:
+ int errorCode;
+};
+
+// Verify that when NOP padding for IP2State is active *and* Import Call
+// Optimization is active, we see both forms of NOP padding.
+void case_calls_dll_import() NO_TAIL {
+ some_dll_import();
+}
+// CHECK-LABEL: .def "?case_calls_dll_import@@YAXXZ"
+// CHECK: .seh_endprologue
+// CHECK: .Limpcall{{[0-9]+}}:
+// CHECK-NEXT: rex64
+// CHECK-NEXT: call __imp_some_dll_import
+// CHECK-NEXT: nop dword ptr {{\[.*\]}}
+// CHECK-NEXT: nop
+// CHECK-NEXT: .seh_startepilogue
+
+void normal_has_regions() {
+
+ // <-- state -1 (none)
+ {
+ HasDtor hd{42};
+
+ // <-- state goes from -1 to 0
+ // because state changes, we expect the HasDtor::HasDtor() call to have a NOP
+
+ might_throw();
+
+ // <-- state goes from 0 to -1 because we're about to call HasDtor::~HasDtor()
+ // <-- state -1
+ }
+
+ // <-- state -1
+ other_func(10);
+
+ // <-- state -1
+}
+// CHECK-LABEL: .def "?normal_has_regions@@YAXXZ"
+// CHECK: .seh_endprologue
+// CHECK: call "??0HasDtor@@QEAA@H@Z"
+// CHECK-NEXT: nop
+// CHECK: call "?might_throw@@YAXXZ"
+// CHECK-NEXT: nop
+// CHECK: call "??1HasDtor@@QEAA@XZ"
+// CHECK: call "?other_func@@YAXH@Z"
+// CHECK-NEXT: nop
+// CHECK: .seh_startepilogue
+
+// This tests a tail call to a destructor.
+void case_dtor_arg_empty_body(HasDtor x)
+{
+}
+// CHECK-LABEL: .def "?case_dtor_arg_empty_body@@YAXVHasDtor@@@Z"
+// CHECK: jmp "??1HasDtor@@QEAA@XZ"
+
+int case_dtor_arg_empty_with_ret(HasDtor x)
+{
+ // CHECK-LABEL: .def "?case_dtor_arg_empty_with_ret@@YAHVHasDtor@@@Z"
+ // CHECK: .seh_endprologue
+
+ // CHECK: call "??1HasDtor@@QEAA@XZ"
+ // CHECK-NOT: nop
+
+ // The call to HasDtor::~HasDtor() should NOT have a NOP because the
+ // following "mov eax, 100" instruction is in the same EH state.
+
+ return 100;
+
+ // CHECK: mov eax, 100
+ // CHECK: .seh_startepilogue
+ // CHECK: .seh_endepilogue
+ // CHECK: .seh_endproc
+}
+
+int case_noexcept_dtor(HasDtor x) noexcept(true)
+{
+ // CHECK: .def "?case_noexcept_dtor@@YAHVHasDtor@@@Z"
+ // CHECK: call "??1HasDtor@@QEAA@XZ"
+ // CHECK-NEXT: mov eax, 100
+ // CHECK-NEXT: .seh_startepilogue
+ return 100;
+}
+
+// Simple call of a function that can throw
+void case_except_simple_call() NO_TAIL
+{
+ might_throw();
+}
+// CHECK-LABEL: .def "?case_except_simple_call@@YAXXZ"
+// CHECK: .seh_endprologue
+// CHECK-NEXT: call "?might_throw@@YAXXZ"
+// CHECK-NEXT: nop
+// CHECK-NEXT: .seh_startepilogue
+
+// Simple call of a function that cannot throw, in a noexcept context.
+void case_noexcept_simple_call() noexcept(true) NO_TAIL
+{
+ does_not_throw();
+}
+// CHECK-LABEL: .def "?case_noexcept_simple_call@@YAXXZ"
+// CHECK: .seh_endprologue
+// CHECK-NEXT: call "?does_not_throw@@YAXXZ"
+// CHECK-NEXT: nop
+// CHECK-NEXT: .seh_startepilogue
+
+
+// This tests that the destructor is called right before SEH_BeginEpilogue,
+// but in a function that has a return value.
+int case_dtor_arg_calls_no_throw(HasDtor x)
+{
+ does_not_throw(); // no NOP expected
+ return 100;
+}
+
+// Check the behavior of CALLs that are at the end of MBBs. If a CALL is within
+// a non-null EH state (state != -1) and is at the end of an MBB, then we expect
+// to find an EH_LABEL after the CALL. This causes us to insert a NOP, which
+// is the desired result.
+void case_dtor_runs_after_join(int x) {
+ // CHECK-LABEL: .def "?case_dtor_runs_after_join@@YAXH@Z"
+ // CHECK: .seh_endprologue
+
+ // <-- EH state -1
+
+ // ctor call does not need a NOP, because it has real instructions after it
+ HasDtor hd{42};
+ // CHECK: call "??0HasDtor@@QEAA@H@Z"
+ // CHECK-NEXT: test
+
+  // <-- EH state transition from -1 to 0
+ if (x) {
+ might_throw(); // <-- NOP expected (at end of BB w/ EH_LABEL)
+ // CHECK: call "?might_throw@@YAXXZ"
+ // CHECK-NEXT: nop
+ } else {
+ other_func(10); // <-- NOP expected (at end of BB w/ EH_LABEL)
+ // CHECK: call "?other_func@@YAXH@Z"
+ // CHECK-NEXT: nop
+ }
+ does_not_throw();
+ // <-- EH state transition 0 to -1
+ // ~HasDtor() runs
+
+ // CHECK: .seh_endproc
+
+ // CHECK: "$ip2state$?case_dtor_runs_after_join@@YAXH@Z":
+ // CHECK-NEXT: .long [[func_begin:.Lfunc_begin([0-9]+)@IMGREL]]
+ // CHECK-NEXT: .long -1
+ // CHECK-NEXT: .long [[tmp1:.Ltmp([0-9]+)]]@IMGREL
+ // CHECK-NEXT: .long 0
+ // CHECK-NEXT: .long [[tmp2:.Ltmp([0-9]+)]]@IMGREL
+ // CHECK-NEXT: .long -1
+}
+
+
+// Check the behavior of NOP padding around tail calls.
+// We do not expect to insert NOPs around tail calls.
+// However, the first call (to other_func()) does get a NOP
+// because it comes before .seh_startepilogue.
+void case_tail_call_no_eh() {
+ // CHECK-LABEL: .def "?case_tail_call_no_eh@@YAXXZ"
+ // CHECK: .seh_endprologue
+
+ // ordinary call
+ other_func(10);
+ // CHECK: call "?other_func@@YAXH@Z"
+ // CHECK-NEXT: nop
+
+ // tail call; no NOP padding after JMP
+ does_not_throw();
+
+ // CHECK: .seh_startepilogue
+ // CHECK: .seh_endepilogue
+ // CHECK: jmp "?does_not_throw@@YAXXZ"
+ // CHECK-NOT: nop
+ // CHECK: .seh_endproc
+}
+
+
+// Check the behavior of a try/catch
+int case_try_catch() {
+ // CHECK-LABEL: .def "?case_try_catch@@YAHXZ"
+ // CHECK: .seh_endprologue
+
+ // Because of the EH_LABELs, the ctor and other_func() get NOPs.
+
+ int result = 0;
+ try {
+ // CHECK: call "??0HasDtor@@QEAA@H@Z"
+ // CHECK-NEXT: nop
+ HasDtor hd{20};
+
+ // CHECK: call "?other_func@@YAXH@Z"
+ // CHECK-NEXT: nop
+ other_func(10);
+
+ // CHECK: call "??1HasDtor@@QEAA@XZ"
+ // CHECK: mov
+ } catch (BadError& e) {
+ result = 1;
+ }
+ return result;
+
+ // CHECK: .seh_endproc
+
+ // CHECK: .def "?dtor$4@?0??case_try_catch@@YAHXZ@4HA"
+ // CHECK: .seh_endprologue
+ // CHECK: call "??1HasDtor@@QEAA@XZ"
+ // CHECK-NEXT: nop
+ // CHECK: .seh_startepilogue
+ // CHECK: .seh_endproc
+}
diff --git a/clang/test/CodeGenHLSL/BasicFeatures/ArrayOutputArguments.hlsl b/clang/test/CodeGenHLSL/BasicFeatures/ArrayOutputArguments.hlsl
index bccfaf5..4571649 100644
--- a/clang/test/CodeGenHLSL/BasicFeatures/ArrayOutputArguments.hlsl
+++ b/clang/test/CodeGenHLSL/BasicFeatures/ArrayOutputArguments.hlsl
@@ -11,7 +11,7 @@ void increment(inout int Arr[2]) {
// CHECK-NEXT: [[Tmp:%.*]] = alloca [2 x i32], align 4
// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[A]], ptr align 4 @{{.*}}, i32 8, i1 false)
// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[Tmp]], ptr align 4 [[A]], i32 8, i1 false)
-// CHECK-NEXT: call void @{{.*}}increment{{.*}}(ptr noalias noundef byval([2 x i32]) align 4 [[Tmp]])
+// CHECK-NEXT: call void @{{.*}}increment{{.*}}(ptr noalias noundef align 4 [[Tmp]])
// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[A]], ptr align 4 [[Tmp]], i32 8, i1 false)
// CHECK-NEXT: [[Idx:%.*]] = getelementptr inbounds [2 x i32], ptr [[A]], i32 0, i32 0
// CHECK-NEXT: [[B:%.*]] = load i32, ptr [[Idx]], align 4
@@ -32,7 +32,7 @@ void fn2(out int Arr[2]) {
// CHECK: [[A:%.*]] = alloca [2 x i32], align 4
// CHECK-NEXT: [[Tmp:%.*]] = alloca [2 x i32], align 4
// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[A]], ptr align 4 @{{.*}}, i32 8, i1 false)
-// CHECK-NEXT: call void @{{.*}}fn2{{.*}}(ptr noalias noundef byval([2 x i32]) align 4 [[Tmp]])
+// CHECK-NEXT: call void @{{.*}}fn2{{.*}}(ptr noalias noundef align 4 [[Tmp]])
// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[A]], ptr align 4 [[Tmp]], i32 8, i1 false)
// CHECK-NEXT: [[Idx:%.*]] = getelementptr inbounds [2 x i32], ptr [[A]], i32 0, i32 0
// CHECK-NEXT: [[B:%.*]] = load i32, ptr [[Idx]], align 4
@@ -56,7 +56,7 @@ void nestedCall(inout int Arr[2], uint index) {
// CHECK-NEXT: [[Tmp:%.*]] = alloca [2 x i32], align 4
// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[A]], ptr align 4 @{{.*}}, i32 8, i1 false)
// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[Tmp]], ptr align 4 [[A]], i32 8, i1 false)
-// CHECK-NEXT: call void @{{.*}}nestedCall{{.*}}(ptr noalias noundef byval([2 x i32]) align 4 [[Tmp]], i32 noundef 0)
+// CHECK-NEXT: call void @{{.*}}nestedCall{{.*}}(ptr noalias noundef align 4 [[Tmp]], i32 noundef 0)
// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[A]], ptr align 4 [[Tmp]], i32 8, i1 false)
// CHECK-NEXT: [[Idx:%.*]] = getelementptr inbounds [2 x i32], ptr [[A]], i32 0, i32 1
// CHECK-NEXT: [[B:%.*]] = load i32, ptr [[Idx]], align 4
@@ -70,7 +70,7 @@ export int arrayCall3() {
// CHECK-LABEL: outerCall
// CHECK: [[Tmp:%.*]] = alloca [2 x i32], align 4
// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[Tmp]], ptr align 4 %{{.*}}, i32 8, i1 false)
-// CHECK-NEXT: call void {{.*}}increment{{.*}}(ptr noalias noundef byval([2 x i32]) align 4 [[Tmp]])
+// CHECK-NEXT: call void {{.*}}increment{{.*}}(ptr noalias noundef align 4 [[Tmp]])
// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 {{.*}}, ptr align 4 [[Tmp]], i32 8, i1 false)
// CHECK-NEXT: ret void
void outerCall(inout int Arr[2]) {
@@ -82,7 +82,7 @@ void outerCall(inout int Arr[2]) {
// CHECK-NEXT: [[Tmp:%.*]] = alloca [2 x i32], align 4
// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[A]], ptr align 4 @{{.*}}, i32 8, i1 false)
// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[Tmp]], ptr align 4 [[A]], i32 8, i1 false)
-// CHECK-NEXT: call void @{{.*}}outerCall{{.*}}(ptr noalias noundef byval([2 x i32]) align 4 [[Tmp]])
+// CHECK-NEXT: call void @{{.*}}outerCall{{.*}}(ptr noalias noundef align 4 [[Tmp]])
// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[A]], ptr align 4 [[Tmp]], i32 8, i1 false)
// CHECK-NEXT: [[Idx:%.*]] = getelementptr inbounds [2 x i32], ptr [[A]], i32 0, i32 0
// CHECK-NEXT: [[B:%.*]] = load i32, ptr [[Idx]], align 4
@@ -110,7 +110,7 @@ void outerCall2(inout int Arr[2]) {
// CHECK-NEXT: [[Tmp:%.*]] = alloca [2 x i32], align 4
// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[A]], ptr align 4 @{{.*}}, i32 8, i1 false)
// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[Tmp]], ptr align 4 [[A]], i32 8, i1 false)
-// CHECK-NEXT: call void @{{.*}}outerCall2{{.*}}(ptr noalias noundef byval([2 x i32]) align 4 [[Tmp]])
+// CHECK-NEXT: call void @{{.*}}outerCall2{{.*}}(ptr noalias noundef align 4 [[Tmp]])
// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[A]], ptr align 4 [[Tmp]], i32 8, i1 false)
// CHECK-NEXT: [[Idx:%.*]] = getelementptr inbounds [2 x i32], ptr [[A]], i32 0, i32 0
// CHECK-NEXT: [[B:%.*]] = load i32, ptr [[Idx]], align 4
diff --git a/clang/test/CodeGenOpenCL/amdgpu-features.cl b/clang/test/CodeGenOpenCL/amdgpu-features.cl
index 75e9710..e96dd66 100644
--- a/clang/test/CodeGenOpenCL/amdgpu-features.cl
+++ b/clang/test/CodeGenOpenCL/amdgpu-features.cl
@@ -108,7 +108,7 @@
// GFX1153: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot10-insts,+dot12-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32"
// GFX1200: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot10-insts,+dot11-insts,+dot12-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+fp8-conversion-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32"
// GFX1201: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot10-insts,+dot11-insts,+dot12-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+fp8-conversion-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32"
-// GFX1250: "target-features"="+16-bit-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+bf16-trans-insts,+bitop3-insts,+ci-insts,+dl-insts,+dot7-insts,+dot8-insts,+dpp,+fp8-conversion-insts,+fp8e5m3-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx1250-insts,+gfx8-insts,+gfx9-insts,+permlane16-swap,+prng-inst,+setprio-inc-wg-inst,+tanh-insts,+transpose-load-f4f6-insts,+wavefrontsize32
+// GFX1250: "target-features"="+16-bit-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+bf16-trans-insts,+bitop3-insts,+ci-insts,+dl-insts,+dot7-insts,+dot8-insts,+dpp,+fp8-conversion-insts,+fp8e5m3-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx1250-insts,+gfx8-insts,+gfx9-insts,+permlane16-swap,+prng-inst,+setprio-inc-wg-inst,+tanh-insts,+transpose-load-f4f6-insts,+vmem-pref-insts,+wavefrontsize32
// GFX1103-W64: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot10-insts,+dot12-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize64"
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250-load-monitor.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250-load-monitor.cl
new file mode 100644
index 0000000..f2552d4
--- /dev/null
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250-load-monitor.cl
@@ -0,0 +1,66 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -cl-std=CL2.0 -triple amdgcn-unknown-unknown -target-cpu gfx1250 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-GFX1250
+
+typedef int v2i __attribute__((ext_vector_type(2)));
+typedef int v4i __attribute__((ext_vector_type(4)));
+
+// CHECK-GFX1250-LABEL: @test_amdgcn_global_load_monitor_b32(
+// CHECK-GFX1250-NEXT: entry:
+// CHECK-GFX1250-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.amdgcn.global.load.monitor.b32.i32(ptr addrspace(1) [[INPTR:%.*]], i32 1)
+// CHECK-GFX1250-NEXT: ret i32 [[TMP0]]
+//
+int test_amdgcn_global_load_monitor_b32(global int* inptr)
+{
+ return __builtin_amdgcn_global_load_monitor_b32(inptr, 1);
+}
+
+// CHECK-GFX1250-LABEL: @test_amdgcn_global_load_monitor_b64(
+// CHECK-GFX1250-NEXT: entry:
+// CHECK-GFX1250-NEXT: [[TMP0:%.*]] = tail call <2 x i32> @llvm.amdgcn.global.load.monitor.b64.v2i32(ptr addrspace(1) [[INPTR:%.*]], i32 10)
+// CHECK-GFX1250-NEXT: ret <2 x i32> [[TMP0]]
+//
+v2i test_amdgcn_global_load_monitor_b64(global v2i* inptr)
+{
+ return __builtin_amdgcn_global_load_monitor_b64(inptr, 10);
+}
+
+// CHECK-GFX1250-LABEL: @test_amdgcn_global_load_monitor_b128(
+// CHECK-GFX1250-NEXT: entry:
+// CHECK-GFX1250-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.amdgcn.global.load.monitor.b128.v4i32(ptr addrspace(1) [[INPTR:%.*]], i32 22)
+// CHECK-GFX1250-NEXT: ret <4 x i32> [[TMP0]]
+//
+v4i test_amdgcn_global_load_monitor_b128(global v4i* inptr)
+{
+ return __builtin_amdgcn_global_load_monitor_b128(inptr, 22);
+}
+
+// CHECK-GFX1250-LABEL: @test_amdgcn_flat_load_monitor_b32(
+// CHECK-GFX1250-NEXT: entry:
+// CHECK-GFX1250-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.amdgcn.flat.load.monitor.b32.i32(ptr [[INPTR:%.*]], i32 27)
+// CHECK-GFX1250-NEXT: ret i32 [[TMP0]]
+//
+int test_amdgcn_flat_load_monitor_b32(int* inptr)
+{
+ return __builtin_amdgcn_flat_load_monitor_b32(inptr, 27);
+}
+
+// CHECK-GFX1250-LABEL: @test_amdgcn_flat_load_monitor_b64(
+// CHECK-GFX1250-NEXT: entry:
+// CHECK-GFX1250-NEXT: [[TMP0:%.*]] = tail call <2 x i32> @llvm.amdgcn.flat.load.monitor.b64.v2i32(ptr [[INPTR:%.*]], i32 1)
+// CHECK-GFX1250-NEXT: ret <2 x i32> [[TMP0]]
+//
+v2i test_amdgcn_flat_load_monitor_b64(v2i* inptr)
+{
+ return __builtin_amdgcn_flat_load_monitor_b64(inptr, 1);
+}
+
+// CHECK-GFX1250-LABEL: @test_amdgcn_flat_load_monitor_b128(
+// CHECK-GFX1250-NEXT: entry:
+// CHECK-GFX1250-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.amdgcn.flat.load.monitor.b128.v4i32(ptr [[INPTR:%.*]], i32 0)
+// CHECK-GFX1250-NEXT: ret <4 x i32> [[TMP0]]
+//
+v4i test_amdgcn_flat_load_monitor_b128(v4i* inptr)
+{
+ return __builtin_amdgcn_flat_load_monitor_b128(inptr, 0);
+}
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250-wmma-w32.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250-wmma-w32.cl
index e4ef3de..86c27d4 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250-wmma-w32.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250-wmma-w32.cl
@@ -157,6 +157,18 @@ void test_amdgcn_wmma_i32_16x16x64_iu8(global v8i* out, v8i a, v8i b, v8i c)
*out = __builtin_amdgcn_wmma_i32_16x16x64_iu8(0, a, 0, b, c, false, true);
}
+// CHECK-GFX1250-LABEL: @test_amdgcn_wmma_f32_16x16x128_f8f6f4(
+// CHECK-GFX1250-NEXT: entry:
+// CHECK-GFX1250-NEXT: [[TMP0:%.*]] = shufflevector <16 x i32> [[B:%.*]], <16 x i32> poison, <12 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
+// CHECK-GFX1250-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.amdgcn.wmma.f32.16x16x128.f8f6f4.v8f32.v16i32.v12i32(i32 1, <16 x i32> [[A:%.*]], i32 2, <12 x i32> [[TMP0]], i16 0, <8 x float> [[C:%.*]])
+// CHECK-GFX1250-NEXT: store <8 x float> [[TMP1]], ptr addrspace(1) [[OUT:%.*]], align 32, !tbaa [[TBAA4]]
+// CHECK-GFX1250-NEXT: ret void
+//
+void test_amdgcn_wmma_f32_16x16x128_f8f6f4(global v8f* out, v16i a, v16i b, v8f c)
+{
+ *out = __builtin_amdgcn_wmma_f32_16x16x128_f8f6f4(1, a, 2, b, 0, c);
+}
+
// CHECK-GFX1250-LABEL: @test_amdgcn_wmma_f32_16x16x32_f16(
// CHECK-GFX1250-NEXT: entry:
// CHECK-GFX1250-NEXT: [[TMP0:%.*]] = tail call <8 x float> @llvm.amdgcn.wmma.f32.16x16x32.f16.v8f32.v16f16(i1 false, <16 x half> [[A:%.*]], i1 false, <16 x half> [[B:%.*]], i16 0, <8 x float> [[C:%.*]], i1 false, i1 true)
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl
index a21862c..81f39f9 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl
@@ -440,6 +440,25 @@ void test_permlane16_swap(global uint2* out, uint old, uint src) {
*out = __builtin_amdgcn_permlane16_swap(old, src, false, true);
}
+// CHECK-LABEL: @test_prefetch(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[FPTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[GPTR_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
+// CHECK-NEXT: [[FPTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[FPTR_ADDR]] to ptr
+// CHECK-NEXT: [[GPTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[GPTR_ADDR]] to ptr
+// CHECK-NEXT: store ptr [[FPTR:%.*]], ptr [[FPTR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store ptr addrspace(1) [[GPTR:%.*]], ptr [[GPTR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[FPTR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: call void @llvm.amdgcn.flat.prefetch(ptr [[TMP0]], i32 0)
+// CHECK-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[GPTR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: call void @llvm.amdgcn.global.prefetch(ptr addrspace(1) [[TMP1]], i32 8)
+// CHECK-NEXT: ret void
+//
+void test_prefetch(generic void *fptr, global void *gptr) {
+ __builtin_amdgcn_flat_prefetch(fptr, 0);
+ __builtin_amdgcn_global_prefetch(gptr, 8);
+}
+
// CHECK-LABEL: @test_cvt_f32_fp8_e5m3(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
diff --git a/clang/test/DebugInfo/KeyInstructions/asm.c b/clang/test/DebugInfo/KeyInstructions/asm.c
new file mode 100644
index 0000000..2b33016
--- /dev/null
+++ b/clang/test/DebugInfo/KeyInstructions/asm.c
@@ -0,0 +1,59 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
+// RUN: %clang_cc1 -triple aarch64 -target-feature +ls64 -O0 -emit-llvm -x c %s -o - -gkey-instructions -debug-info-kind=line-tables-only -gno-column-info | FileCheck %s
+// Partially copied from clang/test/CodeGen/AArch64/ls64-inline-asm.c
+
+// Check the inline asm call and result store are Key and distinct atoms.
+
+struct foo { unsigned long long x[8]; };
+// CHECK-LABEL: define dso_local void @load(
+// CHECK-SAME: ptr noundef [[OUTPUT:%.*]], ptr noundef [[ADDR:%.*]]) #[[ATTR0:[0-9]+]] !dbg [[DBG5:![0-9]+]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[OUTPUT_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT: [[ADDR_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT: store ptr [[OUTPUT]], ptr [[OUTPUT_ADDR]], align 8
+// CHECK-NEXT: store ptr [[ADDR]], ptr [[ADDR_ADDR]], align 8
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[OUTPUT_ADDR]], align 8, !dbg [[DBG9:![0-9]+]]
+// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[ADDR_ADDR]], align 8, !dbg [[DBG9]]
+// CHECK-NEXT: [[TMP2:%.*]] = call i512 asm sideeffect "ld64b $0,[$1]", "=r,r,~{memory}"(ptr [[TMP1]]) #[[ATTR1:[0-9]+]], !dbg [[DBG10:![0-9]+]], !srcloc [[META11:![0-9]+]]
+// CHECK-NEXT: store i512 [[TMP2]], ptr [[TMP0]], align 8, !dbg [[DBG12:![0-9]+]]
+// CHECK-NEXT: ret void, !dbg [[DBG13:![0-9]+]]
+//
+void load(struct foo *output, void *addr) {
+ __asm__ volatile ("ld64b %0,[%1]" : "=r" (*output) : "r" (addr) : "memory");
+}
+
+// CHECK-LABEL: define dso_local void @load2(
+// CHECK-SAME: ptr noundef [[OUTPUT:%.*]], ptr noundef [[ADDR:%.*]]) #[[ATTR0]] !dbg [[DBG14:![0-9]+]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[OUTPUT_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT: [[ADDR_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT: store ptr [[OUTPUT]], ptr [[OUTPUT_ADDR]], align 8
+// CHECK-NEXT: store ptr [[ADDR]], ptr [[ADDR_ADDR]], align 8
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[OUTPUT_ADDR]], align 8, !dbg [[DBG15:![0-9]+]]
+// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[ADDR_ADDR]], align 8, !dbg [[DBG15]]
+// CHECK-NEXT: [[TMP2:%.*]] = call i32 asm sideeffect "ld64b $0,[$1]", "=r,r,~{memory}"(ptr [[TMP1]]) #[[ATTR1]], !dbg [[DBG16:![0-9]+]], !srcloc [[META17:![0-9]+]]
+// CHECK-NEXT: store i32 [[TMP2]], ptr [[TMP0]], align 4, !dbg [[DBG18:![0-9]+]]
+// CHECK-NEXT: ret void, !dbg [[DBG19:![0-9]+]]
+//
+void load2(int *output, void *addr) {
+ __asm__ volatile ("ld64b %0,[%1]" : "=r" (*output) : "r" (addr) : "memory");
+}
+//.
+// CHECK: [[META0:![0-9]+]] = distinct !DICompileUnit(language: DW_LANG_C11, file: [[META1:![0-9]+]], producer: "{{.*}}clang version {{.*}}", isOptimized: false, runtimeVersion: 0, emissionKind: LineTablesOnly, splitDebugInlining: false, nameTableKind: None)
+// CHECK: [[META1]] = !DIFile(filename: "{{.*}}<stdin>", directory: {{.*}})
+// CHECK: [[DBG5]] = distinct !DISubprogram(name: "load", scope: [[META6:![0-9]+]], file: [[META6]], line: 21, type: [[META7:![0-9]+]], scopeLine: 21, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: [[META0]], keyInstructions: true)
+// CHECK: [[META6]] = !DIFile(filename: "{{.*}}asm.c", directory: {{.*}})
+// CHECK: [[META7]] = !DISubroutineType(types: [[META8:![0-9]+]])
+// CHECK: [[META8]] = !{}
+// CHECK: [[DBG9]] = !DILocation(line: 22, scope: [[DBG5]])
+// CHECK: [[DBG10]] = !DILocation(line: 22, scope: [[DBG5]], atomGroup: 1, atomRank: 1)
+// CHECK: [[META11]] = !{i64 1458}
+// CHECK: [[DBG12]] = !DILocation(line: 22, scope: [[DBG5]], atomGroup: 2, atomRank: 1)
+// CHECK: [[DBG13]] = !DILocation(line: 23, scope: [[DBG5]], atomGroup: 3, atomRank: 1)
+// CHECK: [[DBG14]] = distinct !DISubprogram(name: "load2", scope: [[META6]], file: [[META6]], line: 38, type: [[META7]], scopeLine: 38, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: [[META0]], keyInstructions: true)
+// CHECK: [[DBG15]] = !DILocation(line: 39, scope: [[DBG14]])
+// CHECK: [[DBG16]] = !DILocation(line: 39, scope: [[DBG14]], atomGroup: 1, atomRank: 1)
+// CHECK: [[META17]] = !{i64 2501}
+// CHECK: [[DBG18]] = !DILocation(line: 39, scope: [[DBG14]], atomGroup: 2, atomRank: 1)
+// CHECK: [[DBG19]] = !DILocation(line: 40, scope: [[DBG14]], atomGroup: 3, atomRank: 1)
+//.
diff --git a/clang/test/DebugInfo/KeyInstructions/goto.c b/clang/test/DebugInfo/KeyInstructions/goto.c
new file mode 100644
index 0000000..ead92e6
--- /dev/null
+++ b/clang/test/DebugInfo/KeyInstructions/goto.c
@@ -0,0 +1,30 @@
+// RUN: %clang_cc1 -triple x86_64-linux-gnu -gkey-instructions -x c++ -std=c++17 %s -debug-info-kind=line-tables-only -emit-llvm -o - -gno-column-info \
+// RUN: | FileCheck %s
+// RUN: %clang_cc1 -triple x86_64-linux-gnu -gkey-instructions -x c %s -debug-info-kind=line-tables-only -emit-llvm -o - -gno-column-info \
+// RUN: | FileCheck %s
+
+// Check the goto branches get Key Instructions metadata.
+void ext();
+void test_goto(void) {
+// CHECK: br label %dst1, !dbg [[G1R1:!.*]]
+ goto dst1;
+dst1:
+ ext();
+
+ void *ptr = &&dst2;
+// CHECK: br label %indirectgoto, !dbg [[G3R1:!.*]]
+ goto *ptr;
+dst2:
+ ext();
+
+// CHECK: br label %dst3, !dbg [[G4R1:!.*]]
+ goto *&&dst3;
+dst3:
+ ext();
+
+ return;
+}
+
+// CHECK: [[G1R1]] = !DILocation(line: 10, scope: ![[#]], atomGroup: 1, atomRank: 1)
+// CHECK: [[G3R1]] = !DILocation(line: 16, scope: ![[#]], atomGroup: 3, atomRank: 1)
+// CHECK: [[G4R1]] = !DILocation(line: 21, scope: ![[#]], atomGroup: 4, atomRank: 1)
diff --git a/clang/test/Driver/amdgpu-hip-system-arch.c b/clang/test/Driver/amdgpu-hip-system-arch.c
index 9c27bc0..12e298a 100644
--- a/clang/test/Driver/amdgpu-hip-system-arch.c
+++ b/clang/test/Driver/amdgpu-hip-system-arch.c
@@ -14,14 +14,14 @@
// RUN: | FileCheck %s --check-prefix=NO-OUTPUT-ERROR
// RUN: not %clang -### --target=x86_64-unknown-linux-gnu -nogpulib --offload-new-driver --offload-arch=native --amdgpu-arch-tool=%t/amdgpu_arch_fail -x hip %s 2>&1 \
// RUN: | FileCheck %s --check-prefix=NO-OUTPUT-ERROR
-// NO-OUTPUT-ERROR: error: cannot determine amdgcn architecture{{.*}}; consider passing it via '--offload-arch'
+// NO-OUTPUT-ERROR: error: cannot determine hip architecture{{.*}}; consider passing it via '--offload-arch'
// case when amdgpu-arch does not return anything with successful execution
// RUN: not %clang -### --target=x86_64-unknown-linux-gnu -nogpulib --offload-arch=native --amdgpu-arch-tool=%t/amdgpu_arch_empty -x hip %s 2>&1 \
// RUN: | FileCheck %s --check-prefix=EMPTY-OUTPUT
// RUN: not %clang -### --target=x86_64-unknown-linux-gnu -nogpulib --offload-new-driver --offload-arch=native --amdgpu-arch-tool=%t/amdgpu_arch_empty -x hip %s 2>&1 \
// RUN: | FileCheck %s --check-prefix=EMPTY-OUTPUT
-// EMPTY-OUTPUT: error: cannot determine amdgcn architecture: No AMD GPU detected in the system; consider passing it via '--offload-arch'
+// EMPTY-OUTPUT: error: cannot determine hip architecture: No GPU detected in the system; consider passing it via '--offload-arch'
// case when amdgpu-arch returns a gfx906 GPU.
// RUN: %clang -### --target=x86_64-unknown-linux-gnu -nogpuinc -nogpulib --offload-arch=native --amdgpu-arch-tool=%t/amdgpu_arch_gfx906 -x hip %s 2>&1 \
@@ -36,4 +36,4 @@
// RUN: --offload-arch=native --amdgpu-arch-tool=%t/amdgpu_arch_gfx906 \
// RUN: -x hip %s 2>&1 | \
// RUN: FileCheck %s --check-prefix=BAD-TIMEOUT
-// BAD-TIMEOUT: clang: error: cannot determine amdgcn architecture: CLANG_TOOLCHAIN_PROGRAM_TIMEOUT expected an integer, got 'foo'; consider passing it via '--offload-arch'; environment variable CLANG_TOOLCHAIN_PROGRAM_TIMEOUT specifies the tool timeout (integer secs, <=0 is infinite)
+// BAD-TIMEOUT: clang: error: cannot determine hip architecture: CLANG_TOOLCHAIN_PROGRAM_TIMEOUT expected an integer, got 'foo'; consider passing it via '--offload-arch'; environment variable CLANG_TOOLCHAIN_PROGRAM_TIMEOUT specifies the tool timeout (integer secs, <=0 is infinite)
diff --git a/clang/test/Driver/baremetal.cpp b/clang/test/Driver/baremetal.cpp
index 4dc3201..adb59e1 100644
--- a/clang/test/Driver/baremetal.cpp
+++ b/clang/test/Driver/baremetal.cpp
@@ -257,7 +257,7 @@
// CHECK-RV64-SAME:"{{.*}}.o"
// CHECK-RV64-SAME: "{{[^"]*}}libclang_rt.builtins.a"
// CHECK-RV64-SAME: "-lc"
-// CHECK-RV64-SAME: "-X" "-o" "{{.*}}.tmp.out"
+// CHECK-RV64-SAME: "-o" "{{.*}}.tmp.out"
// RUN: %clangxx %s -### --target=riscv64-unknown-elf 2>&1 \
// RUN: --sysroot=%S/Inputs/basic_riscv64_tree/riscv64-unknown-elf \
@@ -271,7 +271,7 @@
// CHECK-RV64-DEFAULTCXX-SAME: "-lc++" "-lm"
// CHECK-RV64-DEFAULTCXX-SAME: "{{[^"]*}}libclang_rt.builtins.a"
// CHECK-RV64-DEFAULTCXX-SAME: "-lc"
-// CHECK-RV64-DEFAULTCXX-SAME: "-X" "-o" "a.out"
+// CHECK-RV64-DEFAULTCXX-SAME: "-o" "a.out"
// RUN: %clangxx %s -### --target=riscv64-unknown-elf 2>&1 \
// RUN: --sysroot=%S/Inputs/basic_riscv64_tree/riscv64-unknown-elf \
@@ -288,7 +288,7 @@
// CHECK-RV64-LIBCXX-SAME: "-lc++" "-lm"
// CHECK-RV64-LIBCXX-SAME: "{{[^"]*}}libclang_rt.builtins.a"
// CHECK-RV64-LIBCXX-SAME: "-lc"
-// CHECK-RV64-LIBCXX-SAME: "-X" "-o" "a.out"
+// CHECK-RV64-LIBCXX-SAME: "-o" "a.out"
// RUN: %clangxx %s -### 2>&1 --target=riscv64-unknown-elf \
// RUN: --sysroot=%S/Inputs/basic_riscv64_tree/riscv64-unknown-elf \
@@ -305,7 +305,7 @@
// CHECK-RV64-LIBSTDCXX-SAME: "-lstdc++" "-lm"
// CHECK-RV64-LIBSTDCXX-SAME: "{{[^"]*}}libclang_rt.builtins.a"
// CHECK-RV64-LIBSTDCXX-SAME: "-lc"
-// CHECK-RV64-LIBSTDCXX-SAME: "-X" "-o" "a.out"
+// CHECK-RV64-LIBSTDCXX-SAME: "-o" "a.out"
// RUN: %clang %s -### 2>&1 --target=riscv32-unknown-elf \
// RUN: -L some/directory/user/asked/for \
@@ -325,7 +325,7 @@
// CHECK-RV32-SAME: "{{.*}}.o"
// CHECK-RV32-SAME: "{{[^"]*}}libclang_rt.builtins.a"
// CHECK-RV32-SAME: "-lc"
-// CHECK-RV32-SAME: "-X" "-o" "a.out"
+// CHECK-RV32-SAME: "-o" "a.out"
// RUN: %clangxx %s -### 2>&1 --target=riscv32-unknown-elf \
// RUN: --sysroot=%S/Inputs/basic_riscv32_tree/riscv32-unknown-elf \
@@ -339,7 +339,7 @@
// CHECK-RV32-DEFAULTCXX-SAME: "-lc++" "-lm"
// CHECK-RV32-DEFAULTCXX-SAME: "{{[^"]*}}libclang_rt.builtins.a"
// CHECK-RV32-DEFAULTCXX-SAME: "-lc"
-// CHECK-RV32-DEFAULTCXX-SAME: "-X" "-o" "a.out"
+// CHECK-RV32-DEFAULTCXX-SAME: "-o" "a.out"
// RUN: %clangxx %s -### 2>&1 --target=riscv32-unknown-elf \
// RUN: --sysroot=%S/Inputs/basic_riscv32_tree/riscv32-unknown-elf \
@@ -355,7 +355,7 @@
// CHECK-RV32-LIBCXX-SAME: "{{.*}}.o"
// CHECK-RV32-LIBCXX-SAME: "-lc++" "-lm"
// CHECK-RV32-LIBCXX-SAME: "{{[^"]*}}libclang_rt.builtins.a"
-// CHECK-RV32-LIBCXX-SAME: "-X" "-o" "a.out"
+// CHECK-RV32-LIBCXX-SAME: "-o" "a.out"
// RUN: %clangxx %s -### 2>&1 --target=riscv32-unknown-elf \
// RUN: --sysroot=%S/Inputs/basic_riscv32_tree/riscv32-unknown-elf \
@@ -372,7 +372,7 @@
// CHECK-RV32-LIBSTDCXX-SAME: "-lstdc++" "-lm"
// CHECK-RV32-LIBSTDCXX-SAME: "{{[^"]*}}libclang_rt.builtins.a"
// CHECK-RV32-LIBSTDCXX-SAME: "-lc"
-// CHECK-RV32-LIBSTDCXX-SAME: "-X" "-o" "a.out"
+// CHECK-RV32-LIBSTDCXX-SAME: "-o" "a.out"
// RUN: %clang %s -### 2>&1 --target=riscv64-unknown-elf \
// RUN: -nostdlibinc -nobuiltininc \
diff --git a/clang/test/Driver/cuda-phases.cu b/clang/test/Driver/cuda-phases.cu
index 8b91a1d..220a320 100644
--- a/clang/test/Driver/cuda-phases.cu
+++ b/clang/test/Driver/cuda-phases.cu
@@ -324,8 +324,8 @@
// RUN: -ccc-print-phases --offload-arch=sm_999 -fgpu-rdc -c %s 2>&1 \
// RUN: | FileCheck -check-prefix=INVALID-ARCH %s
// INVALID-ARCH: error: unsupported CUDA gpu architecture: sm_999
-// INVALID-ARCH-NEXT: 0: input, "[[INPUT:.+]]", cuda, (host-cuda)
-// INVALID-ARCH-NEXT: 1: preprocessor, {0}, cuda-cpp-output, (host-cuda)
-// INVALID-ARCH-NEXT: 2: compiler, {1}, ir, (host-cuda)
-// INVALID-ARCH-NEXT: 3: backend, {2}, assembler, (host-cuda)
-// INVALID-ARCH-NEXT: 4: assembler, {3}, object, (host-cuda)
+// INVALID-ARCH: 0: input, "[[INPUT:.+]]", cuda
+// INVALID-ARCH-NEXT: 1: preprocessor, {0}, cuda-cpp-output
+// INVALID-ARCH-NEXT: 2: compiler, {1}, ir
+// INVALID-ARCH-NEXT: 3: backend, {2}, assembler
+// INVALID-ARCH-NEXT: 4: assembler, {3}, object
diff --git a/clang/test/Driver/hip-inputs.hip b/clang/test/Driver/hip-inputs.hip
index 2d4cc31..a8e25ad 100644
--- a/clang/test/Driver/hip-inputs.hip
+++ b/clang/test/Driver/hip-inputs.hip
@@ -15,5 +15,5 @@
// RUN: --hip-link %S/Inputs/hip_multiple_inputs/a.cu 2>&1 \
// RUN: | FileCheck -check-prefix=MIX %s
-// CHECK-NOT: error: mixed CUDA and HIP compilation is not supported
-// MIX: error: mixed CUDA and HIP compilation is not supported
+// CHECK-NOT: error: mixed CUDA and HIP offloading compilation is not supported
+// MIX: error: mixed CUDA and HIP offloading compilation is not supported
diff --git a/clang/test/Driver/hip-invalid-target-id.hip b/clang/test/Driver/hip-invalid-target-id.hip
index 555043f..ad942e4 100644
--- a/clang/test/Driver/hip-invalid-target-id.hip
+++ b/clang/test/Driver/hip-invalid-target-id.hip
@@ -4,7 +4,7 @@
// RUN: --rocm-path=%S/Inputs/rocm \
// RUN: %s 2>&1 | FileCheck -check-prefix=NOPLUS %s
-// NOPLUS: error: invalid target ID 'gfx908xnack'
+// NOPLUS: error: unsupported HIP gpu architecture: gfx908xnack
// RUN: not %clang -### --target=x86_64-linux-gnu \
// RUN: -x hip --offload-arch=gfx900 \
@@ -22,7 +22,7 @@
// RUN: --rocm-path=%S/Inputs/rocm \
// RUN: %s 2>&1 | FileCheck -check-prefix=UNK %s
-// UNK: error: invalid target ID 'gfx908:unknown+'
+// UNK: error: unsupported HIP gpu architecture: gfx900+xnack
// RUN: not %clang -### --target=x86_64-linux-gnu \
// RUN: -x hip --offload-arch=gfx908 \
@@ -31,7 +31,7 @@
// RUN: --rocm-path=%S/Inputs/rocm \
// RUN: %s 2>&1 | FileCheck -check-prefix=MIXED %s
-// MIXED: error: invalid target ID 'gfx908:sramecc+:unknown+'
+// MIXED: error: unsupported HIP gpu architecture: gfx900+xnack
// RUN: not %clang -### --target=x86_64-linux-gnu \
// RUN: -x hip --offload-arch=gfx908 \
@@ -55,7 +55,7 @@
// RUN: --rocm-path=%S/Inputs/rocm \
// RUN: %s 2>&1 | FileCheck -check-prefix=NOCOLON %s
-// NOCOLON: error: invalid target ID 'gfx900+xnack'
+// NOCOLON: error: unsupported HIP gpu architecture: gfx900+xnack
// RUN: not %clang -### --target=x86_64-linux-gnu \
// RUN: -x hip --offload-arch=gfx908 \
diff --git a/clang/test/Driver/hip-options.hip b/clang/test/Driver/hip-options.hip
index 4fb5571..ba23bc2 100644
--- a/clang/test/Driver/hip-options.hip
+++ b/clang/test/Driver/hip-options.hip
@@ -115,11 +115,6 @@
// OMP-NOT: "-cc1"{{.*}} "-triple" "amdgcn-amd-amdhsa" {{.*}} "-fopenmp"
// OMP: "-cc1"{{.*}} "-triple" "x86_64-unknown-linux-gnu" {{.*}} "-fopenmp"
-// RUN: not %clang --target=x86_64-unknown-linux-gnu -nogpuinc -nogpulib \
-// RUN: --offload-arch=gfx906 -fopenmp=libomp -fopenmp-targets=amdgcn %s 2>&1 \
-// RUN: | FileCheck -check-prefix=OMPTGT %s
-// OMPTGT: unsupported option '--offload-targets=' for language mode 'HIP'
-
// Check -Xoffload-linker option is passed to lld.
// RUN: %clang -### --target=x86_64-unknown-linux-gnu -nogpuinc -nogpulib \
diff --git a/clang/test/Driver/hip-phases.hip b/clang/test/Driver/hip-phases.hip
index d8a58b7..0ad5d76 100644
--- a/clang/test/Driver/hip-phases.hip
+++ b/clang/test/Driver/hip-phases.hip
@@ -675,3 +675,25 @@
// DEVICE-ONLY-NEXT: 2: compiler, {1}, ir, (device-hip, gfx90a)
// DEVICE-ONLY-NEXT: 3: backend, {2}, ir, (device-hip, gfx90a)
// DEVICE-ONLY-NEXT: 4: offload, "device-hip (amdgcn-amd-amdhsa:gfx90a)" {3}, none
+
+//
+// Test the new driver bundling SPIR-V targets.
+//
+// RUN: %clang -### --target=x86_64-linux-gnu --offload-new-driver -ccc-print-phases \
+// RUN: --offload-device-only --offload-arch=amdgcnspirv,gfx1030 %s 2>&1 \
+// RUN: | FileCheck -check-prefix=SPIRV-ONLY %s
+// SPIRV-ONLY: 0: input, "[[INPUT:.+]]", hip, (device-hip, gfx1030)
+// SPIRV-ONLY-NEXT: 1: preprocessor, {0}, hip-cpp-output, (device-hip, gfx1030)
+// SPIRV-ONLY-NEXT: 2: compiler, {1}, ir, (device-hip, gfx1030)
+// SPIRV-ONLY-NEXT: 3: backend, {2}, assembler, (device-hip, gfx1030)
+// SPIRV-ONLY-NEXT: 4: assembler, {3}, object, (device-hip, gfx1030)
+// SPIRV-ONLY-NEXT: 5: linker, {4}, image, (device-hip, gfx1030)
+// SPIRV-ONLY-NEXT: 6: offload, "device-hip (amdgcn-amd-amdhsa:gfx1030)" {5}, image
+// SPIRV-ONLY-NEXT: 7: input, "[[INPUT]]", hip, (device-hip, amdgcnspirv)
+// SPIRV-ONLY-NEXT: 8: preprocessor, {7}, hip-cpp-output, (device-hip, amdgcnspirv)
+// SPIRV-ONLY-NEXT: 9: compiler, {8}, ir, (device-hip, amdgcnspirv)
+// SPIRV-ONLY-NEXT: 10: backend, {9}, ir, (device-hip, amdgcnspirv)
+// SPIRV-ONLY-NEXT: 11: linker, {10}, image, (device-hip, amdgcnspirv)
+// SPIRV-ONLY-NEXT: 12: offload, "device-hip (spirv64-amd-amdhsa:amdgcnspirv)" {11}, image
+// SPIRV-ONLY-NEXT: 13: linker, {6, 12}, hip-fatbin, (device-hip)
+// SPIRV-ONLY-NEXT: 14: offload, "device-hip (amdgcn-amd-amdhsa)" {13}, none
diff --git a/clang/test/Driver/invalid-offload-options.cpp b/clang/test/Driver/invalid-offload-options.cpp
index 48d5310..6048a3c 100644
--- a/clang/test/Driver/invalid-offload-options.cpp
+++ b/clang/test/Driver/invalid-offload-options.cpp
@@ -1,29 +1,7 @@
// UNSUPPORTED: system-windows
-// RUN: not %clang -### -x hip --target=x86_64-linux-gnu --offload= \
-// RUN: --hip-path=%S/Inputs/hipspv -nogpuinc -nogpulib %s \
-// RUN: 2>&1 | FileCheck --check-prefix=INVALID-TARGET %s
// RUN: not %clang -### -x hip --target=x86_64-linux-gnu --offload=foo \
// RUN: --hip-path=%S/Inputs/hipspv -nogpuinc -nogpulib %s \
// RUN: 2>&1 | FileCheck --check-prefix=INVALID-TARGET %s
// INVALID-TARGET: error: invalid or unsupported offload target: '{{.*}}'
-
-// In the future we should be able to specify multiple targets for HIP
-// compilation but currently it is not supported.
-//
-// RUN: not %clang -### -x hip --target=x86_64-linux-gnu --offload=foo,bar \
-// RUN: --hip-path=%S/Inputs/hipspv -nogpuinc -nogpulib %s \
-// RUN: 2>&1 | FileCheck --check-prefix=TOO-MANY-TARGETS %s
-// RUN: not %clang -### -x hip --target=x86_64-linux-gnu \
-// RUN: --offload=foo --offload=bar \
-// RUN: --hip-path=%S/Inputs/hipspv -nogpuinc -nogpulib %s \
-// RUN: 2>&1 | FileCheck --check-prefix=TOO-MANY-TARGETS %s
-
-// TOO-MANY-TARGETS: error: only one offload target is supported
-
-// RUN: not %clang -### -x hip --target=x86_64-linux-gnu -nogpuinc -nogpulib \
-// RUN: --offload=amdgcn-amd-amdhsa --offload-arch=gfx900 %s \
-// RUN: 2>&1 | FileCheck --check-prefix=OFFLOAD-ARCH-MIX %s
-
-// OFFLOAD-ARCH-MIX: error: option '--offload-arch' cannot be specified with '--offload'
diff --git a/clang/test/Driver/module-fgen-reduced-bmi.cppm b/clang/test/Driver/module-fgen-reduced-bmi.cppm
index 9bdd4c9..4b893ff 100644
--- a/clang/test/Driver/module-fgen-reduced-bmi.cppm
+++ b/clang/test/Driver/module-fgen-reduced-bmi.cppm
@@ -64,7 +64,8 @@
// RUN: -Wno-missing-reduced-bmi -### 2>&1 | FileCheck Hello.cppm -check-prefix=NO_WARN
//
// RUN: %clang -std=c++20 Hello.cppm --precompile -o Hello.pcm \
-// RUN: -Wno-missing-reduced-bmi -### 2>&1 | FileCheck Hello.cppm -check-prefix=NO_WARN
+// RUN: -fno-modules-reduced-bmi -Wno-missing-reduced-bmi -### 2>&1 | \
+// RUN: FileCheck Hello.cppm -check-prefix=NO_WARN
//--- Hello.cppm
export module Hello;
diff --git a/clang/test/Driver/module-output.cppm b/clang/test/Driver/module-output.cppm
index 7cf0771..197f1d8 100644
--- a/clang/test/Driver/module-output.cppm
+++ b/clang/test/Driver/module-output.cppm
@@ -13,28 +13,29 @@
// Tests that the .pcm file will be generated in the same directory as the specified
// output and the name of the .pcm file should be the same as the input file.
// RUN: %clang -std=c++20 %t/Hello.cppm -fmodule-output -c -o %t/output/Hello.o \
-// RUN: -### 2>&1 | FileCheck %t/Hello.cppm
+// RUN: -fno-modules-reduced-bmi -### 2>&1 | FileCheck %t/Hello.cppm
//
// Tests that the output file will be generated in the input directory if the output
// file is not the corresponding object file.
// RUN: %clang -std=c++20 %t/Hello.cppm %t/AnotherModule.cppm -fmodule-output -o \
-// RUN: %t/output/a.out -### 2>&1 | FileCheck %t/AnotherModule.cppm
+// RUN: %t/output/a.out -fno-modules-reduced-bmi -### 2>&1 | FileCheck %t/AnotherModule.cppm
//
// Tests that clang will reject the command line if it specifies -fmodule-output with
// multiple archs.
// RUN: not %clang %t/Hello.cppm -fmodule-output -arch i386 -arch x86_64 -### \
-// RUN: --target=x86_64-apple-darwin 2>&1 | FileCheck %t/Hello.cppm -check-prefix=MULTIPLE-ARCH
+// RUN: -fno-modules-reduced-bmi --target=x86_64-apple-darwin 2>&1 | FileCheck %t/Hello.cppm \
+// RUN: -check-prefix=MULTIPLE-ARCH
// Tests that the .pcm file will be generated in the same path as the specified one
// in the command line.
// RUN: %clang -std=c++20 %t/Hello.cppm -fmodule-output=%t/pcm/Hello.pcm -o %t/Hello.o \
-// RUN: -c -### 2>&1 | FileCheck %t/Hello.cppm --check-prefix=CHECK-SPECIFIED
+// RUN: -fno-modules-reduced-bmi -c -### 2>&1 | FileCheck %t/Hello.cppm --check-prefix=CHECK-SPECIFIED
//
// RUN: %clang -std=c++20 %t/Hello.cppm -fmodule-output=%t/Hello.pcm -fmodule-output -c -fsyntax-only \
-// RUN: -### 2>&1 | FileCheck %t/Hello.cppm --check-prefix=CHECK-NOT-USED
+// RUN: -fno-modules-reduced-bmi -### 2>&1 | FileCheck %t/Hello.cppm --check-prefix=CHECK-NOT-USED
// Test that we can emit a warning if the type of the input file is not a module interface unit.
-// RUN: %clang -std=c++20 %t/a.cpp -fmodule-output -c -o %t/a.o -### 2>&1 | FileCheck %t/a.cpp
+// RUN: %clang -std=c++20 %t/a.cpp -fmodule-output -fno-modules-reduced-bmi -c -o %t/a.o -### 2>&1 | FileCheck %t/a.cpp
//--- Hello.cppm
export module Hello;
diff --git a/clang/test/Driver/modules.cpp b/clang/test/Driver/modules.cpp
index 088a732..edbe8d8 100644
--- a/clang/test/Driver/modules.cpp
+++ b/clang/test/Driver/modules.cpp
@@ -34,7 +34,7 @@
// Check combining precompile and compile steps works.
//
-// RUN: %clang -std=c++2a -x c++-module %t/foo.cpp -S -o %t/foo2.pcm.o -v 2>&1 | FileCheck %s --check-prefix=CHECK-PRECOMPILE --check-prefix=CHECK-COMPILE
+// RUN: %clang -std=c++2a -x c++-module -fno-modules-reduced-bmi %t/foo.cpp -S -o %t/foo2.pcm.o -v 2>&1 | FileCheck %s --check-prefix=CHECK-PRECOMPILE --check-prefix=CHECK-COMPILE
// Check that .cppm is treated as a module implicitly.
//
diff --git a/clang/test/Driver/nvptx-cuda-system-arch.c b/clang/test/Driver/nvptx-cuda-system-arch.c
index c54eeac..2d4eca8 100644
--- a/clang/test/Driver/nvptx-cuda-system-arch.c
+++ b/clang/test/Driver/nvptx-cuda-system-arch.c
@@ -16,14 +16,14 @@
// RUN: | FileCheck %s --check-prefix=NO-OUTPUT-ERROR
// RUN: not %clang -### --target=x86_64-unknown-linux-gnu -nogpulib --offload-new-driver --offload-arch=native --nvptx-arch-tool=%t/nvptx_arch_fail -x cuda %s 2>&1 \
// RUN: | FileCheck %s --check-prefix=NO-OUTPUT-ERROR
-// NO-OUTPUT-ERROR: error: cannot determine nvptx64 architecture{{.*}}; consider passing it via '--offload-arch'
+// NO-OUTPUT-ERROR: error: cannot determine cuda architecture{{.*}}; consider passing it via '--offload-arch'
// case when nvptx-arch does not return anything with successful execution
// RUN: not %clang -### --target=x86_64-unknown-linux-gnu -nogpulib --offload-arch=native --nvptx-arch-tool=%t/nvptx_arch_empty -x cuda %s 2>&1 \
// RUN: | FileCheck %s --check-prefix=EMPTY-OUTPUT
// RUN: not %clang -### --target=x86_64-unknown-linux-gnu -nogpulib --offload-new-driver --offload-arch=native --nvptx-arch-tool=%t/nvptx_arch_empty -x cuda %s 2>&1 \
// RUN: | FileCheck %s --check-prefix=EMPTY-OUTPUT
-// EMPTY-OUTPUT: error: cannot determine nvptx64 architecture: No NVIDIA GPU detected in the system; consider passing it via '--offload-arch'
+// EMPTY-OUTPUT: error: cannot determine cuda architecture: No GPU detected in the system; consider passing it via '--offload-arch'
// case when nvptx-arch does not return anything with successful execution
// RUN: %clang -### --target=x86_64-unknown-linux-gnu -nogpulib --offload-arch=native --nvptx-arch-tool=%t/nvptx_arch_sm_70 -x cuda --cuda-path=%S/Inputs/CUDA_102/usr/local/cuda %s 2>&1 \
@@ -49,4 +49,4 @@
// RUN: --offload-arch=native --nvptx-arch-tool=%t/nvptx_arch_sm_70 \
// RUN: --cuda-path=%S/Inputs/CUDA_102/usr/local/cuda -x cuda %s 2>&1 | \
// RUN: FileCheck %s --check-prefix=BAD-TIMEOUT
-// BAD-TIMEOUT: clang: error: cannot determine nvptx64 architecture: CLANG_TOOLCHAIN_PROGRAM_TIMEOUT expected an integer, got 'foo'; consider passing it via '--offload-arch'; environment variable CLANG_TOOLCHAIN_PROGRAM_TIMEOUT specifies the tool timeout (integer secs, <=0 is infinite)
+// BAD-TIMEOUT: clang: error: cannot determine cuda architecture: CLANG_TOOLCHAIN_PROGRAM_TIMEOUT expected an integer, got 'foo'; consider passing it via '--offload-arch'; environment variable CLANG_TOOLCHAIN_PROGRAM_TIMEOUT specifies the tool timeout (integer secs, <=0 is infinite)
diff --git a/clang/test/Driver/offload-target.c b/clang/test/Driver/offload-target.c
new file mode 100644
index 0000000..23a2cf2
--- /dev/null
+++ b/clang/test/Driver/offload-target.c
@@ -0,0 +1,22 @@
+// RUN: %clang -### -fsycl --offload-targets=spirv64 -nogpuinc %s -ccc-print-bindings 2>&1 \
+// RUN: | FileCheck %s -check-prefix=SYCL
+// SYCL: "spirv64" - "clang", inputs: ["[[INPUT:.+]]"], output: "[[SYCL_BC:.+]]"
+
+// RUN: %clang -### --offload-targets=amdgcn-amd-amdhsa -nogpulib -nogpuinc -x hip %s -ccc-print-bindings 2>&1 \
+// RUN: | FileCheck %s -check-prefix=HIP
+// HIP: "amdgcn-amd-amdhsa" - "clang", inputs: ["[[INPUT:.+]]"], output: "[[AMD_OBJ:.+]]"
+
+// RUN: %clang -### --offload-targets=nvptx64-nvidia-cuda -nogpulib -nogpuinc -x cuda %s -ccc-print-bindings 2>&1 \
+// RUN: | FileCheck %s -check-prefix=CUDA
+// CUDA: "nvptx64-nvidia-cuda" - "clang", inputs: ["[[INPUT:.+]]"], output: "[[NV_OBJ:.+]]"
+
+// RUN: %clang -### --offload-targets=amdgcn-amd-amdhsa,nvptx64-nvidia-cuda -fopenmp=libomp \
+// RUN: -Xarch_amdgcn --offload-arch=gfx90a -Xarch_nvptx64 --offload-arch=sm_89 \
+// RUN: -nogpulib -nogpuinc %s -ccc-print-bindings 2>&1 \
+// RUN: | FileCheck %s -check-prefix=OPENMP
+// OPENMP: "amdgcn-amd-amdhsa" - "clang", inputs: ["[[INPUT:.+]]"], output: "[[AMD_OBJ:.+]]"
+// OPENMP: "nvptx64-nvidia-cuda" - "clang", inputs: ["[[INPUT]]"], output: "[[NV_OBJ:.+]]"
+
+// RUN: %clang -### --offload-targets=spirv64-amd-amdhsa -nogpulib -nogpuinc -x hip %s -ccc-print-bindings 2>&1 \
+// RUN: | FileCheck %s -check-prefix=HIPSPIRV
+// HIPSPIRV: "spirv64-amd-amdhsa" - "clang", inputs: ["[[INPUT:.+]]"], output: "[[AMD_OBJ:.+]]"
diff --git a/clang/test/Driver/openbsd.c b/clang/test/Driver/openbsd.c
index 6639e9d..1f12cfc 100644
--- a/clang/test/Driver/openbsd.c
+++ b/clang/test/Driver/openbsd.c
@@ -127,9 +127,12 @@
// UNWIND-TABLES: "-funwind-tables=2"
// NO-UNWIND-TABLES-NOT: "-funwind-tables=2"
-// Check that the -X and --no-relax flags are passed to the linker on riscv64
+// Check that the -X and --no-relax flags are passed to the linker
+// RUN: %clang --target=loongarch64-unknown-openbsd -mno-relax -### %s 2>&1 \
+// RUN: | FileCheck --check-prefix=LA64-FLAGS %s
// RUN: %clang --target=riscv64-unknown-openbsd -mno-relax -### %s 2>&1 \
// RUN: | FileCheck -check-prefix=RISCV64-FLAGS %s
+// LA64-FLAGS: "-X" "--no-relax"
// RISCV64-FLAGS: "-X" "--no-relax"
// Check passing LTO flags to the linker
diff --git a/clang/test/Driver/openmp-offload.c b/clang/test/Driver/openmp-offload.c
index 162ff20..64d45f9 100644
--- a/clang/test/Driver/openmp-offload.c
+++ b/clang/test/Driver/openmp-offload.c
@@ -7,7 +7,7 @@
/// Check whether an invalid OpenMP target is specified:
// RUN: not %clang -### -fopenmp=libomp -fopenmp-targets=aaa-bbb-ccc-ddd %s 2>&1 \
// RUN: | FileCheck -check-prefix=CHK-INVALID-TARGET %s
-// CHK-INVALID-TARGET: error: OpenMP target is invalid: 'aaa-bbb-ccc-ddd'
+// CHK-INVALID-TARGET: error: invalid or unsupported offload target: 'aaa-bbb-ccc-ddd'
/// ###########################################################################
@@ -18,15 +18,6 @@
/// ###########################################################################
-/// Check error for no -fopenmp option
-// RUN: not %clang -### -fopenmp-targets=powerpc64le-ibm-linux-gnu %s 2>&1 \
-// RUN: | FileCheck -check-prefix=CHK-NO-FOPENMP %s
-// RUN: not %clang -### -fopenmp=libgomp -fopenmp-targets=powerpc64le-ibm-linux-gnu %s 2>&1 \
-// RUN: | FileCheck -check-prefix=CHK-NO-FOPENMP %s
-// CHK-NO-FOPENMP: error: '-fopenmp-targets' must be used in conjunction with a '-fopenmp' option compatible with offloading; e.g., '-fopenmp=libomp' or '-fopenmp=libiomp5'
-
-/// ###########################################################################
-
/// Check warning for duplicate offloading targets.
// RUN: %clang -### -ccc-print-phases -fopenmp=libomp -fopenmp-targets=powerpc64le-ibm-linux-gnu,powerpc64le-ibm-linux-gnu %s 2>&1 \
// RUN: | FileCheck -check-prefix=CHK-DUPLICATES %s
diff --git a/clang/test/Driver/openmp-system-arch.c b/clang/test/Driver/openmp-system-arch.c
index b18ecf3..167b07a 100644
--- a/clang/test/Driver/openmp-system-arch.c
+++ b/clang/test/Driver/openmp-system-arch.c
@@ -24,13 +24,7 @@
// RUN: not %clang -### --target=x86_64-unknown-linux-gnu -nogpulib -fopenmp=libomp --offload-arch=native \
// RUN: --nvptx-arch-tool=%t/nvptx_arch_empty --amdgpu-arch-tool=%t/amdgpu_arch_empty %s 2>&1 \
// RUN: | FileCheck %s --check-prefix=NO-OUTPUT-ERROR
-// RUN: not %clang -### --target=x86_64-unknown-linux-gnu -nogpulib -fopenmp=libomp --offload-arch= \
-// RUN: --nvptx-arch-tool=%t/nvptx_arch_fail --amdgpu-arch-tool=%t/amdgpu_arch_fail %s 2>&1 \
-// RUN: | FileCheck %s --check-prefix=NO-OUTPUT-ERROR
-// RUN: not %clang -### --target=x86_64-unknown-linux-gnu -nogpulib -fopenmp=libomp --offload-arch= \
-// RUN: --nvptx-arch-tool=%t/nvptx_arch_empty --amdgpu-arch-tool=%t/amdgpu_arch_empty %s 2>&1 \
-// RUN: | FileCheck %s --check-prefix=NO-OUTPUT-ERROR
-// NO-OUTPUT-ERROR: error: failed to deduce triple for target architecture 'native'; specify the triple using '-fopenmp-targets' and '-Xopenmp-target' instead
+// NO-OUTPUT-ERROR: error: cannot determine openmp architecture
// case when amdgpu-arch succeeds.
// RUN: %clang -### --target=x86_64-unknown-linux-gnu -nogpulib -fopenmp=libomp --offload-arch=native \
diff --git a/clang/test/Driver/print-multi-selection-flags.c b/clang/test/Driver/print-multi-selection-flags.c
index 8cf8f04bb..b1a0a29 100644
--- a/clang/test/Driver/print-multi-selection-flags.c
+++ b/clang/test/Driver/print-multi-selection-flags.c
@@ -126,3 +126,20 @@
// CHECK-PIE1: -fpie
// CHECK-ROPI: -fropi
// CHECK-RWPI: -frwpi
+
+// RUN: %clang -multi-lib-config=%S/Inputs/multilib/empty.yaml -print-multi-flags-experimental --target=arm-none-eabi -march=armv7a -Os | FileCheck --check-prefix=CHECK-OPT-OS %s
+// RUN: %clang -multi-lib-config=%S/Inputs/multilib/empty.yaml -print-multi-flags-experimental --target=arm-none-eabi -march=armv7a -Oz | FileCheck --check-prefix=CHECK-OPT-OZ %s
+// RUN: %clang -multi-lib-config=%S/Inputs/multilib/empty.yaml -print-multi-flags-experimental --target=arm-none-eabi -march=armv7a | FileCheck --check-prefix=CHECK-OPT %s
+// RUN: %clang -multi-lib-config=%S/Inputs/multilib/empty.yaml -print-multi-flags-experimental --target=arm-none-eabi -march=armv7a -O1 | FileCheck --check-prefix=CHECK-OPT %s
+// RUN: %clang -multi-lib-config=%S/Inputs/multilib/empty.yaml -print-multi-flags-experimental --target=arm-none-eabi -march=armv7a -O2 | FileCheck --check-prefix=CHECK-OPT %s
+// RUN: %clang -multi-lib-config=%S/Inputs/multilib/empty.yaml -print-multi-flags-experimental --target=arm-none-eabi -march=armv7a -O3 | FileCheck --check-prefix=CHECK-OPT %s
+// RUN: %clang -multi-lib-config=%S/Inputs/multilib/empty.yaml -print-multi-flags-experimental --target=aarch64-none-eabi -Os | FileCheck --check-prefix=CHECK-OPT-OS %s
+// RUN: %clang -multi-lib-config=%S/Inputs/multilib/empty.yaml -print-multi-flags-experimental --target=aarch64-none-eabi -Oz | FileCheck --check-prefix=CHECK-OPT-OZ %s
+// RUN: %clang -multi-lib-config=%S/Inputs/multilib/empty.yaml -print-multi-flags-experimental --target=aarch64-none-eabi | FileCheck --check-prefix=CHECK-OPT %s
+// RUN: %clang -multi-lib-config=%S/Inputs/multilib/empty.yaml -print-multi-flags-experimental --target=aarch64-none-eabi -O1 | FileCheck --check-prefix=CHECK-OPT %s
+// RUN: %clang -multi-lib-config=%S/Inputs/multilib/empty.yaml -print-multi-flags-experimental --target=aarch64-none-eabi -O2 | FileCheck --check-prefix=CHECK-OPT %s
+// RUN: %clang -multi-lib-config=%S/Inputs/multilib/empty.yaml -print-multi-flags-experimental --target=aarch64-none-eabi -O3 | FileCheck --check-prefix=CHECK-OPT %s
+// CHECK-OPT-OZ: -Oz
+// CHECK-OPT-OS: -Os
+// CHECK-OPT-NOT: -Oz
+// CHECK-OPT-NOT: -Os
diff --git a/clang/test/Driver/sparc-target-features.c b/clang/test/Driver/sparc-target-features.c
index a839604..bd17da1 100644
--- a/clang/test/Driver/sparc-target-features.c
+++ b/clang/test/Driver/sparc-target-features.c
@@ -20,6 +20,11 @@
// RUN: %clang --target=sparc -mvis2 %s -### 2>&1 | FileCheck -check-prefix=VIS2 %s
// RUN: %clang --target=sparc -mno-vis2 %s -### 2>&1 | FileCheck -check-prefix=NO-VIS2 %s
+/// Solaris/SPARC defaults to -mvis2
+// RUN: %clang --target=sparc-sun-solaris2.11 %s -### 2>&1 | FileCheck -check-prefix=VIS2 %s
+// RUN: %clang --target=sparc-sun-solaris2.11 -mno-vis2 %s -### 2>&1 | FileCheck -check-prefix=NO-VIS2 %s
+// RUN: %clang --target=sparcv9-sun-solaris2.11 %s -### 2>&1 | FileCheck -check-prefix=VIS2 %s
+// RUN: %clang --target=sparcv9-sun-solaris2.11 -mno-vis2 %s -### 2>&1 | FileCheck -check-prefix=NO-VIS2 %s
// VIS2: "-target-feature" "+vis2"
// NO-VIS2: "-target-feature" "-vis2"
@@ -34,4 +39,8 @@
// SOFT-QUAD-FLOAT: "-target-feature" "-hard-quad-float"
// RUN: %clang --target=sparc -mv8plus %s -### 2>&1 | FileCheck -check-prefix=V8PLUS %s
+/// 32-bit Solaris/SPARC defaults to -mv8plus
+// RUN: %clang --target=sparc-sun-solaris2.11 %s -### 2>&1 | FileCheck -check-prefix=V8PLUS %s
+// RUN: %clang --target=sparc-sun-solaris2.11 -mno-v8plus %s -### 2>&1 | FileCheck -check-prefix=NO-V8PLUS %s
// V8PLUS: "-target-feature" "+v8plus"
+// NO-V8PLUS-NOT: "-target-feature" "+v8plus"
diff --git a/clang/test/Interpreter/fail.cpp b/clang/test/Interpreter/fail.cpp
index 633d927..4963df8 100644
--- a/clang/test/Interpreter/fail.cpp
+++ b/clang/test/Interpreter/fail.cpp
@@ -18,4 +18,11 @@ extern "C" int printf(const char *, ...);
int i = 42;
auto r1 = printf("i = %d\n", i);
// CHECK: i = 42
+
+1aap = 42; // expected-error {{invalid digit 'a' in decimal constant}}
+1aap = 42; i = 5; // expected-error {{invalid digit 'a' in decimal constant}}
+
+printf("i = %d\n", i);
+// CHECK: i = 42
+
%quit
diff --git a/clang/test/Interpreter/pretty-print.c b/clang/test/Interpreter/pretty-print.c
index 56488a1..e1408c0 100644
--- a/clang/test/Interpreter/pretty-print.c
+++ b/clang/test/Interpreter/pretty-print.c
@@ -3,7 +3,7 @@
// RUN: cat %s | clang-repl -Xcc -xc | FileCheck %s
// RUN: cat %s | clang-repl -Xcc -std=c++11 | FileCheck %s
-// UNSUPPORTED: hwasan
+// UNSUPPORTED: hwasan, msan
char c = 'a'; c
diff --git a/clang/test/Interpreter/pretty-print.cpp b/clang/test/Interpreter/pretty-print.cpp
index 612c0bc..e1036ab 100644
--- a/clang/test/Interpreter/pretty-print.cpp
+++ b/clang/test/Interpreter/pretty-print.cpp
@@ -1,6 +1,7 @@
// RUN: clang-repl "int i = 10;" 'extern "C" int printf(const char*,...);' \
// RUN: 'auto r1 = printf("i = %d\n", i);' | FileCheck --check-prefix=CHECK-DRIVER %s
-// UNSUPPORTED: system-aix, system-zos, asan
+// The test is flaky with asan https://github.com/llvm/llvm-project/pull/148701.
+// UNSUPPORTED: system-aix, asan
// CHECK-DRIVER: i = 10
// RUN: cat %s | clang-repl -Xcc -std=c++11 -Xcc -fno-delayed-template-parsing | FileCheck %s
extern "C" int printf(const char*,...);
diff --git a/clang/test/Layout/ms-no-unique-address.cpp b/clang/test/Layout/ms-no-unique-address.cpp
index 51cfd9a..fd92056 100644
--- a/clang/test/Layout/ms-no-unique-address.cpp
+++ b/clang/test/Layout/ms-no-unique-address.cpp
@@ -1,4 +1,5 @@
// RUN: %clang_cc1 -std=c++2a -fsyntax-only -triple x86_64-windows-msvc -fms-compatibility -fdump-record-layouts %s | FileCheck %s
+// RUN: %clang_cc1 -std=c++2a -fsyntax-only -triple x86_64-uefi -fms-compatibility -fdump-record-layouts %s | FileCheck %s
namespace Empty {
struct A {};
diff --git a/clang/test/Misc/time-passes.c b/clang/test/Misc/time-passes.c
index 370f52e..6afbcd4 100644
--- a/clang/test/Misc/time-passes.c
+++ b/clang/test/Misc/time-passes.c
@@ -19,6 +19,12 @@
// RUN: -ftime-report-json %s -o /dev/null \
// RUN: -mllvm -info-output-file=%t
// RUN: cat %t | FileCheck %s --check-prefixes=JSON
+// Check that -stats-file-timers only outputs pass time info in the stats file
+// and not stderr.
+// RUN: %clang_cc1 -emit-obj -O1 \
+// RUN: %s -o /dev/null \
+// RUN: -stats-file=%t -stats-file-timers 2>&1 | count 0
+// RUN: FileCheck %s -input-file=%t -check-prefixes=JSON
// TIME: Pass execution timing report
// TIME: Total Execution Time:
diff --git a/clang/test/Modules/mingw-exceptions.cppm b/clang/test/Modules/mingw-exceptions.cppm
index db7aa2c..be9d61d 100644
--- a/clang/test/Modules/mingw-exceptions.cppm
+++ b/clang/test/Modules/mingw-exceptions.cppm
@@ -1,5 +1,6 @@
// REQUIRES: x86-registered-target
-// RUN: %clang -target x86_64-windows-gnu -x c++-module -std=gnu++23 -c -o /dev/null -Xclang -disable-llvm-passes %s
+// RUN: %clang -target x86_64-windows-gnu -x c++-module -std=gnu++23 -fno-modules-reduced-bmi \
+// RUN: -c -o /dev/null -Xclang -disable-llvm-passes %s
// Make sure the command succeeds and doesn't break on the -exception-model flag in cc1.
export module empty;
diff --git a/clang/test/OpenMP/copy-gaps-1.cpp b/clang/test/OpenMP/copy-gaps-1.cpp
new file mode 100644
index 0000000..3d4fae3
--- /dev/null
+++ b/clang/test/OpenMP/copy-gaps-1.cpp
@@ -0,0 +1,52 @@
+// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp-targets=amdgcn-amd-amdhsa -fopenmp -emit-llvm %s -o - | FileCheck %s
+// expected-no-diagnostics
+
+struct S {
+ int x;
+ int y;
+ int z;
+ int *p1;
+ int *p2;
+};
+
+struct T : public S {
+ int a;
+ int b;
+ int c;
+};
+
+int main() {
+ T v;
+
+#pragma omp target map(tofrom: v, v.x, v.y, v.z, v.p1[:8], v.a, v.b, v.c)
+ {
+ v.x++;
+ v.y += 2;
+ v.z += 3;
+ v.p1[0] += 4;
+ v.a += 7;
+ v.b += 5;
+ v.c += 6;
+ }
+
+ return 0;
+}
+
+// CHECK: [[CSTSZ:@.+]] = private {{.*}}constant [10 x i64] [i64 0, i64 0, i64 0, i64 4, i64 4, i64 4, i64 32, i64 4, i64 4, i64 4]
+// CHECK: [[CSTTY:@.+]] = private {{.*}}constant [10 x i64] [i64 [[#0x20]], i64 [[#0x1000000000003]], i64 [[#0x1000000000003]], i64 [[#0x1000000000003]], i64 [[#0x1000000000003]], i64 [[#0x1000000000003]], i64 [[#0x1000000000013]], i64 [[#0x1000000000003]], i64 [[#0x1000000000003]], i64 [[#0x1000000000003]]]
+
+// CHECK-DAG: call i32 @__tgt_target_kernel(ptr @{{.+}}, i64 -1, i32 -1, i32 0, ptr @.{{.+}}.region_id, ptr [[ARGS:%.+]])
+// CHECK-DAG: [[KSIZE:%.+]] = getelementptr inbounds {{.+}}[[ARGS]], i32 0, i32 4
+// CHECK-DAG: store ptr [[SZBASE:%.+]], ptr [[KSIZE]], align 8
+// CHECK-DAG: [[SZBASE]] = getelementptr inbounds [10 x i64], ptr [[SIZES:%[^,]*]], i32 0, i32 0
+
+// Check for filling of four non-constant size elements here: the whole struct
+// size, the (padded) region covering p1 & p2, and the padding at the end of
+// struct T.
+
+// CHECK-DAG: [[STR:%.+]] = getelementptr inbounds [10 x i64], ptr [[SIZES]], i32 0, i32 0
+// CHECK-DAG: store i64 %{{.+}}, ptr [[STR]], align 8
+// CHECK-DAG: [[P1P2:%.+]] = getelementptr inbounds [10 x i64], ptr [[SIZES]], i32 0, i32 1
+// CHECK-DAG: store i64 %{{.+}}, ptr [[P1P2]], align 8
+// CHECK-DAG: [[PAD:%.+]] = getelementptr inbounds [10 x i64], ptr [[SIZES]], i32 0, i32 2
+// CHECK-DAG: store i64 %{{.+}}, ptr [[PAD]], align 8
diff --git a/clang/test/OpenMP/copy-gaps-2.cpp b/clang/test/OpenMP/copy-gaps-2.cpp
new file mode 100644
index 0000000..5bf603a
--- /dev/null
+++ b/clang/test/OpenMP/copy-gaps-2.cpp
@@ -0,0 +1,52 @@
+// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp-targets=amdgcn-amd-amdhsa -fopenmp -emit-llvm %s -o - | FileCheck %s
+// expected-no-diagnostics
+
+struct S {
+ int x;
+ int y;
+ int z;
+};
+
+struct M : public S {
+ int mid;
+};
+
+struct T : public M {
+ int a;
+ int b;
+ int c;
+};
+
+int main() {
+ T v;
+
+#pragma omp target map(tofrom: v, v.y, v.z, v.a)
+ {
+ v.y++;
+ v.z += 2;
+ v.a += 3;
+ v.mid += 5;
+ }
+
+ return 0;
+}
+
+// CHECK: [[CSTSZ:@.+]] = private {{.*}}constant [7 x i64] [i64 0, i64 0, i64 0, i64 0, i64 4, i64 4, i64 4]
+// CHECK: [[CSTTY:@.+]] = private {{.*}}constant [7 x i64] [i64 [[#0x20]], i64 [[#0x1000000000003]], i64 [[#0x1000000000003]], i64 [[#0x1000000000003]], i64 [[#0x1000000000003]], i64 [[#0x1000000000003]], i64 [[#0x1000000000003]]]
+
+// CHECK-DAG: call i32 @__tgt_target_kernel(ptr @{{.+}}, i64 -1, i32 -1, i32 0, ptr @.{{.+}}.region_id, ptr [[ARGS:%.+]])
+// CHECK-DAG: [[KSIZE:%.+]] = getelementptr inbounds {{.+}}[[ARGS]], i32 0, i32 4
+// CHECK-DAG: store ptr [[SZBASE:%.+]], ptr [[KSIZE]], align 8
+// CHECK-DAG: [[SZBASE]] = getelementptr inbounds [7 x i64], ptr [[SIZES:%[^,]*]], i32 0, i32 0
+
+// Fill four non-constant size elements here: the whole struct size, the region
+// covering v.x, the region covering v.mid and the region covering v.b and v.c.
+
+// CHECK-DAG: [[STR:%.+]] = getelementptr inbounds [7 x i64], ptr [[SIZES]], i32 0, i32 0
+// CHECK-DAG: store i64 %{{.+}}, ptr [[STR]], align 8
+// CHECK-DAG: [[X:%.+]] = getelementptr inbounds [7 x i64], ptr [[SIZES]], i32 0, i32 1
+// CHECK-DAG: store i64 %{{.+}}, ptr [[X]], align 8
+// CHECK-DAG: [[MID:%.+]] = getelementptr inbounds [7 x i64], ptr [[SIZES]], i32 0, i32 2
+// CHECK-DAG: store i64 %{{.+}}, ptr [[MID]], align 8
+// CHECK-DAG: [[BC:%.+]] = getelementptr inbounds [7 x i64], ptr [[SIZES]], i32 0, i32 3
+// CHECK-DAG: store i64 %{{.+}}, ptr [[BC]], align 8
diff --git a/clang/test/OpenMP/copy-gaps-3.cpp b/clang/test/OpenMP/copy-gaps-3.cpp
new file mode 100644
index 0000000..5febb18
--- /dev/null
+++ b/clang/test/OpenMP/copy-gaps-3.cpp
@@ -0,0 +1,46 @@
+// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp-targets=amdgcn-amd-amdhsa -fopenmp -emit-llvm %s -o - | FileCheck %s
+// expected-no-diagnostics
+
+struct S {
+ int x;
+ int y;
+ int z;
+};
+
+struct T : public S {
+ int a;
+ int b;
+ int c;
+};
+
+int main() {
+ T v;
+
+ // This one should have no gap between v.z & v.a.
+#pragma omp target map(tofrom: v, v.y, v.z, v.a)
+ {
+ v.y++;
+ v.z += 2;
+ v.a += 3;
+ }
+
+ return 0;
+}
+
+// CHECK: [[CSTSZ:@.+]] = private {{.*}}constant [6 x i64] [i64 0, i64 0, i64 0, i64 4, i64 4, i64 4]
+// CHECK: [[CSTTY:@.+]] = private {{.*}}constant [6 x i64] [i64 [[#0x20]], i64 [[#0x1000000000003]], i64 [[#0x1000000000003]], i64 [[#0x1000000000003]], i64 [[#0x1000000000003]], i64 [[#0x1000000000003]]]
+
+// CHECK-DAG: call i32 @__tgt_target_kernel(ptr @{{.+}}, i64 -1, i32 -1, i32 0, ptr @.{{.+}}.region_id, ptr [[ARGS:%.+]])
+// CHECK-DAG: [[KSIZE:%.+]] = getelementptr inbounds {{.+}}[[ARGS]], i32 0, i32 4
+// CHECK-DAG: store ptr [[SZBASE:%.+]], ptr [[KSIZE]], align 8
+// CHECK-DAG: [[SZBASE]] = getelementptr inbounds [6 x i64], ptr [[SIZES:%[^,]*]], i32 0, i32 0
+
+// Fill three non-constant size elements here: the whole struct size, the region
+// covering v.x, and the region covering v.b and v.c.
+
+// CHECK-DAG: [[STR:%.+]] = getelementptr inbounds [6 x i64], ptr [[SIZES]], i32 0, i32 0
+// CHECK-DAG: store i64 %{{.+}}, ptr [[STR]], align 8
+// CHECK-DAG: [[X:%.+]] = getelementptr inbounds [6 x i64], ptr [[SIZES]], i32 0, i32 1
+// CHECK-DAG: store i64 %{{.+}}, ptr [[X]], align 8
+// CHECK-DAG: [[BC:%.+]] = getelementptr inbounds [6 x i64], ptr [[SIZES]], i32 0, i32 2
+// CHECK-DAG: store i64 %{{.+}}, ptr [[BC]], align 8
diff --git a/clang/test/OpenMP/copy-gaps-4.cpp b/clang/test/OpenMP/copy-gaps-4.cpp
new file mode 100644
index 0000000..7060fe3
--- /dev/null
+++ b/clang/test/OpenMP/copy-gaps-4.cpp
@@ -0,0 +1,48 @@
+// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp-targets=amdgcn-amd-amdhsa -fopenmp -emit-llvm %s -o - | FileCheck %s
+// expected-no-diagnostics
+
+struct S {
+ int x;
+ int y;
+ char z; // Hidden padding after here...
+};
+
+struct T : public S {
+ int a;
+ int b;
+ int c;
+};
+
+int main() {
+ T v;
+
+#pragma omp target map(tofrom: v, v.y, v.z, v.a)
+ {
+ v.y++;
+ v.z += 2;
+ v.a += 3;
+ }
+
+ return 0;
+}
+
+// CHECK: [[CSTSZ:@.+]] = private {{.*}}constant [7 x i64] [i64 0, i64 0, i64 0, i64 0, i64 4, i64 1, i64 4]
+// CHECK: [[CSTTY:@.+]] = private {{.*}}constant [7 x i64] [i64 [[#0x20]], i64 [[#0x1000000000003]], i64 [[#0x1000000000003]], i64 [[#0x1000000000003]], i64 [[#0x1000000000003]], i64 [[#0x1000000000003]], i64 [[#0x1000000000003]]]
+
+// CHECK-DAG: call i32 @__tgt_target_kernel(ptr @{{.+}}, i64 -1, i32 -1, i32 0, ptr @.{{.+}}.region_id, ptr [[ARGS:%.+]])
+// CHECK-DAG: [[KSIZE:%.+]] = getelementptr inbounds {{.+}}[[ARGS]], i32 0, i32 4
+// CHECK-DAG: store ptr [[SZBASE:%.+]], ptr [[KSIZE]], align 8
+// CHECK-DAG: [[SZBASE]] = getelementptr inbounds [7 x i64], ptr [[SIZES:%[^,]*]], i32 0, i32 0
+
+// Fill four non-constant size elements here: the whole struct size, the region
+// covering v.x, the region covering padding after v.z and the region covering
+// v.b and v.c.
+
+// CHECK-DAG: [[STR:%.+]] = getelementptr inbounds [7 x i64], ptr [[SIZES]], i32 0, i32 0
+// CHECK-DAG: store i64 %{{.+}}, ptr [[STR]], align 8
+// CHECK-DAG: [[X:%.+]] = getelementptr inbounds [7 x i64], ptr [[SIZES]], i32 0, i32 1
+// CHECK-DAG: store i64 %{{.+}}, ptr [[X]], align 8
+// CHECK-DAG: [[PAD:%.+]] = getelementptr inbounds [7 x i64], ptr [[SIZES]], i32 0, i32 2
+// CHECK-DAG: store i64 %{{.+}}, ptr [[PAD]], align 8
+// CHECK-DAG: [[BC:%.+]] = getelementptr inbounds [7 x i64], ptr [[SIZES]], i32 0, i32 3
+// CHECK-DAG: store i64 %{{.+}}, ptr [[BC]], align 8
diff --git a/clang/test/OpenMP/copy-gaps-5.cpp b/clang/test/OpenMP/copy-gaps-5.cpp
new file mode 100644
index 0000000..fae675d
--- /dev/null
+++ b/clang/test/OpenMP/copy-gaps-5.cpp
@@ -0,0 +1,50 @@
+// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp-targets=amdgcn-amd-amdhsa -fopenmp -emit-llvm %s -o - | FileCheck %s
+// expected-no-diagnostics
+
+template<typename C>
+struct S {
+ C x;
+ C y;
+ char z; // Hidden padding after here...
+};
+
+template<typename C>
+struct T : public S<C> {
+ C a;
+ C b;
+ C c;
+};
+
+int main() {
+ T<int> v;
+
+#pragma omp target map(tofrom: v, v.y, v.z, v.a)
+ {
+ v.y++;
+ v.z += 2;
+ v.a += 3;
+ }
+
+ return 0;
+}
+
+// CHECK: [[CSTSZ:@.+]] = private {{.*}}constant [7 x i64] [i64 0, i64 0, i64 0, i64 0, i64 4, i64 1, i64 4]
+// CHECK: [[CSTTY:@.+]] = private {{.*}}constant [7 x i64] [i64 [[#0x20]], i64 [[#0x1000000000003]], i64 [[#0x1000000000003]], i64 [[#0x1000000000003]], i64 [[#0x1000000000003]], i64 [[#0x1000000000003]], i64 [[#0x1000000000003]]]
+
+// CHECK-DAG: call i32 @__tgt_target_kernel(ptr @{{.+}}, i64 -1, i32 -1, i32 0, ptr @.{{.+}}.region_id, ptr [[ARGS:%.+]])
+// CHECK-DAG: [[KSIZE:%.+]] = getelementptr inbounds {{.+}}[[ARGS]], i32 0, i32 4
+// CHECK-DAG: store ptr [[SZBASE:%.+]], ptr [[KSIZE]], align 8
+// CHECK-DAG: [[SZBASE]] = getelementptr inbounds [7 x i64], ptr [[SIZES:%[^,]*]], i32 0, i32 0
+
+// Fill four non-constant size elements here: the whole struct size, the region
+// covering v.x, the region covering padding after v.z and the region covering
+// v.b and v.c.
+
+// CHECK-DAG: [[STR:%.+]] = getelementptr inbounds [7 x i64], ptr [[SIZES]], i32 0, i32 0
+// CHECK-DAG: store i64 %{{.+}}, ptr [[STR]], align 8
+// CHECK-DAG: [[X:%.+]] = getelementptr inbounds [7 x i64], ptr [[SIZES]], i32 0, i32 1
+// CHECK-DAG: store i64 %{{.+}}, ptr [[X]], align 8
+// CHECK-DAG: [[PAD:%.+]] = getelementptr inbounds [7 x i64], ptr [[SIZES]], i32 0, i32 2
+// CHECK-DAG: store i64 %{{.+}}, ptr [[PAD]], align 8
+// CHECK-DAG: [[BC:%.+]] = getelementptr inbounds [7 x i64], ptr [[SIZES]], i32 0, i32 3
+// CHECK-DAG: store i64 %{{.+}}, ptr [[BC]], align 8
diff --git a/clang/test/OpenMP/copy-gaps-6.cpp b/clang/test/OpenMP/copy-gaps-6.cpp
new file mode 100644
index 0000000..9c62fde
--- /dev/null
+++ b/clang/test/OpenMP/copy-gaps-6.cpp
@@ -0,0 +1,87 @@
+// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp-targets=amdgcn-amd-amdhsa -fopenmp -emit-llvm %s -o - | FileCheck %s
+// expected-no-diagnostics
+
+struct S {
+ int x;
+ int *arr;
+ int y;
+ int z;
+};
+
+int main() {
+ S v;
+
+#pragma omp target map(tofrom: v, v.x, v.z)
+ {
+ v.x++;
+ v.y += 2;
+ v.z += 3;
+ }
+
+#pragma omp target map(tofrom: v, v.x, v.arr[:1])
+ {
+ v.x++;
+ v.y += 2;
+ v.arr[0] += 2;
+ v.z += 4;
+ }
+
+#pragma omp target map(tofrom: v, v.arr[:1])
+ {
+ v.x++;
+ v.y += 2;
+ v.arr[0] += 2;
+ v.z += 4;
+ }
+
+ return 0;
+}
+
+// CHECK: [[CSTSZ0:@.+]] = private {{.*}}constant [4 x i64] [i64 0, i64 0, i64 4, i64 4]
+// CHECK: [[CSTTY0:@.+]] = private {{.*}}constant [4 x i64] [i64 [[#0x20]], i64 [[#0x1000000000003]], i64 [[#0x1000000000003]], i64 [[#0x1000000000003]]]
+
+// CHECK: [[CSTSZ1:@.+]] = private {{.*}}constant [4 x i64] [i64 0, i64 0, i64 4, i64 4]
+// CHECK: [[CSTTY1:@.+]] = private {{.*}}constant [4 x i64] [i64 [[#0x20]], i64 [[#0x1000000000003]], i64 [[#0x1000000000003]], i64 [[#0x1000000000013]]]
+
+// CHECK: [[CSTSZ2:@.+]] = private {{.*}}constant [3 x i64] [i64 0, i64 24, i64 4]
+// CHECK: [[CSTTY2:@.+]] = private {{.*}}constant [3 x i64] [i64 [[#0x20]], i64 [[#0x1000000000003]], i64 [[#0x1000000000013]]]
+
+// CHECK-DAG: call i32 @__tgt_target_kernel(ptr @{{.+}}, i64 -1, i32 -1, i32 0, ptr @.{{.+}}.region_id, ptr [[ARGS:%.+]])
+// CHECK-DAG: [[KSIZE:%.+]] = getelementptr inbounds {{.+}}[[ARGS]], i32 0, i32 4
+// CHECK-DAG: store ptr [[SZBASE:%.+]], ptr [[KSIZE]], align 8
+// CHECK-DAG: [[SZBASE]] = getelementptr inbounds [4 x i64], ptr [[SIZES:%[^,]*]], i32 0, i32 0
+
+// Fill two non-constant size elements here: the whole struct size, and the
+// region covering v.arr and v.y.
+
+// CHECK-DAG: [[STR:%.+]] = getelementptr inbounds [4 x i64], ptr [[SIZES]], i32 0, i32 0
+// CHECK-DAG: store i64 %{{.+}}, ptr [[STR]], align 8
+// CHECK-DAG: [[ARRY:%.+]] = getelementptr inbounds [4 x i64], ptr [[SIZES]], i32 0, i32 1
+// CHECK-DAG: store i64 %{{.+}}, ptr [[ARRY]], align 8
+
+// CHECK: call void
+
+// CHECK-DAG: call i32 @__tgt_target_kernel(ptr @{{.+}}, i64 -1, i32 -1, i32 0, ptr @.{{.+}}.region_id, ptr [[ARGS:%.+]])
+// CHECK-DAG: [[KSIZE:%.+]] = getelementptr inbounds {{.+}}[[ARGS]], i32 0, i32 4
+// CHECK-DAG: store ptr [[SZBASE:%.+]], ptr [[KSIZE]], align 8
+// CHECK-DAG: [[SZBASE]] = getelementptr inbounds [4 x i64], ptr [[SIZES:%[^,]*]], i32 0, i32 0
+
+// Fill two non-constant size elements here: the whole struct size, and the
+// region covering v.arr, v.y and v.z.
+
+// CHECK-DAG: [[STR:%.+]] = getelementptr inbounds [4 x i64], ptr [[SIZES]], i32 0, i32 0
+// CHECK-DAG: store i64 %{{.+}}, ptr [[STR]], align 8
+// CHECK-DAG: [[ARRYZ:%.+]] = getelementptr inbounds [4 x i64], ptr [[SIZES]], i32 0, i32 1
+// CHECK-DAG: store i64 %{{.+}}, ptr [[ARRYZ]], align 8
+
+// CHECK: call void
+
+// CHECK-DAG: call i32 @__tgt_target_kernel(ptr @{{.+}}, i64 -1, i32 -1, i32 0, ptr @.{{.+}}.region_id, ptr [[ARGS:%.+]])
+// CHECK-DAG: [[KSIZE:%.+]] = getelementptr inbounds {{.+}}[[ARGS]], i32 0, i32 4
+// CHECK-DAG: store ptr [[SZBASE:%.+]], ptr [[KSIZE]], align 8
+// CHECK-DAG: [[SZBASE]] = getelementptr inbounds [3 x i64], ptr [[SIZES:%[^,]*]], i32 0, i32 0
+
+// Fill one non-constant size element here: the whole struct size.
+
+// CHECK-DAG: [[STR:%.+]] = getelementptr inbounds [3 x i64], ptr [[SIZES]], i32 0, i32 0
+// CHECK-DAG: store i64 %{{.+}}, ptr [[STR]], align 8
diff --git a/clang/test/OpenMP/declare_variant_clauses_ast_print.cpp b/clang/test/OpenMP/declare_variant_clauses_ast_print.cpp
index c14e19c..e98a23e 100644
--- a/clang/test/OpenMP/declare_variant_clauses_ast_print.cpp
+++ b/clang/test/OpenMP/declare_variant_clauses_ast_print.cpp
@@ -38,11 +38,11 @@
#ifndef HEADER
#define HEADER
-void foo_v1(float *AAA, float *BBB, int *I) {return;}
-void foo_v2(float *AAA, float *BBB, int *I) {return;}
-void foo_v3(float *AAA, float *BBB, int *I) {return;}
+void foo_v1(float *AAA, float *BBB, int &CCC, int *I) {return;}
+void foo_v2(float *AAA, float *BBB, int &CCC, int *I) {return;}
+void foo_v3(float *AAA, float *BBB, int &CCC, int *I) {return;}
-//DUMP: FunctionDecl{{.*}} foo 'void (float *, float *, int *)'
+//DUMP: FunctionDecl{{.*}} foo 'void (float *, float *, int &, int *)'
//DUMP: OMPDeclareVariantAttr{{.*}}device={arch(x86, x86_64)}
//DUMP: DeclRefExpr{{.*}}Function{{.*}}foo_v3
//DUMP: DeclRefExpr{{.*}}ParmVar{{.*}}'I'
@@ -54,9 +54,9 @@ void foo_v3(float *AAA, float *BBB, int *I) {return;}
//DUMP: DeclRefExpr{{.*}}Function{{.*}}foo_v1
//DUMP: DeclRefExpr{{.*}}ParmVar{{.*}}'AAA'
//DUMP: DeclRefExpr{{.*}}ParmVar{{.*}}'BBB'
-//PRINT: #pragma omp declare variant(foo_v3) match(construct={dispatch}, device={arch(x86, x86_64)}) adjust_args(nothing:I) adjust_args(need_device_ptr:BBB) adjust_args(need_device_addr:AAA)
+//PRINT: #pragma omp declare variant(foo_v3) match(construct={dispatch}, device={arch(x86, x86_64)}) adjust_args(nothing:I) adjust_args(need_device_ptr:BBB) adjust_args(need_device_addr:CCC)
-//PRINT: #pragma omp declare variant(foo_v2) match(construct={dispatch}, device={arch(ppc)}) adjust_args(need_device_ptr:AAA) adjust_args(need_device_addr:BBB)
+//PRINT: #pragma omp declare variant(foo_v2) match(construct={dispatch}, device={arch(ppc)}) adjust_args(need_device_ptr:AAA) adjust_args(need_device_addr:CCC)
//PRINT: omp declare variant(foo_v1) match(construct={dispatch}, device={arch(arm)}) adjust_args(need_device_ptr:AAA,BBB)
@@ -67,33 +67,33 @@ void foo_v3(float *AAA, float *BBB, int *I) {return;}
#pragma omp declare variant(foo_v2) \
match(construct={dispatch}, device={arch(ppc)}), \
adjust_args(need_device_ptr:AAA) \
- adjust_args(need_device_addr:BBB)
+ adjust_args(need_device_addr:CCC)
#pragma omp declare variant(foo_v3) \
adjust_args(need_device_ptr:BBB) adjust_args(nothing:I) \
- adjust_args(need_device_addr:AAA) \
+ adjust_args(need_device_addr:CCC) \
match(construct={dispatch}, device={arch(x86,x86_64)})
-void foo(float *AAA, float *BBB, int *I) {return;}
+void foo(float *AAA, float *BBB, int &CCC, int *I) {return;}
-void Foo_Var(float *AAA, float *BBB, float *CCC) {return;}
+void Foo_Var(float *AAA, float *BBB, float *&CCC) {return;}
#pragma omp declare variant(Foo_Var) \
match(construct={dispatch}, device={arch(x86_64)}) \
adjust_args(need_device_ptr:AAA) adjust_args(nothing:BBB) \
adjust_args(need_device_addr:CCC)
template<typename T>
-void Foo(T *AAA, T *BBB, T *CCC) {return;}
+void Foo(T *AAA, T *BBB, T *&CCC) {return;}
//PRINT: #pragma omp declare variant(Foo_Var) match(construct={dispatch}, device={arch(x86_64)}) adjust_args(nothing:BBB) adjust_args(need_device_ptr:AAA) adjust_args(need_device_addr:CCC)
-//DUMP: FunctionDecl{{.*}} Foo 'void (T *, T *, T *)'
+//DUMP: FunctionDecl{{.*}} Foo 'void (T *, T *, T *&)'
//DUMP: OMPDeclareVariantAttr{{.*}}device={arch(x86_64)}
//DUMP: DeclRefExpr{{.*}}Function{{.*}}Foo_Var
//DUMP: DeclRefExpr{{.*}}ParmVar{{.*}}'BBB'
//DUMP: DeclRefExpr{{.*}}ParmVar{{.*}}'AAA'
//DUMP: DeclRefExpr{{.*}}ParmVar{{.*}}'CCC'
//
-//DUMP: FunctionDecl{{.*}} Foo 'void (float *, float *, float *)'
+//DUMP: FunctionDecl{{.*}} Foo 'void (float *, float *, float *&)'
//DUMP: OMPDeclareVariantAttr{{.*}}device={arch(x86_64)}
//DUMP: DeclRefExpr{{.*}}Function{{.*}}Foo_Var
//DUMP: DeclRefExpr{{.*}}ParmVar{{.*}}'BBB'
diff --git a/clang/test/OpenMP/declare_variant_clauses_messages.cpp b/clang/test/OpenMP/declare_variant_clauses_messages.cpp
index bca9148..916d15f 100644
--- a/clang/test/OpenMP/declare_variant_clauses_messages.cpp
+++ b/clang/test/OpenMP/declare_variant_clauses_messages.cpp
@@ -91,6 +91,7 @@ void foo_v1(float *AAA, float *BBB, int *I) { return; }
void foo_v2(float *AAA, float *BBB, int *I) { return; }
void foo_v3(float *AAA, float *BBB, int *I) { return; }
void foo_v4(float *AAA, float *BBB, int *I, omp_interop_t IOp) { return; }
+void foo_v5(float *AAA, float *BBB, int I) { return; }
#if _OPENMP >= 202011 // At least OpenMP 5.1
void vararg_foo(const char *fmt, omp_interop_t it, ...);
@@ -129,6 +130,11 @@ void vararg_bar2(const char *fmt) { return; }
adjust_args(nothing:J) \
match(construct={dispatch}, device={arch(x86,x86_64)})
+// expected-error@+2 {{expected reference type argument on 'adjust_args' clause with 'need_device_addr' modifier}}
+#pragma omp declare variant(foo_v1) \
+ adjust_args(need_device_addr:AAA) \
+ match(construct={dispatch}, device={arch(x86,x86_64)})
+
// expected-error@+2 {{expected reference to one of the parameters of function 'foo'}}
#pragma omp declare variant(foo_v3) \
adjust_args(nothing:Other) \
@@ -218,6 +224,12 @@ void vararg_bar2(const char *fmt) { return; }
void foo(float *AAA, float *BBB, int *I) { return; }
+// expected-error@+2 {{expected reference type argument on 'adjust_args' clause with 'need_device_addr' modifier}}
+#pragma omp declare variant(foo_v5) \
+ adjust_args(need_device_addr:I) \
+ match(construct={dispatch}, device={arch(x86,x86_64)})
+void foo5(float *AAA, float *BBB, int I) { return; }
+
#endif // NO_INTEROP_T_DEF
#ifdef C
diff --git a/clang/test/OpenMP/target_map_array_section_no_length_codegen.cpp b/clang/test/OpenMP/target_map_array_section_no_length_codegen.cpp
new file mode 100644
index 0000000..43fd509
--- /dev/null
+++ b/clang/test/OpenMP/target_map_array_section_no_length_codegen.cpp
@@ -0,0 +1,361 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _ --version 5
+// RUN: %clang_cc1 -verify -triple i386-unknown-unknown -fopenmp -fopenmp-version=60 -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s
+
+// RUN: %clang_cc1 -verify -triple i386-unknown-unknown -fopenmp -fopenmp-version=60 -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -verify -triple i386-unknown-unknown -fopenmp -fopenmp-version=60 -fopenmp-targets=i386-pc-linux-gnu -include-pch %t -emit-llvm %s -o - | FileCheck %s
+
+// expected-no-diagnostics
+
+#ifndef HEADER
+#define HEADER
+void array_section_no_length_map_clause(float *d, int index) {
+ float **f;
+
+ #pragma omp target map(tofrom : d[:])
+ {
+ d[3] += 2;
+ }
+
+ #pragma omp target map(to : d[2:])
+ {
+ d[3] += 3;
+ }
+
+ #pragma omp target map(alloc : f[index][:])
+ {
+ f[index][2] += 4;
+ }
+
+ #pragma omp target map(tofrom : f[index][index+1:])
+ {
+ f[index][index] += 5;
+ }
+}
+#endif
+// CHECK-LABEL: define dso_local void @_Z34array_section_no_length_map_clausePfi(
+// CHECK-SAME: ptr noundef [[D:%.*]], i32 noundef [[INDEX:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: [[INDEX_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT: [[F:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 4
+// CHECK-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 4
+// CHECK-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 4
+// CHECK-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
+// CHECK-NEXT: [[DOTOFFLOAD_BASEPTRS2:%.*]] = alloca [1 x ptr], align 4
+// CHECK-NEXT: [[DOTOFFLOAD_PTRS3:%.*]] = alloca [1 x ptr], align 4
+// CHECK-NEXT: [[DOTOFFLOAD_MAPPERS4:%.*]] = alloca [1 x ptr], align 4
+// CHECK-NEXT: [[KERNEL_ARGS5:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
+// CHECK-NEXT: [[INDEX_CASTED:%.*]] = alloca i32, align 4
+// CHECK-NEXT: [[DOTOFFLOAD_BASEPTRS11:%.*]] = alloca [3 x ptr], align 4
+// CHECK-NEXT: [[DOTOFFLOAD_PTRS12:%.*]] = alloca [3 x ptr], align 4
+// CHECK-NEXT: [[DOTOFFLOAD_MAPPERS13:%.*]] = alloca [3 x ptr], align 4
+// CHECK-NEXT: [[KERNEL_ARGS14:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
+// CHECK-NEXT: [[INDEX_CASTED17:%.*]] = alloca i32, align 4
+// CHECK-NEXT: [[DOTOFFLOAD_BASEPTRS22:%.*]] = alloca [3 x ptr], align 4
+// CHECK-NEXT: [[DOTOFFLOAD_PTRS23:%.*]] = alloca [3 x ptr], align 4
+// CHECK-NEXT: [[DOTOFFLOAD_MAPPERS24:%.*]] = alloca [3 x ptr], align 4
+// CHECK-NEXT: [[DOTOFFLOAD_SIZES:%.*]] = alloca [3 x i64], align 4
+// CHECK-NEXT: [[KERNEL_ARGS25:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
+// CHECK-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 4
+// CHECK-NEXT: store i32 [[INDEX]], ptr [[INDEX_ADDR]], align 4
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[D_ADDR]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[D_ADDR]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[D_ADDR]], align 4
+// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[TMP2]], i32 0
+// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK-NEXT: store ptr [[TMP1]], ptr [[TMP3]], align 4
+// CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK-NEXT: store ptr [[ARRAYIDX]], ptr [[TMP4]], align 4
+// CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
+// CHECK-NEXT: store ptr null, ptr [[TMP5]], align 4
+// CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
+// CHECK-NEXT: store i32 3, ptr [[TMP8]], align 4
+// CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
+// CHECK-NEXT: store i32 1, ptr [[TMP9]], align 4
+// CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
+// CHECK-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 4
+// CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
+// CHECK-NEXT: store ptr [[TMP7]], ptr [[TMP11]], align 4
+// CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
+// CHECK-NEXT: store ptr @.offload_sizes, ptr [[TMP12]], align 4
+// CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
+// CHECK-NEXT: store ptr @.offload_maptypes, ptr [[TMP13]], align 4
+// CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
+// CHECK-NEXT: store ptr null, ptr [[TMP14]], align 4
+// CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
+// CHECK-NEXT: store ptr null, ptr [[TMP15]], align 4
+// CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
+// CHECK-NEXT: store i64 0, ptr [[TMP16]], align 8
+// CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9
+// CHECK-NEXT: store i64 0, ptr [[TMP17]], align 8
+// CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10
+// CHECK-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP18]], align 4
+// CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11
+// CHECK-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP19]], align 4
+// CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12
+// CHECK-NEXT: store i32 0, ptr [[TMP20]], align 4
+// CHECK-NEXT: [[TMP21:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1:[0-9]+]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z34array_section_no_length_map_clausePfi_l14.region_id, ptr [[KERNEL_ARGS]])
+// CHECK-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0
+// CHECK-NEXT: br i1 [[TMP22]], label %[[OMP_OFFLOAD_FAILED:.*]], label %[[OMP_OFFLOAD_CONT:.*]]
+// CHECK: [[OMP_OFFLOAD_FAILED]]:
+// CHECK-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z34array_section_no_length_map_clausePfi_l14(ptr [[TMP0]]) #[[ATTR2:[0-9]+]]
+// CHECK-NEXT: br label %[[OMP_OFFLOAD_CONT]]
+// CHECK: [[OMP_OFFLOAD_CONT]]:
+// CHECK-NEXT: [[TMP23:%.*]] = load ptr, ptr [[D_ADDR]], align 4
+// CHECK-NEXT: [[TMP24:%.*]] = load ptr, ptr [[D_ADDR]], align 4
+// CHECK-NEXT: [[TMP25:%.*]] = load ptr, ptr [[D_ADDR]], align 4
+// CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds nuw float, ptr [[TMP25]], i32 2
+// CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 0
+// CHECK-NEXT: store ptr [[TMP24]], ptr [[TMP26]], align 4
+// CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS3]], i32 0, i32 0
+// CHECK-NEXT: store ptr [[ARRAYIDX1]], ptr [[TMP27]], align 4
+// CHECK-NEXT: [[TMP28:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 0
+// CHECK-NEXT: store ptr null, ptr [[TMP28]], align 4
+// CHECK-NEXT: [[TMP29:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 0
+// CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS3]], i32 0, i32 0
+// CHECK-NEXT: [[TMP31:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0
+// CHECK-NEXT: store i32 3, ptr [[TMP31]], align 4
+// CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1
+// CHECK-NEXT: store i32 1, ptr [[TMP32]], align 4
+// CHECK-NEXT: [[TMP33:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 2
+// CHECK-NEXT: store ptr [[TMP29]], ptr [[TMP33]], align 4
+// CHECK-NEXT: [[TMP34:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 3
+// CHECK-NEXT: store ptr [[TMP30]], ptr [[TMP34]], align 4
+// CHECK-NEXT: [[TMP35:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 4
+// CHECK-NEXT: store ptr @.offload_sizes.1, ptr [[TMP35]], align 4
+// CHECK-NEXT: [[TMP36:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 5
+// CHECK-NEXT: store ptr @.offload_maptypes.2, ptr [[TMP36]], align 4
+// CHECK-NEXT: [[TMP37:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 6
+// CHECK-NEXT: store ptr null, ptr [[TMP37]], align 4
+// CHECK-NEXT: [[TMP38:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 7
+// CHECK-NEXT: store ptr null, ptr [[TMP38]], align 4
+// CHECK-NEXT: [[TMP39:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 8
+// CHECK-NEXT: store i64 0, ptr [[TMP39]], align 8
+// CHECK-NEXT: [[TMP40:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 9
+// CHECK-NEXT: store i64 0, ptr [[TMP40]], align 8
+// CHECK-NEXT: [[TMP41:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 10
+// CHECK-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP41]], align 4
+// CHECK-NEXT: [[TMP42:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 11
+// CHECK-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP42]], align 4
+// CHECK-NEXT: [[TMP43:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 12
+// CHECK-NEXT: store i32 0, ptr [[TMP43]], align 4
+// CHECK-NEXT: [[TMP44:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z34array_section_no_length_map_clausePfi_l19.region_id, ptr [[KERNEL_ARGS5]])
+// CHECK-NEXT: [[TMP45:%.*]] = icmp ne i32 [[TMP44]], 0
+// CHECK-NEXT: br i1 [[TMP45]], label %[[OMP_OFFLOAD_FAILED6:.*]], label %[[OMP_OFFLOAD_CONT7:.*]]
+// CHECK: [[OMP_OFFLOAD_FAILED6]]:
+// CHECK-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z34array_section_no_length_map_clausePfi_l19(ptr [[TMP23]]) #[[ATTR2]]
+// CHECK-NEXT: br label %[[OMP_OFFLOAD_CONT7]]
+// CHECK: [[OMP_OFFLOAD_CONT7]]:
+// CHECK-NEXT: [[TMP46:%.*]] = load ptr, ptr [[F]], align 4
+// CHECK-NEXT: [[TMP47:%.*]] = load i32, ptr [[INDEX_ADDR]], align 4
+// CHECK-NEXT: store i32 [[TMP47]], ptr [[INDEX_CASTED]], align 4
+// CHECK-NEXT: [[TMP48:%.*]] = load i32, ptr [[INDEX_CASTED]], align 4
+// CHECK-NEXT: [[TMP49:%.*]] = load ptr, ptr [[F]], align 4
+// CHECK-NEXT: [[TMP50:%.*]] = load ptr, ptr [[F]], align 4
+// CHECK-NEXT: [[TMP51:%.*]] = load i32, ptr [[INDEX_ADDR]], align 4
+// CHECK-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds ptr, ptr [[TMP50]], i32 [[TMP51]]
+// CHECK-NEXT: [[TMP52:%.*]] = load ptr, ptr [[F]], align 4
+// CHECK-NEXT: [[TMP53:%.*]] = load i32, ptr [[INDEX_ADDR]], align 4
+// CHECK-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds ptr, ptr [[TMP52]], i32 [[TMP53]]
+// CHECK-NEXT: [[TMP54:%.*]] = load ptr, ptr [[ARRAYIDX9]], align 4
+// CHECK-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds nuw float, ptr [[TMP54]], i32 0
+// CHECK-NEXT: [[TMP55:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS11]], i32 0, i32 0
+// CHECK-NEXT: store ptr [[TMP49]], ptr [[TMP55]], align 4
+// CHECK-NEXT: [[TMP56:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS12]], i32 0, i32 0
+// CHECK-NEXT: store ptr [[ARRAYIDX8]], ptr [[TMP56]], align 4
+// CHECK-NEXT: [[TMP57:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS13]], i32 0, i32 0
+// CHECK-NEXT: store ptr null, ptr [[TMP57]], align 4
+// CHECK-NEXT: [[TMP58:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS11]], i32 0, i32 1
+// CHECK-NEXT: store ptr [[ARRAYIDX8]], ptr [[TMP58]], align 4
+// CHECK-NEXT: [[TMP59:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS12]], i32 0, i32 1
+// CHECK-NEXT: store ptr [[ARRAYIDX10]], ptr [[TMP59]], align 4
+// CHECK-NEXT: [[TMP60:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS13]], i32 0, i32 1
+// CHECK-NEXT: store ptr null, ptr [[TMP60]], align 4
+// CHECK-NEXT: [[TMP61:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS11]], i32 0, i32 2
+// CHECK-NEXT: store i32 [[TMP48]], ptr [[TMP61]], align 4
+// CHECK-NEXT: [[TMP62:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS12]], i32 0, i32 2
+// CHECK-NEXT: store i32 [[TMP48]], ptr [[TMP62]], align 4
+// CHECK-NEXT: [[TMP63:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS13]], i32 0, i32 2
+// CHECK-NEXT: store ptr null, ptr [[TMP63]], align 4
+// CHECK-NEXT: [[TMP64:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS11]], i32 0, i32 0
+// CHECK-NEXT: [[TMP65:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS12]], i32 0, i32 0
+// CHECK-NEXT: [[TMP66:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 0
+// CHECK-NEXT: store i32 3, ptr [[TMP66]], align 4
+// CHECK-NEXT: [[TMP67:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 1
+// CHECK-NEXT: store i32 3, ptr [[TMP67]], align 4
+// CHECK-NEXT: [[TMP68:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 2
+// CHECK-NEXT: store ptr [[TMP64]], ptr [[TMP68]], align 4
+// CHECK-NEXT: [[TMP69:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 3
+// CHECK-NEXT: store ptr [[TMP65]], ptr [[TMP69]], align 4
+// CHECK-NEXT: [[TMP70:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 4
+// CHECK-NEXT: store ptr @.offload_sizes.3, ptr [[TMP70]], align 4
+// CHECK-NEXT: [[TMP71:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 5
+// CHECK-NEXT: store ptr @.offload_maptypes.4, ptr [[TMP71]], align 4
+// CHECK-NEXT: [[TMP72:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 6
+// CHECK-NEXT: store ptr null, ptr [[TMP72]], align 4
+// CHECK-NEXT: [[TMP73:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 7
+// CHECK-NEXT: store ptr null, ptr [[TMP73]], align 4
+// CHECK-NEXT: [[TMP74:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 8
+// CHECK-NEXT: store i64 0, ptr [[TMP74]], align 8
+// CHECK-NEXT: [[TMP75:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 9
+// CHECK-NEXT: store i64 0, ptr [[TMP75]], align 8
+// CHECK-NEXT: [[TMP76:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 10
+// CHECK-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP76]], align 4
+// CHECK-NEXT: [[TMP77:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 11
+// CHECK-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP77]], align 4
+// CHECK-NEXT: [[TMP78:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 12
+// CHECK-NEXT: store i32 0, ptr [[TMP78]], align 4
+// CHECK-NEXT: [[TMP79:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z34array_section_no_length_map_clausePfi_l24.region_id, ptr [[KERNEL_ARGS14]])
+// CHECK-NEXT: [[TMP80:%.*]] = icmp ne i32 [[TMP79]], 0
+// CHECK-NEXT: br i1 [[TMP80]], label %[[OMP_OFFLOAD_FAILED15:.*]], label %[[OMP_OFFLOAD_CONT16:.*]]
+// CHECK: [[OMP_OFFLOAD_FAILED15]]:
+// CHECK-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z34array_section_no_length_map_clausePfi_l24(ptr [[TMP46]], i32 [[TMP48]]) #[[ATTR2]]
+// CHECK-NEXT: br label %[[OMP_OFFLOAD_CONT16]]
+// CHECK: [[OMP_OFFLOAD_CONT16]]:
+// CHECK-NEXT: [[TMP81:%.*]] = load ptr, ptr [[F]], align 4
+// CHECK-NEXT: [[TMP82:%.*]] = load i32, ptr [[INDEX_ADDR]], align 4
+// CHECK-NEXT: store i32 [[TMP82]], ptr [[INDEX_CASTED17]], align 4
+// CHECK-NEXT: [[TMP83:%.*]] = load i32, ptr [[INDEX_CASTED17]], align 4
+// CHECK-NEXT: [[TMP84:%.*]] = load ptr, ptr [[F]], align 4
+// CHECK-NEXT: [[TMP85:%.*]] = load ptr, ptr [[F]], align 4
+// CHECK-NEXT: [[TMP86:%.*]] = load i32, ptr [[INDEX_ADDR]], align 4
+// CHECK-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds ptr, ptr [[TMP85]], i32 [[TMP86]]
+// CHECK-NEXT: [[TMP87:%.*]] = load i32, ptr [[INDEX_ADDR]], align 4
+// CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP87]], 1
+// CHECK-NEXT: [[TMP88:%.*]] = load ptr, ptr [[F]], align 4
+// CHECK-NEXT: [[TMP89:%.*]] = load i32, ptr [[INDEX_ADDR]], align 4
+// CHECK-NEXT: [[ARRAYIDX19:%.*]] = getelementptr inbounds ptr, ptr [[TMP88]], i32 [[TMP89]]
+// CHECK-NEXT: [[TMP90:%.*]] = load ptr, ptr [[ARRAYIDX19]], align 4
+// CHECK-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds nuw float, ptr [[TMP90]], i32 [[ADD]]
+// CHECK-NEXT: [[TMP91:%.*]] = load i32, ptr [[INDEX_ADDR]], align 4
+// CHECK-NEXT: [[ADD21:%.*]] = add nsw i32 [[TMP91]], 1
+// CHECK-NEXT: [[TMP92:%.*]] = mul nuw i32 [[ADD21]], 4
+// CHECK-NEXT: [[TMP93:%.*]] = icmp ugt i32 4, [[TMP92]]
+// CHECK-NEXT: [[TMP94:%.*]] = sub nuw i32 4, [[TMP92]]
+// CHECK-NEXT: [[TMP95:%.*]] = select i1 [[TMP93]], i32 [[TMP94]], i32 0
+// CHECK-NEXT: [[TMP96:%.*]] = sext i32 [[TMP95]] to i64
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[DOTOFFLOAD_SIZES]], ptr align 4 @.offload_sizes.5, i32 24, i1 false)
+// CHECK-NEXT: [[TMP97:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS22]], i32 0, i32 0
+// CHECK-NEXT: store ptr [[TMP84]], ptr [[TMP97]], align 4
+// CHECK-NEXT: [[TMP98:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS23]], i32 0, i32 0
+// CHECK-NEXT: store ptr [[ARRAYIDX18]], ptr [[TMP98]], align 4
+// CHECK-NEXT: [[TMP99:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS24]], i32 0, i32 0
+// CHECK-NEXT: store ptr null, ptr [[TMP99]], align 4
+// CHECK-NEXT: [[TMP100:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS22]], i32 0, i32 1
+// CHECK-NEXT: store ptr [[ARRAYIDX18]], ptr [[TMP100]], align 4
+// CHECK-NEXT: [[TMP101:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS23]], i32 0, i32 1
+// CHECK-NEXT: store ptr [[ARRAYIDX20]], ptr [[TMP101]], align 4
+// CHECK-NEXT: [[TMP102:%.*]] = getelementptr inbounds [3 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 1
+// CHECK-NEXT: store i64 [[TMP96]], ptr [[TMP102]], align 4
+// CHECK-NEXT: [[TMP103:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS24]], i32 0, i32 1
+// CHECK-NEXT: store ptr null, ptr [[TMP103]], align 4
+// CHECK-NEXT: [[TMP104:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS22]], i32 0, i32 2
+// CHECK-NEXT: store i32 [[TMP83]], ptr [[TMP104]], align 4
+// CHECK-NEXT: [[TMP105:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS23]], i32 0, i32 2
+// CHECK-NEXT: store i32 [[TMP83]], ptr [[TMP105]], align 4
+// CHECK-NEXT: [[TMP106:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS24]], i32 0, i32 2
+// CHECK-NEXT: store ptr null, ptr [[TMP106]], align 4
+// CHECK-NEXT: [[TMP107:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS22]], i32 0, i32 0
+// CHECK-NEXT: [[TMP108:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS23]], i32 0, i32 0
+// CHECK-NEXT: [[TMP109:%.*]] = getelementptr inbounds [3 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK-NEXT: [[TMP110:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS25]], i32 0, i32 0
+// CHECK-NEXT: store i32 3, ptr [[TMP110]], align 4
+// CHECK-NEXT: [[TMP111:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS25]], i32 0, i32 1
+// CHECK-NEXT: store i32 3, ptr [[TMP111]], align 4
+// CHECK-NEXT: [[TMP112:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS25]], i32 0, i32 2
+// CHECK-NEXT: store ptr [[TMP107]], ptr [[TMP112]], align 4
+// CHECK-NEXT: [[TMP113:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS25]], i32 0, i32 3
+// CHECK-NEXT: store ptr [[TMP108]], ptr [[TMP113]], align 4
+// CHECK-NEXT: [[TMP114:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS25]], i32 0, i32 4
+// CHECK-NEXT: store ptr [[TMP109]], ptr [[TMP114]], align 4
+// CHECK-NEXT: [[TMP115:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS25]], i32 0, i32 5
+// CHECK-NEXT: store ptr @.offload_maptypes.6, ptr [[TMP115]], align 4
+// CHECK-NEXT: [[TMP116:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS25]], i32 0, i32 6
+// CHECK-NEXT: store ptr null, ptr [[TMP116]], align 4
+// CHECK-NEXT: [[TMP117:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS25]], i32 0, i32 7
+// CHECK-NEXT: store ptr null, ptr [[TMP117]], align 4
+// CHECK-NEXT: [[TMP118:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS25]], i32 0, i32 8
+// CHECK-NEXT: store i64 0, ptr [[TMP118]], align 8
+// CHECK-NEXT: [[TMP119:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS25]], i32 0, i32 9
+// CHECK-NEXT: store i64 0, ptr [[TMP119]], align 8
+// CHECK-NEXT: [[TMP120:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS25]], i32 0, i32 10
+// CHECK-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP120]], align 4
+// CHECK-NEXT: [[TMP121:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS25]], i32 0, i32 11
+// CHECK-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP121]], align 4
+// CHECK-NEXT: [[TMP122:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS25]], i32 0, i32 12
+// CHECK-NEXT: store i32 0, ptr [[TMP122]], align 4
+// CHECK-NEXT: [[TMP123:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z34array_section_no_length_map_clausePfi_l29.region_id, ptr [[KERNEL_ARGS25]])
+// CHECK-NEXT: [[TMP124:%.*]] = icmp ne i32 [[TMP123]], 0
+// CHECK-NEXT: br i1 [[TMP124]], label %[[OMP_OFFLOAD_FAILED26:.*]], label %[[OMP_OFFLOAD_CONT27:.*]]
+// CHECK: [[OMP_OFFLOAD_FAILED26]]:
+// CHECK-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z34array_section_no_length_map_clausePfi_l29(ptr [[TMP81]], i32 [[TMP83]]) #[[ATTR2]]
+// CHECK-NEXT: br label %[[OMP_OFFLOAD_CONT27]]
+// CHECK: [[OMP_OFFLOAD_CONT27]]:
+// CHECK-NEXT: ret void
+//
+//
+// CHECK-LABEL: define internal void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z34array_section_no_length_map_clausePfi_l14(
+// CHECK-SAME: ptr noundef [[D:%.*]]) #[[ATTR1:[0-9]+]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 4
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[D_ADDR]], align 4
+// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP0]], i32 3
+// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[ARRAYIDX]], align 4
+// CHECK-NEXT: [[ADD:%.*]] = fadd float [[TMP1]], 2.000000e+00
+// CHECK-NEXT: store float [[ADD]], ptr [[ARRAYIDX]], align 4
+// CHECK-NEXT: ret void
+//
+//
+// CHECK-LABEL: define internal void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z34array_section_no_length_map_clausePfi_l19(
+// CHECK-SAME: ptr noundef [[D:%.*]]) #[[ATTR1]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 4
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[D_ADDR]], align 4
+// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP0]], i32 3
+// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[ARRAYIDX]], align 4
+// CHECK-NEXT: [[ADD:%.*]] = fadd float [[TMP1]], 3.000000e+00
+// CHECK-NEXT: store float [[ADD]], ptr [[ARRAYIDX]], align 4
+// CHECK-NEXT: ret void
+//
+//
+// CHECK-LABEL: define internal void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z34array_section_no_length_map_clausePfi_l24(
+// CHECK-SAME: ptr noundef [[F:%.*]], i32 noundef [[INDEX:%.*]]) #[[ATTR1]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[F_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: [[INDEX_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT: store ptr [[F]], ptr [[F_ADDR]], align 4
+// CHECK-NEXT: store i32 [[INDEX]], ptr [[INDEX_ADDR]], align 4
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[F_ADDR]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[INDEX_ADDR]], align 4
+// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 [[TMP1]]
+// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[ARRAYIDX]], align 4
+// CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 2
+// CHECK-NEXT: [[TMP3:%.*]] = load float, ptr [[ARRAYIDX1]], align 4
+// CHECK-NEXT: [[ADD:%.*]] = fadd float [[TMP3]], 4.000000e+00
+// CHECK-NEXT: store float [[ADD]], ptr [[ARRAYIDX1]], align 4
+// CHECK-NEXT: ret void
+//
+//
+// CHECK-LABEL: define internal void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z34array_section_no_length_map_clausePfi_l29(
+// CHECK-SAME: ptr noundef [[F:%.*]], i32 noundef [[INDEX:%.*]]) #[[ATTR1]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[F_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT: [[INDEX_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT: store ptr [[F]], ptr [[F_ADDR]], align 4
+// CHECK-NEXT: store i32 [[INDEX]], ptr [[INDEX_ADDR]], align 4
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[F_ADDR]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[INDEX_ADDR]], align 4
+// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 [[TMP1]]
+// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[ARRAYIDX]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[INDEX_ADDR]], align 4
+// CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 [[TMP3]]
+// CHECK-NEXT: [[TMP4:%.*]] = load float, ptr [[ARRAYIDX1]], align 4
+// CHECK-NEXT: [[ADD:%.*]] = fadd float [[TMP4]], 5.000000e+00
+// CHECK-NEXT: store float [[ADD]], ptr [[ARRAYIDX1]], align 4
+// CHECK-NEXT: ret void
+//
diff --git a/clang/test/OpenMP/target_map_codegen_35.cpp b/clang/test/OpenMP/target_map_codegen_35.cpp
index afa0965..c4fc49c 100644
--- a/clang/test/OpenMP/target_map_codegen_35.cpp
+++ b/clang/test/OpenMP/target_map_codegen_35.cpp
@@ -27,11 +27,11 @@ public:
void foo();
};
-// CK35-DAG: [[SIZE_TO:@.+]] = private {{.*}}constant [4 x i64] [i64 0, i64 0, i64 0, i64 8]
+// CK35-DAG: [[SIZE_TO:@.+]] = private {{.*}}constant [3 x i64] [i64 0, i64 0, i64 8]
// TARGET_PARAM = 0x20
// MEMBER_OF_1 | TO = 0x1000000000001
// MEMBER_OF_1 | PTR_AND_OBJ | TO = 0x1000000000011
-// CK35-DAG: [[MTYPE_TO:@.+]] = {{.+}}constant [4 x i64] [i64 [[#0x20]], i64 [[#0x1000000000001]], i64 [[#0x1000000000001]], i64 [[#0x1000000000011]]]
+// CK35-DAG: [[MTYPE_TO:@.+]] = {{.+}}constant [3 x i64] [i64 [[#0x20]], i64 [[#0x1000000000001]], i64 [[#0x1000000000011]]]
// CK35-DAG: [[SIZE_FROM:@.+]] = private {{.*}}constant [2 x i64] [i64 0, i64 8]
// TARGET_PARAM = 0x20
// MEMBER_OF_1 | PTR_AND_OBJ | FROM = 0x1000000000012
@@ -86,35 +86,14 @@ void ref_map() {
// CK35-DAG: [[B_BEGIN_INTPTR]] = ptrtoint ptr [[B_BEGIN_VOID:%.+]] to i64
// CK35-DAG: [[B_ADDR:%.+]] = getelementptr inbounds nuw %class.S, ptr [[S_ADDR]], i32 0, i32 1
- // pass MEMBER_OF_1 | TO {&s, &s.b+1, ((ptr)(&s+1)-(ptr)(&s.b+1))} to copy the data of remainder of s.
+ // pass MEMBER_OF_1 | PTR_AND_OBJ | TO {&s, &s.b, 8|4} to copy the data of s.b.
// CK35-DAG: [[BP2:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 2
// CK35-DAG: [[P2:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 2
- // CK35-DAG: [[S2:%.+]] = getelementptr inbounds {{.+}}[[S]], i{{.+}} 0, i{{.+}} 2
// CK35-DAG: store ptr [[S_ADDR]], ptr [[BP2]],
- // CK35-DAG: store ptr [[B_END:%.+]], ptr [[P2]],
- // CK35-DAG: store i64 [[REM_SIZE:%.+]], ptr [[S2]],
-
- // CK35-DAG: [[B_END]] = getelementptr ptr, ptr [[B_ADDR]], i{{.+}} 1
-
- // CK35-DAG: [[REM_SIZE]] = sdiv exact i64 [[SZ:%.+]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)
- // CK35-DAG: [[SZ]] = sub i64 [[S_END_INTPTR:%.+]], [[B_END_INTPTR:%.+]]
- // CK35-DAG: [[B_END_INTPTR]] = ptrtoint ptr [[B_END_VOID:%.+]] to i64
- // CK35-DAG: [[S_END_INTPTR]] = ptrtoint ptr [[S_END_VOID:%.+]] to i64
- // CK35-DAG: [[S_END_VOID]] = getelementptr i8, ptr [[S_LAST:%.+]], i{{.+}} 1
- // CK35-64-DAG: [[S_LAST]] = getelementptr i8, ptr [[S_VOIDPTR:%.+]], i64 15
- // CK35-32-DAG: [[S_LAST]] = getelementptr i8, ptr [[S_VOIDPTR:%.+]], i32 7
-
- // pass MEMBER_OF_1 | PTR_AND_OBJ | TO {&s, &s.b, 8|4} to copy the data of s.b.
-
- // CK35-DAG: [[BP3:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 3
- // CK35-DAG: [[P3:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 3
-
-
- // CK35-DAG: store ptr [[S_ADDR]], ptr [[BP3]],
- // CK35-DAG: store ptr [[B_ADDR:%.+]], ptr [[P3]],
+ // CK35-DAG: store ptr [[B_ADDR:%.+]], ptr [[P2]],
// CK35-DAG: [[B_ADDR]] = load ptr, ptr [[B_REF:%.+]], align {{[0-9]+}}, !nonnull !{{[0-9]+}}, !align !{{[0-9]+}}
// CK35-DAG: [[B_REF]] = getelementptr inbounds nuw %class.S, ptr [[S_ADDR]], i32 0, i32 1
diff --git a/clang/test/OpenMP/target_map_messages.cpp b/clang/test/OpenMP/target_map_messages.cpp
index 4a026584..0ee70be 100644
--- a/clang/test/OpenMP/target_map_messages.cpp
+++ b/clang/test/OpenMP/target_map_messages.cpp
@@ -122,9 +122,9 @@ struct SA {
{}
#pragma omp target map(always, tofrom: c,f[1:2])
{}
- #pragma omp target map(always, tofrom: c[:],f) // expected-error {{section length is unspecified and cannot be inferred because subscripted value is not an array}}
+ #pragma omp target map(always, tofrom: c[:],f) // lt60-error {{section length is unspecified and cannot be inferred because subscripted value is not an array}}
{}
- #pragma omp target map(always, tofrom: c,f[:]) // expected-error {{section length is unspecified and cannot be inferred because subscripted value is not an array}}
+ #pragma omp target map(always, tofrom: c,f[:]) // lt60-error {{section length is unspecified and cannot be inferred because subscripted value is not an array}}
{}
#pragma omp target map(always) // expected-error {{use of undeclared identifier 'always'}}
{}
@@ -134,9 +134,9 @@ struct SA {
{}
#pragma omp target map(self, tofrom: c,f[1:2]) // lt60-error {{incorrect map type modifier, expected one of: 'always', 'close', 'mapper'}}
{}
- #pragma omp target map(self, tofrom: c[:],f) // lt60-error {{incorrect map type modifier, expected one of: 'always', 'close', 'mapper'}} // expected-error {{section length is unspecified and cannot be inferred because subscripted value is not an array}}
+ #pragma omp target map(self, tofrom: c[:],f) // lt60-error {{incorrect map type modifier, expected one of: 'always', 'close', 'mapper'}} // lt60-error {{section length is unspecified and cannot be inferred because subscripted value is not an array}}
{}
- #pragma omp target map(self, tofrom: c,f[:]) // lt60-error {{incorrect map type modifier, expected one of: 'always', 'close', 'mapper'}} // expected-error {{section length is unspecified and cannot be inferred because subscripted value is not an array}}
+ #pragma omp target map(self, tofrom: c,f[:]) // lt60-error {{incorrect map type modifier, expected one of: 'always', 'close', 'mapper'}} // lt60-error {{section length is unspecified and cannot be inferred because subscripted value is not an array}}
{}
#pragma omp target map(close, tofrom: c,f)
{}
@@ -144,9 +144,9 @@ struct SA {
{}
#pragma omp target map(close, tofrom: c,f[1:2])
{}
- #pragma omp target map(close, tofrom: c[:],f) // expected-error {{section length is unspecified and cannot be inferred because subscripted value is not an array}}
+ #pragma omp target map(close, tofrom: c[:],f) // lt60-error {{section length is unspecified and cannot be inferred because subscripted value is not an array}}
{}
- #pragma omp target map(close, tofrom: c,f[:]) // expected-error {{section length is unspecified and cannot be inferred because subscripted value is not an array}}
+ #pragma omp target map(close, tofrom: c,f[:]) // lt60-error {{section length is unspecified and cannot be inferred because subscripted value is not an array}}
{}
#pragma omp target map(close) // expected-error {{use of undeclared identifier 'close'}}
{}
@@ -159,11 +159,11 @@ struct SA {
// lt51-error@+1 {{incorrect map type modifier, expected one of: 'always', 'close', 'mapper'}}
#pragma omp target map(present, tofrom: c,f[1:2])
{}
- // expected-error@+2 {{section length is unspecified and cannot be inferred because subscripted value is not an array}}
+ // lt60-error@+2 {{section length is unspecified and cannot be inferred because subscripted value is not an array}}
// lt51-error@+1 {{incorrect map type modifier, expected one of: 'always', 'close', 'mapper'}}
#pragma omp target map(present, tofrom: c[:],f)
{}
- // expected-error@+2 {{section length is unspecified and cannot be inferred because subscripted value is not an array}}
+ // lt60-error@+2 {{section length is unspecified and cannot be inferred because subscripted value is not an array}}
// lt51-error@+1 {{incorrect map type modifier, expected one of: 'always', 'close', 'mapper'}}
#pragma omp target map(present, tofrom: c,f[:])
{}
@@ -190,14 +190,14 @@ struct SA {
{}
// ge60-error@+5 {{incorrect map type modifier, expected one of: 'always', 'close', 'mapper', 'present', 'iterator', 'self}}
// ge52-error@+4 {{incorrect map type modifier, expected one of: 'always', 'close', 'mapper', 'present', 'iterator'}}
- // expected-error@+3 {{section length is unspecified and cannot be inferred because subscripted value is not an array}}
+ // lt60-error@+3 {{section length is unspecified and cannot be inferred because subscripted value is not an array}}
// ge51-omp-error@+2 {{incorrect map type modifier, expected one of: 'always', 'close', 'mapper', 'present'}}
// lt51-omp-error@+1 {{incorrect map type modifier, expected one of: 'always', 'close', 'mapper'}}
#pragma omp target map(ompx_hold, tofrom: c[:],f)
{}
// ge60-error@+5 {{incorrect map type modifier, expected one of: 'always', 'close', 'mapper', 'present', 'iterator', 'self}}
// ge52-error@+4 {{incorrect map type modifier, expected one of: 'always', 'close', 'mapper', 'present', 'iterator'}}
- // expected-error@+3 {{section length is unspecified and cannot be inferred because subscripted value is not an array}}
+ // lt60-error@+3 {{section length is unspecified and cannot be inferred because subscripted value is not an array}}
// ge51-omp-error@+2 {{incorrect map type modifier, expected one of: 'always', 'close', 'mapper', 'present'}}
// lt51-omp-error@+1 {{incorrect map type modifier, expected one of: 'always', 'close', 'mapper'}}
#pragma omp target map(ompx_hold, tofrom: c,f[:])
@@ -448,7 +448,7 @@ void SAclient(int arg) {
{}
#pragma omp target map(mptr[:1][:2] [0:2]) // expected-error {{array section does not specify contiguous storage}}
{}
-#pragma omp target map(mptr[:1][:] [0:2]) // expected-error {{section length is unspecified and cannot be inferred because subscripted value is not an array}}
+#pragma omp target map(mptr[:1][:] [0:2]) // lt60-error {{section length is unspecified and cannot be inferred because subscripted value is not an array}}
{}
#pragma omp target map(mptr[:2][:1] [0:2]) // expected-error {{array section does not specify contiguous storage}}
{}
@@ -517,7 +517,7 @@ void SAclient(int arg) {
{}
#pragma omp target map(r.S.Ptr [4:5])
{}
-#pragma omp target map(r.S.Ptr[:]) // expected-error {{section length is unspecified and cannot be inferred because subscripted value is not an array}}
+#pragma omp target map(r.S.Ptr[:]) // lt60-error {{section length is unspecified and cannot be inferred because subscripted value is not an array}}
{}
#pragma omp target map((p + 1)->A) // lt50-error {{expected expression containing only member accesses and/or array sections based on named variables}}
{}
diff --git a/clang/test/Preprocessor/arm-acle-6.4.c b/clang/test/Preprocessor/arm-acle-6.4.c
index 2c8f486..48deba7 100644
--- a/clang/test/Preprocessor/arm-acle-6.4.c
+++ b/clang/test/Preprocessor/arm-acle-6.4.c
@@ -188,6 +188,37 @@
// RUN: %clang --target=arm-arm-none-eabi -mcpu=cortex-m33 -x c -E -dM %s -o - | FileCheck %s -check-prefix CHECK-M-DSP
// RUN: %clang --target=arm-arm-none-eabi -march=armv8m.main+dsp -x c -E -dM %s -o - | FileCheck %s -check-prefix CHECK-M-DSP
+// RUN: %clang -target arm-none-linux-eabi -march=armv8m.base -x c -E -dM %s -o - | FileCheck %s -check-prefix CHECK-V8M-BASE
+
+// CHECK-V8M-BASE-NOT: __ARM_ARCH_ISA_ARM
+// CHECK-V8M-BASE-NOT: __ARM_FEATURE_DSP
+// CHECK-V8M-BASE-NOT: __ARM_FEATURE_SIMD32
+// CHECK-V8M-BASE: __ARM_ARCH 8
+// CHECK-V8M-BASE: __ARM_ARCH_ISA_THUMB 1
+// CHECK-V8M-BASE: __ARM_ARCH_PROFILE 'M'
+// CHECK-V8M-BASE: __ARM_FEATURE_CLZ 1
+// CHECK-V8M-BASE: __ARM_FEATURE_IDIV 1
+// CHECK-V8M-BASE: __ARM_FEATURE_LDREX 0x7
+// CHECK-V8M-BASE: __ARM_FEATURE_QBIT 1
+// CHECK-V8M-BASE: __ARM_FEATURE_SAT
+// CHECK-V8M-BASE-NOT: __ARM_FEATURE_UNALIGNED
+
+// RUN: %clang -target arm-none-linux-eabi -march=armv8m.main -x c -E -dM %s -o - | FileCheck %s -check-prefix CHECK-V8M-MAIN
+// RUN: %clang -target arm-none-linux-eabi -march=armv8.1m.main -x c -E -dM %s -o - | FileCheck %s -check-prefix CHECK-V8M-MAIN
+
+// CHECK-V8M-MAIN-NOT: __ARM_ARCH_ISA_ARM
+// CHECK-V8M-MAIN-NOT: __ARM_FEATURE_DSP
+// CHECK-V8M-MAIN-NOT: __ARM_FEATURE_SIMD32
+// CHECK-V8M-MAIN: __ARM_ARCH 8
+// CHECK-V8M-MAIN: __ARM_ARCH_ISA_THUMB 2
+// CHECK-V8M-MAIN: __ARM_ARCH_PROFILE 'M'
+// CHECK-V8M-MAIN: __ARM_FEATURE_CLZ 1
+// CHECK-V8M-MAIN: __ARM_FEATURE_IDIV 1
+// CHECK-V8M-MAIN: __ARM_FEATURE_LDREX 0x7
+// CHECK-V8M-MAIN: __ARM_FEATURE_QBIT 1
+// CHECK-V8M-MAIN: __ARM_FEATURE_SAT 1
+// CHECK-V8M-MAIN: __ARM_FEATURE_UNALIGNED 1
+
// CHECK-M-DSP: __ARM_FEATURE_DSP 1
// CHECK-M-DSP: __ARM_FEATURE_SIMD32 1
diff --git a/clang/test/Preprocessor/init-mips.c b/clang/test/Preprocessor/init-mips.c
index 4fead33..125872a 100644
--- a/clang/test/Preprocessor/init-mips.c
+++ b/clang/test/Preprocessor/init-mips.c
@@ -80,10 +80,10 @@
// MIPS32BE:#define __INTMAX_MAX__ 9223372036854775807LL
// MIPS32BE:#define __INTMAX_TYPE__ long long int
// MIPS32BE:#define __INTMAX_WIDTH__ 64
-// MIPS32BE:#define __INTPTR_FMTd__ "ld"
-// MIPS32BE:#define __INTPTR_FMTi__ "li"
-// MIPS32BE:#define __INTPTR_MAX__ 2147483647L
-// MIPS32BE:#define __INTPTR_TYPE__ long int
+// MIPS32BE:#define __INTPTR_FMTd__ "d"
+// MIPS32BE:#define __INTPTR_FMTi__ "i"
+// MIPS32BE:#define __INTPTR_MAX__ 2147483647
+// MIPS32BE:#define __INTPTR_TYPE__ int
// MIPS32BE:#define __INTPTR_WIDTH__ 32
// MIPS32BE:#define __INT_FAST16_FMTd__ "hd"
// MIPS32BE:#define __INT_FAST16_FMTi__ "hi"
@@ -185,8 +185,8 @@
// MIPS32BE:#define __UINTMAX_MAX__ 18446744073709551615ULL
// MIPS32BE:#define __UINTMAX_TYPE__ long long unsigned int
// MIPS32BE:#define __UINTMAX_WIDTH__ 64
-// MIPS32BE:#define __UINTPTR_MAX__ 4294967295UL
-// MIPS32BE:#define __UINTPTR_TYPE__ long unsigned int
+// MIPS32BE:#define __UINTPTR_MAX__ 4294967295U
+// MIPS32BE:#define __UINTPTR_TYPE__ unsigned int
// MIPS32BE:#define __UINTPTR_WIDTH__ 32
// MIPS32BE:#define __UINT_FAST16_MAX__ 65535
// MIPS32BE:#define __UINT_FAST16_TYPE__ unsigned short
@@ -300,10 +300,10 @@
// MIPS32EL:#define __INTMAX_MAX__ 9223372036854775807LL
// MIPS32EL:#define __INTMAX_TYPE__ long long int
// MIPS32EL:#define __INTMAX_WIDTH__ 64
-// MIPS32EL:#define __INTPTR_FMTd__ "ld"
-// MIPS32EL:#define __INTPTR_FMTi__ "li"
-// MIPS32EL:#define __INTPTR_MAX__ 2147483647L
-// MIPS32EL:#define __INTPTR_TYPE__ long int
+// MIPS32EL:#define __INTPTR_FMTd__ "d"
+// MIPS32EL:#define __INTPTR_FMTi__ "i"
+// MIPS32EL:#define __INTPTR_MAX__ 2147483647
+// MIPS32EL:#define __INTPTR_TYPE__ int
// MIPS32EL:#define __INTPTR_WIDTH__ 32
// MIPS32EL:#define __INT_FAST16_FMTd__ "hd"
// MIPS32EL:#define __INT_FAST16_FMTi__ "hi"
@@ -402,8 +402,8 @@
// MIPS32EL:#define __UINTMAX_MAX__ 18446744073709551615ULL
// MIPS32EL:#define __UINTMAX_TYPE__ long long unsigned int
// MIPS32EL:#define __UINTMAX_WIDTH__ 64
-// MIPS32EL:#define __UINTPTR_MAX__ 4294967295UL
-// MIPS32EL:#define __UINTPTR_TYPE__ long unsigned int
+// MIPS32EL:#define __UINTPTR_MAX__ 4294967295U
+// MIPS32EL:#define __UINTPTR_TYPE__ unsigned int
// MIPS32EL:#define __UINTPTR_WIDTH__ 32
// MIPS32EL:#define __UINT_FAST16_MAX__ 65535
// MIPS32EL:#define __UINT_FAST16_TYPE__ unsigned short
@@ -547,10 +547,10 @@
// MIPSN32BE: #define __INTMAX_MAX__ 9223372036854775807LL
// MIPSN32BE: #define __INTMAX_TYPE__ long long int
// MIPSN32BE: #define __INTMAX_WIDTH__ 64
-// MIPSN32BE: #define __INTPTR_FMTd__ "ld"
-// MIPSN32BE: #define __INTPTR_FMTi__ "li"
-// MIPSN32BE: #define __INTPTR_MAX__ 2147483647L
-// MIPSN32BE: #define __INTPTR_TYPE__ long int
+// MIPSN32BE: #define __INTPTR_FMTd__ "d"
+// MIPSN32BE: #define __INTPTR_FMTi__ "i"
+// MIPSN32BE: #define __INTPTR_MAX__ 2147483647
+// MIPSN32BE: #define __INTPTR_TYPE__ int
// MIPSN32BE: #define __INTPTR_WIDTH__ 32
// MIPSN32BE: #define __INT_FAST16_FMTd__ "hd"
// MIPSN32BE: #define __INT_FAST16_FMTi__ "hi"
@@ -684,12 +684,12 @@
// MIPSN32BE: #define __UINTMAX_MAX__ 18446744073709551615ULL
// MIPSN32BE: #define __UINTMAX_TYPE__ long long unsigned int
// MIPSN32BE: #define __UINTMAX_WIDTH__ 64
-// MIPSN32BE: #define __UINTPTR_FMTX__ "lX"
-// MIPSN32BE: #define __UINTPTR_FMTo__ "lo"
-// MIPSN32BE: #define __UINTPTR_FMTu__ "lu"
-// MIPSN32BE: #define __UINTPTR_FMTx__ "lx"
-// MIPSN32BE: #define __UINTPTR_MAX__ 4294967295UL
-// MIPSN32BE: #define __UINTPTR_TYPE__ long unsigned int
+// MIPSN32BE: #define __UINTPTR_FMTX__ "X"
+// MIPSN32BE: #define __UINTPTR_FMTo__ "o"
+// MIPSN32BE: #define __UINTPTR_FMTu__ "u"
+// MIPSN32BE: #define __UINTPTR_FMTx__ "x"
+// MIPSN32BE: #define __UINTPTR_MAX__ 4294967295U
+// MIPSN32BE: #define __UINTPTR_TYPE__ unsigned int
// MIPSN32BE: #define __UINTPTR_WIDTH__ 32
// MIPSN32BE: #define __UINT_FAST16_FMTX__ "hX"
// MIPSN32BE: #define __UINT_FAST16_FMTo__ "ho"
@@ -864,10 +864,10 @@
// MIPSN32EL: #define __INTMAX_MAX__ 9223372036854775807LL
// MIPSN32EL: #define __INTMAX_TYPE__ long long int
// MIPSN32EL: #define __INTMAX_WIDTH__ 64
-// MIPSN32EL: #define __INTPTR_FMTd__ "ld"
-// MIPSN32EL: #define __INTPTR_FMTi__ "li"
-// MIPSN32EL: #define __INTPTR_MAX__ 2147483647L
-// MIPSN32EL: #define __INTPTR_TYPE__ long int
+// MIPSN32EL: #define __INTPTR_FMTd__ "d"
+// MIPSN32EL: #define __INTPTR_FMTi__ "i"
+// MIPSN32EL: #define __INTPTR_MAX__ 2147483647
+// MIPSN32EL: #define __INTPTR_TYPE__ int
// MIPSN32EL: #define __INTPTR_WIDTH__ 32
// MIPSN32EL: #define __INT_FAST16_FMTd__ "hd"
// MIPSN32EL: #define __INT_FAST16_FMTi__ "hi"
@@ -1001,12 +1001,12 @@
// MIPSN32EL: #define __UINTMAX_MAX__ 18446744073709551615ULL
// MIPSN32EL: #define __UINTMAX_TYPE__ long long unsigned int
// MIPSN32EL: #define __UINTMAX_WIDTH__ 64
-// MIPSN32EL: #define __UINTPTR_FMTX__ "lX"
-// MIPSN32EL: #define __UINTPTR_FMTo__ "lo"
-// MIPSN32EL: #define __UINTPTR_FMTu__ "lu"
-// MIPSN32EL: #define __UINTPTR_FMTx__ "lx"
-// MIPSN32EL: #define __UINTPTR_MAX__ 4294967295UL
-// MIPSN32EL: #define __UINTPTR_TYPE__ long unsigned int
+// MIPSN32EL: #define __UINTPTR_FMTX__ "X"
+// MIPSN32EL: #define __UINTPTR_FMTo__ "o"
+// MIPSN32EL: #define __UINTPTR_FMTu__ "u"
+// MIPSN32EL: #define __UINTPTR_FMTx__ "x"
+// MIPSN32EL: #define __UINTPTR_MAX__ 4294967295U
+// MIPSN32EL: #define __UINTPTR_TYPE__ unsigned int
// MIPSN32EL: #define __UINTPTR_WIDTH__ 32
// MIPSN32EL: #define __UINT_FAST16_FMTX__ "hX"
// MIPSN32EL: #define __UINT_FAST16_FMTo__ "ho"
diff --git a/clang/test/Preprocessor/pragma-pushpop-macro-diag.c b/clang/test/Preprocessor/pragma-pushpop-macro-diag.c
new file mode 100644
index 0000000..293cb82
--- /dev/null
+++ b/clang/test/Preprocessor/pragma-pushpop-macro-diag.c
@@ -0,0 +1,4 @@
+// RUN: %clang_cc1 -fms-extensions %s -fsyntax-only -verify
+
+#pragma push_macro("") // expected-warning {{'#pragma push_macro' expected a non-empty string}}
+#pragma pop_macro("") // expected-warning {{'#pragma pop_macro' expected a non-empty string}}
diff --git a/clang/test/Preprocessor/pragma-pushpop-macro.c b/clang/test/Preprocessor/pragma-pushpop-macro.c
index 0aee074..238e3ed 100644
--- a/clang/test/Preprocessor/pragma-pushpop-macro.c
+++ b/clang/test/Preprocessor/pragma-pushpop-macro.c
@@ -56,3 +56,6 @@ int P;
// CHECK: int pmy2 = 4
// CHECK: int Q;
// CHECK: int P;
+
+#pragma push_macro("")
+#pragma pop_macro("")
diff --git a/clang/test/Preprocessor/stdint.c b/clang/test/Preprocessor/stdint.c
index 899ff59..9f982a3 100644
--- a/clang/test/Preprocessor/stdint.c
+++ b/clang/test/Preprocessor/stdint.c
@@ -350,8 +350,8 @@
// MIPS:typedef int8_t int_fast8_t;
// MIPS:typedef uint8_t uint_fast8_t;
//
-// MIPS:typedef long int intptr_t;
-// MIPS:typedef long unsigned int uintptr_t;
+// MIPS:typedef int intptr_t;
+// MIPS:typedef unsigned int uintptr_t;
//
// MIPS:typedef long long int intmax_t;
// MIPS:typedef long long unsigned int uintmax_t;
@@ -396,9 +396,9 @@
// MIPS:INT_FAST64_MAX_ 9223372036854775807LL
// MIPS:UINT_FAST64_MAX_ 18446744073709551615ULL
//
-// MIPS:INTPTR_MIN_ (-2147483647L -1)
-// MIPS:INTPTR_MAX_ 2147483647L
-// MIPS:UINTPTR_MAX_ 4294967295UL
+// MIPS:INTPTR_MIN_ (-2147483647 -1)
+// MIPS:INTPTR_MAX_ 2147483647
+// MIPS:UINTPTR_MAX_ 4294967295U
// MIPS:PTRDIFF_MIN_ (-2147483647 -1)
// MIPS:PTRDIFF_MAX_ 2147483647
// MIPS:SIZE_MAX_ 4294967295U
diff --git a/clang/test/Sema/builtins-elementwise-math.c b/clang/test/Sema/builtins-elementwise-math.c
index 01057b3..8548d3b 100644
--- a/clang/test/Sema/builtins-elementwise-math.c
+++ b/clang/test/Sema/builtins-elementwise-math.c
@@ -386,6 +386,96 @@ void test_builtin_elementwise_minimum(int i, short s, float f, double d, float4
// expected-error@-1 {{1st argument must be a scalar or vector of floating-point types (was '_Complex float')}}
}
+void test_builtin_elementwise_maximumnum(int i, short s, float f, double d, float4 fv, double4 dv, int3 iv, unsigned3 uv, int *p) {
+ i = __builtin_elementwise_maximumnum(p, d);
+ // expected-error@-1 {{1st argument must be a scalar or vector of floating-point types (was 'int *')}}
+
+ struct Foo foo = __builtin_elementwise_maximumnum(d, d);
+ // expected-error@-1 {{initializing 'struct Foo' with an expression of incompatible type 'double'}}
+
+ i = __builtin_elementwise_maximumnum(i);
+ // expected-error@-1 {{too few arguments to function call, expected 2, have 1}}
+
+ i = __builtin_elementwise_maximumnum();
+ // expected-error@-1 {{too few arguments to function call, expected 2, have 0}}
+
+ i = __builtin_elementwise_maximumnum(i, i, i);
+ // expected-error@-1 {{too many arguments to function call, expected 2, have 3}}
+
+ i = __builtin_elementwise_maximumnum(fv, iv);
+ // expected-error@-1 {{arguments are of different types ('float4' (vector of 4 'float' values) vs 'int3' (vector of 3 'int' values))}}
+
+ i = __builtin_elementwise_maximumnum(uv, iv);
+ // expected-error@-1 {{1st argument must be a scalar or vector of floating-point types (was 'unsigned3' (vector of 3 'unsigned int' values))}}
+
+ dv = __builtin_elementwise_maximumnum(fv, dv);
+ // expected-error@-1 {{arguments are of different types ('float4' (vector of 4 'float' values) vs 'double4' (vector of 4 'double' values))}}
+
+ d = __builtin_elementwise_maximumnum(f, d);
+ // expected-error@-1 {{arguments are of different types ('float' vs 'double')}}
+
+ fv = __builtin_elementwise_maximumnum(fv, fv);
+
+ i = __builtin_elementwise_maximumnum(iv, iv);
+ // expected-error@-1 {{1st argument must be a scalar or vector of floating-point types (was 'int3' (vector of 3 'int' values))}}
+
+ i = __builtin_elementwise_maximumnum(i, i);
+ // expected-error@-1 {{1st argument must be a scalar or vector of floating-point types (was 'int')}}
+
+ int A[10];
+ A = __builtin_elementwise_maximumnum(A, A);
+ // expected-error@-1 {{1st argument must be a scalar or vector of floating-point types (was 'int *')}}
+
+ _Complex float c1, c2;
+ c1 = __builtin_elementwise_maximumnum(c1, c2);
+ // expected-error@-1 {{1st argument must be a scalar or vector of floating-point types (was '_Complex float')}}
+}
+
+void test_builtin_elementwise_minimumnum(int i, short s, float f, double d, float4 fv, double4 dv, int3 iv, unsigned3 uv, int *p) {
+ i = __builtin_elementwise_minimumnum(p, d);
+ // expected-error@-1 {{1st argument must be a scalar or vector of floating-point types (was 'int *')}}
+
+ struct Foo foo = __builtin_elementwise_minimumnum(d, d);
+ // expected-error@-1 {{initializing 'struct Foo' with an expression of incompatible type 'double'}}
+
+ i = __builtin_elementwise_minimumnum(i);
+ // expected-error@-1 {{too few arguments to function call, expected 2, have 1}}
+
+ i = __builtin_elementwise_minimumnum();
+ // expected-error@-1 {{too few arguments to function call, expected 2, have 0}}
+
+ i = __builtin_elementwise_minimumnum(i, i, i);
+ // expected-error@-1 {{too many arguments to function call, expected 2, have 3}}
+
+ i = __builtin_elementwise_minimumnum(fv, iv);
+ // expected-error@-1 {{arguments are of different types ('float4' (vector of 4 'float' values) vs 'int3' (vector of 3 'int' values))}}
+
+ i = __builtin_elementwise_minimumnum(uv, iv);
+ // expected-error@-1 {{1st argument must be a scalar or vector of floating-point types (was 'unsigned3' (vector of 3 'unsigned int' values))}}
+
+ dv = __builtin_elementwise_minimumnum(fv, dv);
+ // expected-error@-1 {{arguments are of different types ('float4' (vector of 4 'float' values) vs 'double4' (vector of 4 'double' values))}}
+
+ d = __builtin_elementwise_minimumnum(f, d);
+ // expected-error@-1 {{arguments are of different types ('float' vs 'double')}}
+
+ fv = __builtin_elementwise_minimumnum(fv, fv);
+
+ i = __builtin_elementwise_minimumnum(iv, iv);
+ // expected-error@-1 {{1st argument must be a scalar or vector of floating-point types (was 'int3' (vector of 3 'int' values))}}
+
+ i = __builtin_elementwise_minimumnum(i, i);
+ // expected-error@-1 {{1st argument must be a scalar or vector of floating-point types (was 'int')}}
+
+ int A[10];
+ A = __builtin_elementwise_minimumnum(A, A);
+ // expected-error@-1 {{1st argument must be a scalar or vector of floating-point types (was 'int *')}}
+
+ _Complex float c1, c2;
+ c1 = __builtin_elementwise_minimumnum(c1, c2);
+ // expected-error@-1 {{1st argument must be a scalar or vector of floating-point types (was '_Complex float')}}
+}
+
void test_builtin_elementwise_bitreverse(int i, float f, double d, float4 v, int3 iv, unsigned u, unsigned4 uv) {
struct Foo s = __builtin_elementwise_bitreverse(i);
diff --git a/clang/test/Sema/implicit-special-member-deprecated.cpp b/clang/test/Sema/implicit-special-member-deprecated.cpp
new file mode 100644
index 0000000..8e23404
--- /dev/null
+++ b/clang/test/Sema/implicit-special-member-deprecated.cpp
@@ -0,0 +1,24 @@
+// RUN: %clang_cc1 -std=c++20 -Wdeprecated-declarations -verify %s
+
+struct A {
+ [[deprecated("use something else")]] int x = 42; // expected-note {{marked deprecated here}}
+};
+
+A makeDefaultA() { return {}; } // ctor is implicit → no warn
+A copyA(const A &a) { return a; } // copy-ctor implicit → no warn
+
+void assignA() {
+ A a, b;
+ a = b; // copy-assign implicit → no warn
+}
+
+void useA() {
+ A a;
+ (void)a.x; // expected-warning {{is deprecated}}
+}
+
+// Explicitly-defaulted ctor – now silent
+struct B {
+ [[deprecated]] int y;
+ B() = default; // no warning under new policy
+};
diff --git a/clang/test/Sema/unsupported-arm-streaming.cpp b/clang/test/Sema/unsupported-arm-streaming.cpp
new file mode 100644
index 0000000..8693dd6
--- /dev/null
+++ b/clang/test/Sema/unsupported-arm-streaming.cpp
@@ -0,0 +1,3 @@
+// RUN: %clang_cc1 -x c -triple x86_64-pc-linux-gnu -fsyntax-only -verify %s
+
+void arm_streaming(void) __arm_streaming {} // expected-error {{'__arm_streaming' is not supported on this target}}
diff --git a/clang/test/Sema/warn-fortify-source.c b/clang/test/Sema/warn-fortify-source.c
index f48ea09..216878c 100644
--- a/clang/test/Sema/warn-fortify-source.c
+++ b/clang/test/Sema/warn-fortify-source.c
@@ -3,6 +3,11 @@
// RUN: %clang_cc1 -xc++ -triple x86_64-apple-macosx10.14.0 %s -verify
// RUN: %clang_cc1 -xc++ -triple x86_64-apple-macosx10.14.0 %s -verify -DUSE_BUILTINS
+// RUN: %clang_cc1 -triple x86_64-apple-macosx10.14.0 %s -verify -fexperimental-new-constant-interpreter
+// RUN: %clang_cc1 -triple x86_64-apple-macosx10.14.0 %s -verify -DUSE_BUILTINS -fexperimental-new-constant-interpreter
+// RUN: %clang_cc1 -xc++ -triple x86_64-apple-macosx10.14.0 %s -verify -fexperimental-new-constant-interpreter
+// RUN: %clang_cc1 -xc++ -triple x86_64-apple-macosx10.14.0 %s -verify -DUSE_BUILTINS -fexperimental-new-constant-interpreter
+
typedef unsigned long size_t;
#ifdef __cplusplus
diff --git a/clang/test/Sema/warn-lifetime-safety-dataflow.cpp b/clang/test/Sema/warn-lifetime-safety-dataflow.cpp
index 0e98904ade..2b934ac 100644
--- a/clang/test/Sema/warn-lifetime-safety-dataflow.cpp
+++ b/clang/test/Sema/warn-lifetime-safety-dataflow.cpp
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 -mllvm -debug-only=LifetimeFacts,LifetimeLoanPropagation -Wexperimental-lifetime-safety %s 2>&1 | FileCheck %s
+// RUN: %clang_cc1 -fexperimental-lifetime-safety -mllvm -debug-only=LifetimeFacts -Wexperimental-lifetime-safety %s 2>&1 | FileCheck %s
// REQUIRES: asserts
struct MyObj {
@@ -19,10 +19,6 @@ MyObj* return_local_addr() {
// CHECK: ReturnOfOrigin (OriginID: [[O_RET_VAL]])
// CHECK: Expire (LoanID: [[L_X]])
}
-// CHECK: LoanPropagation results:
-// CHECK-DAG: Origin [[O_ADDR_X]] contains Loan [[L_X]]
-// CHECK-DAG: Origin [[O_P]] contains Loan [[L_X]]
-// CHECK-DAG: Origin [[O_RET_VAL]] contains Loan [[L_X]]
// Pointer Assignment and Return
@@ -47,15 +43,6 @@ MyObj* assign_and_return_local_addr() {
// CHECK: ReturnOfOrigin (OriginID: [[O_PTR2_RVAL_2]])
// CHECK: Expire (LoanID: [[L_Y]])
}
-// CHECK: LoanPropagation results:
-// CHECK-DAG: Origin [[O_ADDR_Y]] contains Loan [[L_Y]]
-// CHECK-DAG: Origin [[O_PTR1]] contains Loan [[L_Y]]
-// CHECK-DAG: Origin [[O_PTR2]] contains Loan [[L_Y]]
-// CHECK-DAG: Origin [[O_PTR1_RVAL]] contains Loan [[L_Y]]
-// CHECK-DAG: Origin [[O_PTR1_RVAL_2]] contains Loan [[L_Y]]
-// CHECK-DAG: Origin [[O_PTR2_RVAL]] contains Loan [[L_Y]]
-// CHECK-DAG: Origin [[O_PTR2_RVAL_2]] contains Loan [[L_Y]]
-
// Return of Non-Pointer Type
// CHECK-LABEL: Function: return_int_val
@@ -65,8 +52,6 @@ int return_int_val() {
return x;
}
// CHECK-NEXT: End of Block
-// CHECK: LoanPropagation results:
-// CHECK: <empty>
// Loan Expiration (Automatic Variable, C++)
@@ -79,9 +64,6 @@ void loan_expires_cpp() {
// CHECK: AssignOrigin (DestID: [[O_POBJ:[0-9]+]], SrcID: [[O_ADDR_OBJ]])
// CHECK: Expire (LoanID: [[L_OBJ]])
}
-// CHECK: LoanPropagation results:
-// CHECK-DAG: Origin [[O_ADDR_OBJ]] contains Loan [[L_OBJ]]
-// CHECK-DAG: Origin [[O_POBJ]] contains Loan [[L_OBJ]]
// FIXME: No expire for Trivial Destructors
@@ -96,10 +78,6 @@ void loan_expires_trivial() {
// CHECK-NEXT: End of Block
// FIXME: Add check for Expire once trivial destructors are handled for expiration.
}
-// CHECK: LoanPropagation results:
-// CHECK-DAG: Origin [[O_ADDR_TRIVIAL_OBJ]] contains Loan [[L_TRIVIAL_OBJ]]
-// CHECK-DAG: Origin [[O_PTOBJ]] contains Loan [[L_TRIVIAL_OBJ]]
-
// CHECK-LABEL: Function: conditional
void conditional(bool condition) {
@@ -119,13 +97,6 @@ void conditional(bool condition) {
// CHECK: AssignOrigin (DestID: [[O_P_RVAL:[0-9]+]], SrcID: [[O_P]])
// CHECK: AssignOrigin (DestID: [[O_Q:[0-9]+]], SrcID: [[O_P_RVAL]])
}
-// CHECK: LoanPropagation results:
-// CHECK-DAG: Origin [[O_ADDR_A]] contains Loan [[L_A]]
-// CHECK-DAG: Origin [[O_ADDR_B]] contains Loan [[L_B]]
-// CHECK-DAG: Origin [[O_P]] contains Loan [[L_A]]
-// CHECK-DAG: Origin [[O_P]] contains Loan [[L_B]]
-// CHECK-DAG: Origin [[O_Q]] contains Loan [[L_A]]
-// CHECK-DAG: Origin [[O_Q]] contains Loan [[L_B]]
// CHECK-LABEL: Function: pointers_in_a_cycle
@@ -161,25 +132,6 @@ void pointers_in_a_cycle(bool condition) {
// CHECK: AssignOrigin (DestID: [[O_P3]], SrcID: [[O_TEMP_RVAL]])
}
}
-// At the end of the analysis, the origins for the pointers involved in the cycle
-// (p1, p2, p3, temp) should all contain the loans from v1, v2, and v3 at the fixed point.
-// CHECK: LoanPropagation results:
-// CHECK-DAG: Origin [[O_P1]] contains Loan [[L_V1]]
-// CHECK-DAG: Origin [[O_P1]] contains Loan [[L_V2]]
-// CHECK-DAG: Origin [[O_P1]] contains Loan [[L_V3]]
-// CHECK-DAG: Origin [[O_P2]] contains Loan [[L_V1]]
-// CHECK-DAG: Origin [[O_P2]] contains Loan [[L_V2]]
-// CHECK-DAG: Origin [[O_P2]] contains Loan [[L_V3]]
-// CHECK-DAG: Origin [[O_P3]] contains Loan [[L_V1]]
-// CHECK-DAG: Origin [[O_P3]] contains Loan [[L_V2]]
-// CHECK-DAG: Origin [[O_P3]] contains Loan [[L_V3]]
-// CHECK-DAG: Origin [[O_TEMP]] contains Loan [[L_V1]]
-// CHECK-DAG: Origin [[O_TEMP]] contains Loan [[L_V2]]
-// CHECK-DAG: Origin [[O_TEMP]] contains Loan [[L_V3]]
-// CHECK-DAG: Origin [[O_ADDR_V1]] contains Loan [[L_V1]]
-// CHECK-DAG: Origin [[O_ADDR_V2]] contains Loan [[L_V2]]
-// CHECK-DAG: Origin [[O_ADDR_V3]] contains Loan [[L_V3]]
-
// CHECK-LABEL: Function: overwrite_origin
void overwrite_origin() {
@@ -195,10 +147,6 @@ void overwrite_origin() {
// CHECK: Expire (LoanID: [[L_S2]])
// CHECK: Expire (LoanID: [[L_S1]])
}
-// CHECK: LoanPropagation results:
-// CHECK: Origin [[O_P]] contains Loan [[L_S2]]
-// CHECK-NOT: Origin [[O_P]] contains Loan [[L_S1]]
-
// CHECK-LABEL: Function: reassign_to_null
void reassign_to_null() {
@@ -213,8 +161,6 @@ void reassign_to_null() {
}
// FIXME: Have a better representation for nullptr than just an empty origin.
// It should be a separate loan and origin kind.
-// CHECK: LoanPropagation results:
-// CHECK: Origin [[O_P]] contains no loans
// CHECK-LABEL: Function: reassign_in_if
@@ -235,11 +181,6 @@ void reassign_in_if(bool condition) {
// CHECK: Expire (LoanID: [[L_S2]])
// CHECK: Expire (LoanID: [[L_S1]])
}
-// CHECK: LoanPropagation results:
-// CHECK-DAG: Origin [[O_P]] contains Loan [[L_S1]]
-// CHECK-DAG: Origin [[O_P]] contains Loan [[L_S2]]
-// CHECK-DAG: Origin [[O_ADDR_S1]] contains Loan [[L_S1]]
-// CHECK-DAG: Origin [[O_ADDR_S2]] contains Loan [[L_S2]]
// CHECK-LABEL: Function: assign_in_switch
@@ -276,14 +217,6 @@ void assign_in_switch(int mode) {
// CHECK-DAG: Expire (LoanID: [[L_S2]])
// CHECK-DAG: Expire (LoanID: [[L_S1]])
}
-// CHECK: LoanPropagation results:
-// CHECK-DAG: Origin [[O_P]] contains Loan [[L_S1]]
-// CHECK-DAG: Origin [[O_P]] contains Loan [[L_S2]]
-// CHECK-DAG: Origin [[O_P]] contains Loan [[L_S3]]
-// CHECK-DAG: Origin [[O_ADDR_S1]] contains Loan [[L_S1]]
-// CHECK-DAG: Origin [[O_ADDR_S2]] contains Loan [[L_S2]]
-// CHECK-DAG: Origin [[O_ADDR_S3]] contains Loan [[L_S3]]
-
// CHECK-LABEL: Function: loan_in_loop
void loan_in_loop(bool condition) {
@@ -299,10 +232,6 @@ void loan_in_loop(bool condition) {
// CHECK: Expire (LoanID: [[L_INNER]])
}
}
-// CHECK: LoanPropagation results:
-// CHECK-DAG: Origin [[O_P]] contains Loan [[L_INNER]]
-// CHECK-DAG: Origin [[O_ADDR_INNER]] contains Loan [[L_INNER]]
-
// CHECK-LABEL: Function: loop_with_break
void loop_with_break(int count) {
@@ -326,13 +255,6 @@ void loop_with_break(int count) {
// CHECK: Expire (LoanID: [[L_S1]])
}
-// CHECK-LABEL: LoanPropagation results:
-// CHECK-DAG: Origin [[O_P]] contains Loan [[L_S1]]
-// CHECK-DAG: Origin [[O_P]] contains Loan [[L_S2]]
-// CHECK-DAG: Origin [[O_ADDR_S1]] contains Loan [[L_S1]]
-// CHECK-DAG: Origin [[O_ADDR_S2]] contains Loan [[L_S2]]
-
-
// CHECK-LABEL: Function: nested_scopes
void nested_scopes() {
MyObj* p = nullptr;
@@ -355,13 +277,6 @@ void nested_scopes() {
// CHECK: Expire (LoanID: [[L_OUTER]])
}
-// CHECK-LABEL: LoanPropagation results:
-// CHECK-DAG: Origin [[O_P]] contains Loan [[L_INNER]]
-// CHECK-DAG: Origin [[O_ADDR_INNER]] contains Loan [[L_INNER]]
-// CHECK-DAG: Origin [[O_ADDR_OUTER]] contains Loan [[L_OUTER]]
-// CHECK-NOT: Origin [[O_P]] contains Loan [[L_OUTER]]
-
-
// CHECK-LABEL: Function: pointer_indirection
void pointer_indirection() {
int a;
diff --git a/clang/test/SemaCXX/attr-target-clones-riscv.cpp b/clang/test/SemaCXX/attr-target-clones-riscv.cpp
index 102bb4b..7648284 100644
--- a/clang/test/SemaCXX/attr-target-clones-riscv.cpp
+++ b/clang/test/SemaCXX/attr-target-clones-riscv.cpp
@@ -9,6 +9,9 @@ void __attribute__((target_clones("default", "mtune=sifive-u74"))) mtune() {}
// expected-warning@+1 {{version list contains duplicate entries}}
void __attribute__((target_clones("default", "arch=+c", "arch=+c"))) dupVersion() {}
+// expected-warning@+1 {{version list contains duplicate entries}}
+void __attribute__((target_clones(" default", "default "))) dupDefault() {}
+
// expected-warning@+1 {{unsupported '' in the 'target_clones' attribute string; 'target_clones' attribute ignored}}
void __attribute__((target_clones("default", ""))) emptyVersion() {}
diff --git a/clang/test/SemaCXX/constant-expression-p2280r4.cpp b/clang/test/SemaCXX/constant-expression-p2280r4.cpp
index 16f5f82..312a778 100644
--- a/clang/test/SemaCXX/constant-expression-p2280r4.cpp
+++ b/clang/test/SemaCXX/constant-expression-p2280r4.cpp
@@ -383,3 +383,17 @@ namespace enable_if_2 {
}
}
}
+
+namespace GH150015 {
+ extern int (& c)[8]; // interpreter-note {{declared here}}
+ constexpr int x = c <= c+8; // interpreter-error {{constexpr variable 'x' must be initialized by a constant expression}} \
+ // interpreter-note {{initializer of 'c' is unknown}}
+
+ struct X {};
+ struct Y {};
+ struct Z : X, Y {};
+ extern Z &z; // interpreter-note{{declared here}}
+ constexpr int bases = (void*)(X*)&z <= (Y*)&z; // expected-error {{constexpr variable 'bases' must be initialized by a constant expression}} \
+ // nointerpreter-note {{comparison of addresses of subobjects of different base classes has unspecified value}} \
+ // interpreter-note {{initializer of 'z' is unknown}}
+}
diff --git a/clang/test/SemaCXX/cxx2a-ms-no-unique-address.cpp b/clang/test/SemaCXX/cxx2a-ms-no-unique-address.cpp
index 726cfa3..a064401 100644
--- a/clang/test/SemaCXX/cxx2a-ms-no-unique-address.cpp
+++ b/clang/test/SemaCXX/cxx2a-ms-no-unique-address.cpp
@@ -1,5 +1,6 @@
// RUN: %clang_cc1 -std=c++2a %s -verify=unsupported -triple x86_64-linux-gnu
// RUN: %clang_cc1 -std=c++2a %s -verify -triple x86_64-windows -fms-compatibility
+// RUN: %clang_cc1 -std=c++2a %s -verify -triple x86_64-uefi -fms-compatibility
[[msvc::no_unique_address]] int a; // expected-error {{only applies to non-bit-field non-static data members}} unsupported-warning {{unknown}}
[[msvc::no_unique_address]] void f(); // expected-error {{only applies to non-bit-field non-static data members}} unsupported-warning {{unknown}}
diff --git a/clang/test/SemaCXX/wreturn-always-throws.cpp b/clang/test/SemaCXX/wreturn-always-throws.cpp
index addcadd..df7689f 100644
--- a/clang/test/SemaCXX/wreturn-always-throws.cpp
+++ b/clang/test/SemaCXX/wreturn-always-throws.cpp
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 -fsyntax-only -fcxx-exceptions -fexceptions -Wreturn-type -verify %s
+// RUN: %clang_cc1 -fsyntax-only -fcxx-exceptions -fexceptions -Wreturn-type -Winvalid-noreturn -verify %s
// expected-no-diagnostics
namespace std {
@@ -44,3 +44,22 @@ void testTemplates() {
throwErrorTemplate("ERROR");
(void)ensureZeroTemplate(42);
}
+
+// Ensure that explicit specialization of a member function does not inherit
+// the warning from the primary template.
+
+template<typename T>
+struct S {
+ void f();
+ void g();
+};
+
+template<typename T>
+void S<T>::f() { throw 0; }
+template<>
+void S<int>::f() {}
+
+template<typename T>
+void S<T>::g() {}
+template<>
+void S<int>::g() { throw 0; }
diff --git a/clang/test/SemaOpenACC/atomic-construct.cpp b/clang/test/SemaOpenACC/atomic-construct.cpp
index eba2559..10c85c2 100644
--- a/clang/test/SemaOpenACC/atomic-construct.cpp
+++ b/clang/test/SemaOpenACC/atomic-construct.cpp
@@ -1098,7 +1098,7 @@ void AtomicCaptureTemplateCompound(T LHS, T RHS) {
{
LHS--;
// expected-error@-3{{statement associated with OpenACC 'atomic capture' directive is invalid}}
- // expected-note@+1{{variable on right hand side of assignment('RHS') must match variable used in unary expression('LHS') from the first statement}}
+ // expected-note@+1{{sub-expression on right hand side of assignment('RHS') must match sub-expression used in unary expression('LHS') from the first statement}}
LHS = RHS;
}
@@ -1128,7 +1128,7 @@ void AtomicCaptureTemplateCompound(T LHS, T RHS) {
{
LHS *= 1;
// expected-error@-3{{statement associated with OpenACC 'atomic capture' directive is invalid}}
- // expected-note@+1{{variable on right hand side of assignment('RHS') must match variable used on left hand side of compound assignment('LHS') from the first statement}}
+ // expected-note@+1{{sub-expression on right hand side of assignment('RHS') must match sub-expression used on left hand side of compound assignment('LHS') from the first statement}}
LHS = RHS;
}
#pragma acc atomic capture
@@ -1157,7 +1157,7 @@ void AtomicCaptureTemplateCompound(T LHS, T RHS) {
{
LHS = LHS * 1;
// expected-error@-3{{statement associated with OpenACC 'atomic capture' directive is invalid}}
- // expected-note@+1{{variable on right hand side of assignment('RHS') must match variable used on left hand side of assignment('LHS') from the first statement}}
+ // expected-note@+1{{sub-expression on right hand side of assignment('RHS') must match sub-expression used on left hand side of assignment('LHS') from the first statement}}
RHS = RHS;
}
#pragma acc atomic capture
@@ -1186,7 +1186,7 @@ void AtomicCaptureTemplateCompound(T LHS, T RHS) {
{
LHS = LHS | 1;
// expected-error@-3{{statement associated with OpenACC 'atomic capture' directive is invalid}}
- // expected-note@+1{{variable on right hand side of assignment('RHS') must match variable used on left hand side of assignment('LHS') from the first statement}}
+ // expected-note@+1{{sub-expression on right hand side of assignment('RHS') must match sub-expression used on left hand side of assignment('LHS') from the first statement}}
RHS = RHS;
}
#pragma acc atomic capture
@@ -1255,7 +1255,7 @@ void AtomicCaptureTemplateCompound(T LHS, T RHS) {
{
LHS = RHS;
// expected-error@-3{{statement associated with OpenACC 'atomic capture' directive is invalid}}
- // expected-note@+1{{variable on left hand side of assignment('LHS') must match variable used on right hand side of assignment('RHS') from the first statement}}
+ // expected-note@+1{{sub-expression on left hand side of assignment('LHS') must match sub-expression used on right hand side of assignment('RHS') from the first statement}}
LHS = 1;
}
@@ -1294,7 +1294,7 @@ void AtomicCaptureTemplateCompound(T LHS, T RHS) {
{
LHS = RHS;
// expected-error@-3{{statement associated with OpenACC 'atomic capture' directive is invalid}}
- // expected-note@+1{{variable in unary expression('LHS') must match variable used on right hand side of assignment('RHS') from the first statement}}
+ // expected-note@+1{{sub-expression in unary expression('LHS') must match sub-expression used on right hand side of assignment('RHS') from the first statement}}
LHS++;
}
}
@@ -1352,7 +1352,7 @@ void AtomicCaptureTemplateCompound2(T LHS, T RHS) {
{
LHS--;
// expected-error@-3{{statement associated with OpenACC 'atomic capture' directive is invalid}}
- // expected-note@+1{{variable on right hand side of assignment('RHS') must match variable used in unary expression('LHS') from the first statement}}
+ // expected-note@+1{{sub-expression on right hand side of assignment('RHS') must match sub-expression used in unary expression('LHS') from the first statement}}
LHS = RHS;
}
@@ -1384,7 +1384,7 @@ void AtomicCaptureTemplateCompound2(T LHS, T RHS) {
{
LHS *= 1;
// expected-error@-3{{statement associated with OpenACC 'atomic capture' directive is invalid}}
- // expected-note@+1{{variable on right hand side of assignment('RHS') must match variable used on left hand side of compound assignment('LHS') from the first statement}}
+ // expected-note@+1{{sub-expression on right hand side of assignment('RHS') must match sub-expression used on left hand side of compound assignment('LHS') from the first statement}}
LHS = RHS;
}
#pragma acc atomic capture
@@ -1415,7 +1415,7 @@ void AtomicCaptureTemplateCompound2(T LHS, T RHS) {
{
LHS = LHS * 1;
// expected-error@-3{{statement associated with OpenACC 'atomic capture' directive is invalid}}
- // expected-note@+1{{variable on right hand side of assignment('RHS') must match variable used on left hand side of assignment('LHS') from the first statement}}
+ // expected-note@+1{{sub-expression on right hand side of assignment('RHS') must match sub-expression used on left hand side of assignment('LHS') from the first statement}}
RHS = RHS;
}
#pragma acc atomic capture
@@ -1446,7 +1446,7 @@ void AtomicCaptureTemplateCompound2(T LHS, T RHS) {
{
LHS = LHS | 1;
// expected-error@-3{{statement associated with OpenACC 'atomic capture' directive is invalid}}
- // expected-note@+1{{variable on right hand side of assignment('RHS') must match variable used on left hand side of assignment('LHS') from the first statement}}
+ // expected-note@+1{{sub-expression on right hand side of assignment('RHS') must match sub-expression used on left hand side of assignment('LHS') from the first statement}}
RHS = RHS;
}
#pragma acc atomic capture
@@ -1523,7 +1523,7 @@ void AtomicCaptureTemplateCompound2(T LHS, T RHS) {
{
LHS = RHS;
// expected-error@-3{{statement associated with OpenACC 'atomic capture' directive is invalid}}
- // expected-note@+1{{variable on left hand side of assignment('LHS') must match variable used on right hand side of assignment('RHS') from the first statement}}
+ // expected-note@+1{{sub-expression on left hand side of assignment('LHS') must match sub-expression used on right hand side of assignment('RHS') from the first statement}}
LHS = 1;
}
@@ -1570,7 +1570,7 @@ void AtomicCaptureTemplateCompound2(T LHS, T RHS) {
{
LHS = RHS;
// expected-error@-3{{statement associated with OpenACC 'atomic capture' directive is invalid}}
- // expected-note@+1{{variable in unary expression('LHS') must match variable used on right hand side of assignment('RHS') from the first statement}}
+ // expected-note@+1{{sub-expression in unary expression('LHS') must match sub-expression used on right hand side of assignment('RHS') from the first statement}}
LHS++;
}
}
@@ -1629,7 +1629,7 @@ void AtomicCaptureCompound(int LHS, int RHS) {
{
LHS--;
// expected-error@-3{{statement associated with OpenACC 'atomic capture' directive is invalid}}
- // expected-note@+1{{variable on right hand side of assignment('RHS') must match variable used in unary expression('LHS') from the first statement}}
+ // expected-note@+1{{sub-expression on right hand side of assignment('RHS') must match sub-expression used in unary expression('LHS') from the first statement}}
LHS = RHS;
}
@@ -1666,7 +1666,7 @@ void AtomicCaptureCompound(int LHS, int RHS) {
{
LHS *= 1;
// expected-error@-3{{statement associated with OpenACC 'atomic capture' directive is invalid}}
- // expected-note@+1{{variable on right hand side of assignment('RHS') must match variable used on left hand side of compound assignment('LHS') from the first statement}}
+ // expected-note@+1{{sub-expression on right hand side of assignment('RHS') must match sub-expression used on left hand side of compound assignment('LHS') from the first statement}}
LHS = RHS;
}
#pragma acc atomic capture
@@ -1702,7 +1702,7 @@ void AtomicCaptureCompound(int LHS, int RHS) {
{
LHS = LHS * 1;
// expected-error@-3{{statement associated with OpenACC 'atomic capture' directive is invalid}}
- // expected-note@+1{{variable on right hand side of assignment('RHS') must match variable used on left hand side of assignment('LHS') from the first statement}}
+ // expected-note@+1{{sub-expression on right hand side of assignment('RHS') must match sub-expression used on left hand side of assignment('LHS') from the first statement}}
RHS = RHS;
}
#pragma acc atomic capture
@@ -1738,7 +1738,7 @@ void AtomicCaptureCompound(int LHS, int RHS) {
{
LHS = LHS | 1;
// expected-error@-3{{statement associated with OpenACC 'atomic capture' directive is invalid}}
- // expected-note@+1{{variable on right hand side of assignment('RHS') must match variable used on left hand side of assignment('LHS') from the first statement}}
+ // expected-note@+1{{sub-expression on right hand side of assignment('RHS') must match sub-expression used on left hand side of assignment('LHS') from the first statement}}
RHS = RHS;
}
#pragma acc atomic capture
@@ -1815,7 +1815,7 @@ void AtomicCaptureCompound(int LHS, int RHS) {
{
LHS = RHS;
// expected-error@-3{{statement associated with OpenACC 'atomic capture' directive is invalid}}
- // expected-note@+1{{variable on left hand side of assignment('LHS') must match variable used on right hand side of assignment('RHS') from the first statement}}
+ // expected-note@+1{{sub-expression on left hand side of assignment('LHS') must match sub-expression used on right hand side of assignment('RHS') from the first statement}}
LHS = 1;
}
@@ -1861,7 +1861,23 @@ void AtomicCaptureCompound(int LHS, int RHS) {
{
LHS = RHS;
// expected-error@-3{{statement associated with OpenACC 'atomic capture' directive is invalid}}
- // expected-note@+1{{variable in unary expression('LHS') must match variable used on right hand side of assignment('RHS') from the first statement}}
+ // expected-note@+1{{sub-expression in unary expression('LHS') must match sub-expression used on right hand side of assignment('RHS') from the first statement}}
LHS++;
}
+
+ // Example from the UDel test suite, which wasn't working because of
+ // irrelevant parens; make sure we handle these. This should not diagnose.
+ typedef double real_t;
+ int * distribution;
+ real_t *a;
+ real_t *b;
+ int *c;
+ for (int x = 0; x < 5; ++x) {
+#pragma acc atomic capture
+ {
+ c[x] = distribution[(int) (a[x]*b[x]/10)];
+ (distribution[(int)(a[x]*b[x]/10)])--;
+ }
+ }
+
}
diff --git a/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx1250-param.cl b/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx1250-param.cl
index 9711b3b..3247380 100644
--- a/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx1250-param.cl
+++ b/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx1250-param.cl
@@ -1,6 +1,7 @@
// REQUIRES: amdgpu-registered-target
-// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx1250 -verify -S -o - %s
+// RUN: %clang_cc1 -cl-std=CL2.0 -triple amdgcn-- -target-cpu gfx1250 -verify -S -o - %s
+typedef int v2i __attribute__((ext_vector_type(2)));
typedef int v4i __attribute__((ext_vector_type(4)));
typedef int v8i __attribute__((ext_vector_type(8)));
@@ -28,6 +29,17 @@ void test__builtin_amdgcn_cvt_f16_bf8(int a, int b) {
__builtin_amdgcn_cvt_f16_bf8(a, b); // expected-error {{'__builtin_amdgcn_cvt_f16_bf8' must be a constant integer}}
}
+void test_amdgcn_load_monitor(global int* b32gaddr, global v2i* b64gaddr, global v4i* b128gaddr, int *b32faddr, v2i* b64faddr, v4i *b128faddr,
+ global int* b32out, global v2i* b64out, global v4i* b128out, int cpol)
+{
+ *b32out = __builtin_amdgcn_global_load_monitor_b32(b32gaddr, cpol); // expected-error {{'__builtin_amdgcn_global_load_monitor_b32' must be a constant integer}}
+ *b64out = __builtin_amdgcn_global_load_monitor_b64(b64gaddr, cpol); // expected-error {{'__builtin_amdgcn_global_load_monitor_b64' must be a constant integer}}
+ *b128out = __builtin_amdgcn_global_load_monitor_b128(b128gaddr, cpol); // expected-error {{'__builtin_amdgcn_global_load_monitor_b128' must be a constant integer}}
+ *b32out = __builtin_amdgcn_flat_load_monitor_b32(b32faddr, cpol); // expected-error {{'__builtin_amdgcn_flat_load_monitor_b32' must be a constant integer}}
+ *b64out = __builtin_amdgcn_flat_load_monitor_b64(b64faddr, cpol); // expected-error {{'__builtin_amdgcn_flat_load_monitor_b64' must be a constant integer}}
+ *b128out = __builtin_amdgcn_flat_load_monitor_b128(b128faddr, cpol); // expected-error {{'__builtin_amdgcn_flat_load_monitor_b128' must be a constant integer}}
+}
+
void test_amdgcn_tensor_load_store(v4i sg0, v8i sg1, v4i sg2, v4i sg3, int cpol)
{
__builtin_amdgcn_tensor_load_to_lds(sg0, sg1, sg2, sg3, cpol); // expected-error {{'__builtin_amdgcn_tensor_load_to_lds' must be a constant integer}}
@@ -36,6 +48,11 @@ void test_amdgcn_tensor_load_store(v4i sg0, v8i sg1, v4i sg2, v4i sg3, int cpol)
__builtin_amdgcn_tensor_store_from_lds_d2(sg0, sg1, cpol); // expected-error {{'__builtin_amdgcn_tensor_store_from_lds_d2' must be a constant integer}}
}
+void test_prefetch(generic void *fptr, global void *gptr, int cpol) {
+ __builtin_amdgcn_flat_prefetch(fptr, cpol); // expected-error {{'__builtin_amdgcn_flat_prefetch' must be a constant integer}}
+ __builtin_amdgcn_global_prefetch(gptr, cpol); // expected-error {{'__builtin_amdgcn_global_prefetch' must be a constant integer}}
+}
+
void test_cvt_f32_fp8_e5m3(global int* out, int a)
{
*out = __builtin_amdgcn_cvt_f32_fp8_e5m3(a, a); // expected-error {{'__builtin_amdgcn_cvt_f32_fp8_e5m3' must be a constant integer}}
diff --git a/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx1250-wmma-w32-param.cl b/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx1250-wmma-w32-param.cl
index 55d705e..8aa7c34 100644
--- a/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx1250-wmma-w32-param.cl
+++ b/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx1250-wmma-w32-param.cl
@@ -114,6 +114,13 @@ void test_amdgcn_wmma_i32_16x16x64_iu8(global v8i* out, v8i a, v8i b, v8i c, int
*out = __builtin_amdgcn_wmma_i32_16x16x64_iu8(0, a, 0, b, c, false, mod); // expected-error {{'__builtin_amdgcn_wmma_i32_16x16x64_iu8' must be a constant integer}}
}
+void test_amdgcn_wmma_f32_16x16x128_f8f6f4(global v8f* out, v16i a, v16i b, v8f c, int mod)
+{
+ *out = __builtin_amdgcn_wmma_f32_16x16x128_f8f6f4(mod, a, 2, b, 0, c); // expected-error {{'__builtin_amdgcn_wmma_f32_16x16x128_f8f6f4' must be a constant integer}}
+ *out = __builtin_amdgcn_wmma_f32_16x16x128_f8f6f4(1, a, mod, b, 0, c); // expected-error {{'__builtin_amdgcn_wmma_f32_16x16x128_f8f6f4' must be a constant integer}}
+ *out = __builtin_amdgcn_wmma_f32_16x16x128_f8f6f4(1, a, 2, b, mod, c); // expected-error {{'__builtin_amdgcn_wmma_f32_16x16x128_f8f6f4' must be a constant integer}}
+}
+
void test_amdgcn_wmma_f32_16x16x32_f16(global v8f* out, v16h a, v16h b, v8f c, int mod)
{
*out = __builtin_amdgcn_wmma_f32_16x16x32_f16(mod, a, 0, b, 0, c, false, false); // expected-error {{'__builtin_amdgcn_wmma_f32_16x16x32_f16' must be a constant integer}}
diff --git a/clang/test/SemaTemplate/concepts.cpp b/clang/test/SemaTemplate/concepts.cpp
index 62a4f95..663bc98 100644
--- a/clang/test/SemaTemplate/concepts.cpp
+++ b/clang/test/SemaTemplate/concepts.cpp
@@ -1250,3 +1250,11 @@ static_assert(!D<Priv>::has, "Private should be invisible.");
static_assert(!D<Prot>::has, "Protected should be invisible.");
}
+
+
+namespace GH149986 {
+template <typename T> concept PerfectSquare = [](){} // expected-note 2{{here}}
+([](auto) { return true; }) < PerfectSquare <class T>;
+// expected-error@-1 {{declaration of 'T' shadows template parameter}} \
+// expected-error@-1 {{a concept definition cannot refer to itself}}
+}
diff --git a/clang/test/SemaTemplate/deduction-guide.cpp b/clang/test/SemaTemplate/deduction-guide.cpp
index 0953f647..f6bc6ee 100644
--- a/clang/test/SemaTemplate/deduction-guide.cpp
+++ b/clang/test/SemaTemplate/deduction-guide.cpp
@@ -966,3 +966,19 @@ Expand<Type, Invocable<>> _{};
// CHECK-NEXT: | `-ParmVarDecl {{.+}} 'T...' pack
}
+
+namespace GH134613 {
+template <typename R> struct Foo {
+ using value_type = R;
+
+ Foo() = default;
+ Foo(Foo<Foo<R>> &&rhs) {}
+};
+
+void main() {
+ auto r1 = Foo(Foo<Foo<int>>{});
+
+ static_assert(__is_same(decltype(r1)::value_type, int));
+}
+
+}
diff --git a/clang/tools/driver/cc1_main.cpp b/clang/tools/driver/cc1_main.cpp
index 2c17f28..9b1e390 100644
--- a/clang/tools/driver/cc1_main.cpp
+++ b/clang/tools/driver/cc1_main.cpp
@@ -304,7 +304,7 @@ int cc1_main(ArrayRef<const char *> Argv, const char *Argv0, void *MainAddr) {
*IOFile << "{\n";
llvm::TimerGroup::printAllJSONValues(*IOFile, "");
*IOFile << "\n}\n";
- } else {
+ } else if (!Clang->getCodeGenOpts().TimePassesStatsFile) {
llvm::TimerGroup::printAll(*IOFile);
}
llvm::TimerGroup::clearAll();
diff --git a/clang/unittests/Analysis/CMakeLists.txt b/clang/unittests/Analysis/CMakeLists.txt
index 059a748..52e7d28 100644
--- a/clang/unittests/Analysis/CMakeLists.txt
+++ b/clang/unittests/Analysis/CMakeLists.txt
@@ -4,6 +4,7 @@ add_clang_unittest(ClangAnalysisTests
CloneDetectionTest.cpp
ExprMutationAnalyzerTest.cpp
IntervalPartitionTest.cpp
+ LifetimeSafetyTest.cpp
MacroExpansionContextTest.cpp
UnsafeBufferUsageTest.cpp
CLANG_LIBS
diff --git a/clang/unittests/Analysis/LifetimeSafetyTest.cpp b/clang/unittests/Analysis/LifetimeSafetyTest.cpp
new file mode 100644
index 0000000..a48dc45
--- /dev/null
+++ b/clang/unittests/Analysis/LifetimeSafetyTest.cpp
@@ -0,0 +1,710 @@
+//===- LifetimeSafetyTest.cpp - Lifetime Safety Tests ----------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Analysis/Analyses/LifetimeSafety.h"
+#include "clang/ASTMatchers/ASTMatchFinder.h"
+#include "clang/ASTMatchers/ASTMatchers.h"
+#include "clang/Testing/TestAST.h"
+#include "llvm/ADT/StringMap.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include <optional>
+#include <vector>
+
+namespace clang::lifetimes::internal {
+namespace {
+
+using namespace ast_matchers;
+using ::testing::UnorderedElementsAreArray;
+
+// A helper class to run the full lifetime analysis on a piece of code
+// and provide an interface for querying the results.
+class LifetimeTestRunner {
+public:
+ LifetimeTestRunner(llvm::StringRef Code) {
+ std::string FullCode = R"(
+ #define POINT(name) void("__lifetime_test_point_" #name)
+ struct MyObj { ~MyObj() {} int i; };
+ )";
+ FullCode += Code.str();
+
+ AST = std::make_unique<clang::TestAST>(FullCode);
+ ASTCtx = &AST->context();
+
+ // Find the target function using AST matchers.
+ auto MatchResult =
+ match(functionDecl(hasName("target")).bind("target"), *ASTCtx);
+ auto *FD = selectFirst<FunctionDecl>("target", MatchResult);
+ if (!FD) {
+ ADD_FAILURE() << "Test case must have a function named 'target'";
+ return;
+ }
+ AnalysisCtx = std::make_unique<AnalysisDeclContext>(nullptr, FD);
+ CFG::BuildOptions &BuildOptions = AnalysisCtx->getCFGBuildOptions();
+ BuildOptions.setAllAlwaysAdd();
+ BuildOptions.AddImplicitDtors = true;
+ BuildOptions.AddTemporaryDtors = true;
+
+ // Run the main analysis.
+ Analysis = std::make_unique<LifetimeSafetyAnalysis>(*AnalysisCtx);
+ Analysis->run();
+
+ AnnotationToPointMap = Analysis->getTestPoints();
+ }
+
+ LifetimeSafetyAnalysis &getAnalysis() { return *Analysis; }
+ ASTContext &getASTContext() { return *ASTCtx; }
+
+ ProgramPoint getProgramPoint(llvm::StringRef Annotation) {
+ auto It = AnnotationToPointMap.find(Annotation);
+ if (It == AnnotationToPointMap.end()) {
+ ADD_FAILURE() << "Annotation '" << Annotation << "' not found.";
+ return nullptr;
+ }
+ return It->second;
+ }
+
+private:
+ std::unique_ptr<TestAST> AST;
+ ASTContext *ASTCtx = nullptr;
+ std::unique_ptr<AnalysisDeclContext> AnalysisCtx;
+ std::unique_ptr<LifetimeSafetyAnalysis> Analysis;
+ llvm::StringMap<ProgramPoint> AnnotationToPointMap;
+};
+
+// A convenience wrapper that uses the LifetimeSafetyAnalysis public API.
+class LifetimeTestHelper {
+public:
+ LifetimeTestHelper(LifetimeTestRunner &Runner)
+ : Runner(Runner), Analysis(Runner.getAnalysis()) {}
+
+ std::optional<OriginID> getOriginForDecl(llvm::StringRef VarName) {
+ auto *VD = findDecl<ValueDecl>(VarName);
+ if (!VD)
+ return std::nullopt;
+ auto OID = Analysis.getOriginIDForDecl(VD);
+ if (!OID)
+ ADD_FAILURE() << "Origin for '" << VarName << "' not found.";
+ return OID;
+ }
+
+ std::optional<LoanID> getLoanForVar(llvm::StringRef VarName) {
+ auto *VD = findDecl<VarDecl>(VarName);
+ if (!VD)
+ return std::nullopt;
+ std::vector<LoanID> LID = Analysis.getLoanIDForVar(VD);
+ if (LID.empty()) {
+ ADD_FAILURE() << "Loan for '" << VarName << "' not found.";
+ return std::nullopt;
+ }
+ // TODO: Support retrieving more than one loan for a var.
+ if (LID.size() > 1) {
+ ADD_FAILURE() << "More than one loan found for '" << VarName << "'";
+ return std::nullopt;
+ }
+ return LID[0];
+ }
+
+ std::optional<LoanSet> getLoansAtPoint(OriginID OID,
+ llvm::StringRef Annotation) {
+ ProgramPoint PP = Runner.getProgramPoint(Annotation);
+ if (!PP)
+ return std::nullopt;
+ return Analysis.getLoansAtPoint(OID, PP);
+ }
+
+ std::optional<LoanSet> getExpiredLoansAtPoint(llvm::StringRef Annotation) {
+ ProgramPoint PP = Runner.getProgramPoint(Annotation);
+ if (!PP)
+ return std::nullopt;
+ return Analysis.getExpiredLoansAtPoint(PP);
+ }
+
+private:
+ template <typename DeclT> DeclT *findDecl(llvm::StringRef Name) {
+ auto &Ctx = Runner.getASTContext();
+ auto Results = match(valueDecl(hasName(Name)).bind("v"), Ctx);
+ if (Results.empty()) {
+ ADD_FAILURE() << "Declaration '" << Name << "' not found in AST.";
+ return nullptr;
+ }
+ return const_cast<DeclT *>(selectFirst<DeclT>("v", Results));
+ }
+
+ LifetimeTestRunner &Runner;
+ LifetimeSafetyAnalysis &Analysis;
+};
+
+// ========================================================================= //
+// GTest Matchers & Fixture
+// ========================================================================= //
+
+// A helper class to represent a set of loans, identified by variable names.
+class LoanSetInfo {
+public:
+ LoanSetInfo(const std::vector<std::string> &Vars, LifetimeTestHelper &H)
+ : LoanVars(Vars), Helper(H) {}
+ std::vector<std::string> LoanVars;
+ LifetimeTestHelper &Helper;
+};
+
+// Holds the name of the origin variable and a reference to the helper.
+class OriginInfo {
+public:
+ OriginInfo(llvm::StringRef OriginVar, LifetimeTestHelper &Helper)
+ : OriginVar(OriginVar), Helper(Helper) {}
+ llvm::StringRef OriginVar;
+ LifetimeTestHelper &Helper;
+};
+
+/// Matcher to verify the set of loans held by an origin at a specific
+/// program point.
+///
+/// This matcher is intended to be used with an \c OriginInfo object.
+///
+/// \param LoanVars A vector of strings, where each string is the name of a
+/// variable expected to be the source of a loan.
+/// \param Annotation A string identifying the program point (created with
+/// POINT()) where the check should be performed.
+MATCHER_P2(HasLoansToImpl, LoanVars, Annotation, "") {
+ const OriginInfo &Info = arg;
+ std::optional<OriginID> OIDOpt = Info.Helper.getOriginForDecl(Info.OriginVar);
+ if (!OIDOpt) {
+ *result_listener << "could not find origin for '" << Info.OriginVar.str()
+ << "'";
+ return false;
+ }
+
+ std::optional<LoanSet> ActualLoansSetOpt =
+ Info.Helper.getLoansAtPoint(*OIDOpt, Annotation);
+ if (!ActualLoansSetOpt) {
+ *result_listener << "could not get a valid loan set at point '"
+ << Annotation << "'";
+ return false;
+ }
+ std::vector<LoanID> ActualLoans(ActualLoansSetOpt->begin(),
+ ActualLoansSetOpt->end());
+
+ std::vector<LoanID> ExpectedLoans;
+ for (const auto &LoanVar : LoanVars) {
+ std::optional<LoanID> ExpectedLIDOpt = Info.Helper.getLoanForVar(LoanVar);
+ if (!ExpectedLIDOpt) {
+ *result_listener << "could not find loan for var '" << LoanVar << "'";
+ return false;
+ }
+ ExpectedLoans.push_back(*ExpectedLIDOpt);
+ }
+
+ return ExplainMatchResult(UnorderedElementsAreArray(ExpectedLoans),
+ ActualLoans, result_listener);
+}
+
+/// Matcher to verify that the complete set of expired loans at a program point
+/// matches the expected loan set.
+MATCHER_P(AreExpiredAt, Annotation, "") {
+ const LoanSetInfo &Info = arg;
+ auto &Helper = Info.Helper;
+
+ auto ActualExpiredSetOpt = Helper.getExpiredLoansAtPoint(Annotation);
+ if (!ActualExpiredSetOpt) {
+ *result_listener << "could not get a valid expired loan set at point '"
+ << Annotation << "'";
+ return false;
+ }
+ std::vector<LoanID> ActualExpiredLoans(ActualExpiredSetOpt->begin(),
+ ActualExpiredSetOpt->end());
+ std::vector<LoanID> ExpectedExpiredLoans;
+ for (const auto &VarName : Info.LoanVars) {
+ auto LoanIDOpt = Helper.getLoanForVar(VarName);
+ if (!LoanIDOpt) {
+ *result_listener << "could not find a loan for variable '" << VarName
+ << "'";
+ return false;
+ }
+ ExpectedExpiredLoans.push_back(*LoanIDOpt);
+ }
+ return ExplainMatchResult(UnorderedElementsAreArray(ExpectedExpiredLoans),
+ ActualExpiredLoans, result_listener);
+}
+
+// Base test fixture to manage the runner and helper.
+class LifetimeAnalysisTest : public ::testing::Test {
+protected:
+ void SetupTest(llvm::StringRef Code) {
+ Runner = std::make_unique<LifetimeTestRunner>(Code);
+ Helper = std::make_unique<LifetimeTestHelper>(*Runner);
+ }
+
+ OriginInfo Origin(llvm::StringRef OriginVar) {
+ return OriginInfo(OriginVar, *Helper);
+ }
+
+ /// Factory function that hides the std::vector creation.
+ LoanSetInfo LoansTo(std::initializer_list<std::string> LoanVars) {
+ return LoanSetInfo({LoanVars}, *Helper);
+ }
+
+ /// A convenience helper for asserting that no loans are expired.
+ LoanSetInfo NoLoans() { return LoansTo({}); }
+
+ // Factory function that hides the std::vector creation.
+ auto HasLoansTo(std::initializer_list<std::string> LoanVars,
+ const char *Annotation) {
+ return HasLoansToImpl(std::vector<std::string>(LoanVars), Annotation);
+ }
+
+ std::unique_ptr<LifetimeTestRunner> Runner;
+ std::unique_ptr<LifetimeTestHelper> Helper;
+};
+
+// ========================================================================= //
+// TESTS
+// ========================================================================= //
+
+TEST_F(LifetimeAnalysisTest, SimpleLoanAndOrigin) {
+ SetupTest(R"(
+ void target() {
+ int x;
+ int* p = &x;
+ POINT(p1);
+ }
+ )");
+ EXPECT_THAT(Origin("p"), HasLoansTo({"x"}, "p1"));
+}
+
+TEST_F(LifetimeAnalysisTest, OverwriteOrigin) {
+ SetupTest(R"(
+ void target() {
+ MyObj s1, s2;
+
+ MyObj* p = &s1;
+ POINT(after_s1);
+
+ p = &s2;
+ POINT(after_s2);
+ }
+ )");
+ EXPECT_THAT(Origin("p"), HasLoansTo({"s1"}, "after_s1"));
+ EXPECT_THAT(Origin("p"), HasLoansTo({"s2"}, "after_s2"));
+}
+
+TEST_F(LifetimeAnalysisTest, ConditionalLoan) {
+ SetupTest(R"(
+ void target(bool cond) {
+ int a, b;
+ int *p = nullptr;
+ if (cond) {
+ p = &a;
+ POINT(after_then);
+ } else {
+ p = &b;
+ POINT(after_else);
+ }
+ POINT(after_if);
+ }
+ )");
+ EXPECT_THAT(Origin("p"), HasLoansTo({"a"}, "after_then"));
+ EXPECT_THAT(Origin("p"), HasLoansTo({"b"}, "after_else"));
+ EXPECT_THAT(Origin("p"), HasLoansTo({"a", "b"}, "after_if"));
+}
+
+TEST_F(LifetimeAnalysisTest, PointerChain) {
+ SetupTest(R"(
+ void target() {
+ MyObj y;
+ MyObj* ptr1 = &y;
+ POINT(p1);
+
+ MyObj* ptr2 = ptr1;
+ POINT(p2);
+
+ ptr2 = ptr1;
+ POINT(p3);
+
+ ptr2 = ptr2; // Self assignment
+ POINT(p4);
+ }
+ )");
+ EXPECT_THAT(Origin("ptr1"), HasLoansTo({"y"}, "p1"));
+ EXPECT_THAT(Origin("ptr2"), HasLoansTo({"y"}, "p2"));
+ EXPECT_THAT(Origin("ptr2"), HasLoansTo({"y"}, "p3"));
+ EXPECT_THAT(Origin("ptr2"), HasLoansTo({"y"}, "p4"));
+}
+
+TEST_F(LifetimeAnalysisTest, ReassignToNull) {
+ SetupTest(R"(
+ void target() {
+ MyObj s1;
+ MyObj* p = &s1;
+ POINT(before_null);
+ p = nullptr;
+ POINT(after_null);
+ }
+ )");
+ EXPECT_THAT(Origin("p"), HasLoansTo({"s1"}, "before_null"));
+ EXPECT_THAT(Origin("p"), HasLoansTo({}, "after_null"));
+}
+
+TEST_F(LifetimeAnalysisTest, ReassignInIf) {
+ SetupTest(R"(
+ void target(bool condition) {
+ MyObj s1, s2;
+ MyObj* p = &s1;
+ POINT(before_if);
+ if (condition) {
+ p = &s2;
+ POINT(after_reassign);
+ }
+ POINT(after_if);
+ }
+ )");
+ EXPECT_THAT(Origin("p"), HasLoansTo({"s1"}, "before_if"));
+ EXPECT_THAT(Origin("p"), HasLoansTo({"s2"}, "after_reassign"));
+ EXPECT_THAT(Origin("p"), HasLoansTo({"s1", "s2"}, "after_if"));
+}
+
+TEST_F(LifetimeAnalysisTest, AssignInSwitch) {
+ SetupTest(R"(
+ void target(int mode) {
+ MyObj s1, s2, s3;
+ MyObj* p = nullptr;
+ switch (mode) {
+ case 1:
+ p = &s1;
+ POINT(case1);
+ break;
+ case 2:
+ p = &s2;
+ POINT(case2);
+ break;
+ default:
+ p = &s3;
+ POINT(case3);
+ break;
+ }
+ POINT(after_switch);
+ }
+ )");
+ EXPECT_THAT(Origin("p"), HasLoansTo({"s1"}, "case1"));
+ EXPECT_THAT(Origin("p"), HasLoansTo({"s2"}, "case2"));
+ EXPECT_THAT(Origin("p"), HasLoansTo({"s3"}, "case3"));
+ EXPECT_THAT(Origin("p"), HasLoansTo({"s1", "s2", "s3"}, "after_switch"));
+}
+
+TEST_F(LifetimeAnalysisTest, LoanInLoop) {
+ SetupTest(R"(
+ void target(bool condition) {
+ MyObj* p = nullptr;
+ while (condition) {
+ POINT(start_loop);
+ MyObj inner;
+ p = &inner;
+ POINT(end_loop);
+ }
+ POINT(after_loop);
+ }
+ )");
+ EXPECT_THAT(Origin("p"), HasLoansTo({"inner"}, "start_loop"));
+ EXPECT_THAT(LoansTo({"inner"}), AreExpiredAt("start_loop"));
+
+ EXPECT_THAT(Origin("p"), HasLoansTo({"inner"}, "end_loop"));
+ EXPECT_THAT(NoLoans(), AreExpiredAt("end_loop"));
+
+ EXPECT_THAT(Origin("p"), HasLoansTo({"inner"}, "after_loop"));
+ EXPECT_THAT(LoansTo({"inner"}), AreExpiredAt("after_loop"));
+}
+
+TEST_F(LifetimeAnalysisTest, LoopWithBreak) {
+ SetupTest(R"(
+ void target(int count) {
+ MyObj s1;
+ MyObj s2;
+ MyObj* p = &s1;
+ POINT(before_loop);
+ for (int i = 0; i < count; ++i) {
+ if (i == 5) {
+ p = &s2;
+ POINT(inside_if);
+ break;
+ }
+ POINT(after_if);
+ }
+ POINT(after_loop);
+ }
+ )");
+ EXPECT_THAT(Origin("p"), HasLoansTo({"s1"}, "before_loop"));
+ EXPECT_THAT(Origin("p"), HasLoansTo({"s2"}, "inside_if"));
+ // At the join point after if, s2 cannot make it to p without the if.
+ EXPECT_THAT(Origin("p"), HasLoansTo({"s1"}, "after_if"));
+ // At the join point after the loop, p could hold a loan to s1 (if the loop
+ // completed normally) or to s2 (if the loop was broken).
+ EXPECT_THAT(Origin("p"), HasLoansTo({"s1", "s2"}, "after_loop"));
+}
+
+TEST_F(LifetimeAnalysisTest, PointersInACycle) {
+ SetupTest(R"(
+ void target(bool condition) {
+ MyObj v1, v2, v3;
+ MyObj *p1 = &v1, *p2 = &v2, *p3 = &v3;
+
+ POINT(before_while);
+ while (condition) {
+ MyObj* temp = p1;
+ p1 = p2;
+ p2 = p3;
+ p3 = temp;
+ }
+ POINT(after_loop);
+ }
+ )");
+ EXPECT_THAT(Origin("p1"), HasLoansTo({"v1"}, "before_while"));
+ EXPECT_THAT(Origin("p2"), HasLoansTo({"v2"}, "before_while"));
+ EXPECT_THAT(Origin("p3"), HasLoansTo({"v3"}, "before_while"));
+
+ // At the fixed point after the loop, all pointers could point to any of
+ // the three variables.
+ EXPECT_THAT(Origin("p1"), HasLoansTo({"v1", "v2", "v3"}, "after_loop"));
+ EXPECT_THAT(Origin("p2"), HasLoansTo({"v1", "v2", "v3"}, "after_loop"));
+ EXPECT_THAT(Origin("p3"), HasLoansTo({"v1", "v2", "v3"}, "after_loop"));
+ EXPECT_THAT(Origin("temp"), HasLoansTo({"v1", "v2", "v3"}, "after_loop"));
+}
+
+TEST_F(LifetimeAnalysisTest, PointersAndExpirationInACycle) {
+ SetupTest(R"(
+ void target(bool condition) {
+ MyObj v1, v2;
+ MyObj *p1 = &v1, *p2 = &v2;
+
+ POINT(before_while);
+ while (condition) {
+ POINT(in_loop_before_temp);
+ MyObj temp;
+ p1 = &temp;
+ POINT(in_loop_after_temp);
+
+ MyObj* q = p1;
+ p1 = p2;
+ p2 = q;
+ }
+ POINT(after_loop);
+ }
+ )");
+ EXPECT_THAT(Origin("p1"), HasLoansTo({"v1"}, "before_while"));
+ EXPECT_THAT(Origin("p2"), HasLoansTo({"v2"}, "before_while"));
+ EXPECT_THAT(NoLoans(), AreExpiredAt("before_while"));
+
+ EXPECT_THAT(Origin("p1"),
+ HasLoansTo({"v1", "v2", "temp"}, "in_loop_before_temp"));
+ EXPECT_THAT(Origin("p2"), HasLoansTo({"v2", "temp"}, "in_loop_before_temp"));
+ EXPECT_THAT(LoansTo({"temp"}), AreExpiredAt("in_loop_before_temp"));
+
+ EXPECT_THAT(Origin("p1"), HasLoansTo({"temp"}, "in_loop_after_temp"));
+ EXPECT_THAT(Origin("p2"), HasLoansTo({"v2", "temp"}, "in_loop_after_temp"));
+ EXPECT_THAT(NoLoans(), AreExpiredAt("in_loop_after_temp"));
+
+ EXPECT_THAT(Origin("p1"), HasLoansTo({"v1", "v2", "temp"}, "after_loop"));
+ EXPECT_THAT(Origin("p2"), HasLoansTo({"v2", "temp"}, "after_loop"));
+ EXPECT_THAT(LoansTo({"temp"}), AreExpiredAt("after_loop"));
+}
+
+TEST_F(LifetimeAnalysisTest, NestedScopes) {
+ SetupTest(R"(
+ void target() {
+ MyObj* p = nullptr;
+ {
+ MyObj outer;
+ p = &outer;
+ POINT(before_inner_scope);
+ {
+ MyObj inner;
+ p = &inner;
+ POINT(inside_inner_scope);
+ } // inner expires
+ POINT(after_inner_scope);
+ } // outer expires
+ }
+ )");
+ EXPECT_THAT(Origin("p"), HasLoansTo({"outer"}, "before_inner_scope"));
+ EXPECT_THAT(Origin("p"), HasLoansTo({"inner"}, "inside_inner_scope"));
+ EXPECT_THAT(Origin("p"), HasLoansTo({"inner"}, "after_inner_scope"));
+}
+
+TEST_F(LifetimeAnalysisTest, SimpleExpiry) {
+ SetupTest(R"(
+ void target() {
+ MyObj* p = nullptr;
+ {
+ MyObj s;
+ p = &s;
+ POINT(before_expiry);
+ } // s goes out of scope here
+ POINT(after_expiry);
+ }
+ )");
+ EXPECT_THAT(NoLoans(), AreExpiredAt("before_expiry"));
+ EXPECT_THAT(LoansTo({"s"}), AreExpiredAt("after_expiry"));
+}
+
+TEST_F(LifetimeAnalysisTest, NestedExpiry) {
+ SetupTest(R"(
+ void target() {
+ MyObj s1;
+ MyObj* p = &s1;
+ POINT(before_inner);
+ {
+ MyObj s2;
+ p = &s2;
+ POINT(in_inner);
+ } // s2 expires
+ POINT(after_inner);
+ }
+ )");
+ EXPECT_THAT(NoLoans(), AreExpiredAt("before_inner"));
+ EXPECT_THAT(NoLoans(), AreExpiredAt("in_inner"));
+ EXPECT_THAT(LoansTo({"s2"}), AreExpiredAt("after_inner"));
+}
+
+TEST_F(LifetimeAnalysisTest, ConditionalExpiry) {
+ SetupTest(R"(
+ void target(bool cond) {
+ MyObj s1;
+ MyObj* p = &s1;
+ POINT(before_if);
+ if (cond) {
+ MyObj s2;
+ p = &s2;
+ POINT(then_block);
+ } // s2 expires here
+ POINT(after_if);
+ }
+ )");
+ EXPECT_THAT(NoLoans(), AreExpiredAt("before_if"));
+ EXPECT_THAT(NoLoans(), AreExpiredAt("then_block"));
+ EXPECT_THAT(LoansTo({"s2"}), AreExpiredAt("after_if"));
+}
+
+TEST_F(LifetimeAnalysisTest, LoopExpiry) {
+ SetupTest(R"(
+ void target() {
+ MyObj *p = nullptr;
+ for (int i = 0; i < 2; ++i) {
+ POINT(start_loop);
+ MyObj s;
+ p = &s;
+ POINT(end_loop);
+ } // s expires here on each iteration
+ POINT(after_loop);
+ }
+ )");
+ EXPECT_THAT(LoansTo({"s"}), AreExpiredAt("start_loop"));
+ EXPECT_THAT(NoLoans(), AreExpiredAt("end_loop"));
+ EXPECT_THAT(LoansTo({"s"}), AreExpiredAt("after_loop"));
+}
+
+TEST_F(LifetimeAnalysisTest, MultipleExpiredLoans) {
+ SetupTest(R"(
+ void target() {
+ MyObj *p1, *p2, *p3;
+ {
+ MyObj s1;
+ p1 = &s1;
+ POINT(p1);
+ } // s1 expires
+ POINT(p2);
+ {
+ MyObj s2;
+ p2 = &s2;
+ MyObj s3;
+ p3 = &s3;
+ POINT(p3);
+ } // s2, s3 expire
+ POINT(p4);
+ }
+ )");
+ EXPECT_THAT(NoLoans(), AreExpiredAt("p1"));
+ EXPECT_THAT(LoansTo({"s1"}), AreExpiredAt("p2"));
+ EXPECT_THAT(LoansTo({"s1"}), AreExpiredAt("p3"));
+ EXPECT_THAT(LoansTo({"s1", "s2", "s3"}), AreExpiredAt("p4"));
+}
+
+TEST_F(LifetimeAnalysisTest, GotoJumpsOutOfScope) {
+ SetupTest(R"(
+ void target(bool cond) {
+ MyObj *p = nullptr;
+ {
+ MyObj s;
+ p = &s;
+ POINT(before_goto);
+ if (cond) {
+ goto end;
+ }
+ } // `s` expires here on the path that doesn't jump
+ POINT(after_scope);
+ end:
+ POINT(after_goto);
+ }
+ )");
+ EXPECT_THAT(NoLoans(), AreExpiredAt("before_goto"));
+ EXPECT_THAT(LoansTo({"s"}), AreExpiredAt("after_scope"));
+ EXPECT_THAT(LoansTo({"s"}), AreExpiredAt("after_goto"));
+}
+
+TEST_F(LifetimeAnalysisTest, ContinueInLoop) {
+ SetupTest(R"(
+ void target(int count) {
+ MyObj *p = nullptr;
+ MyObj outer;
+ p = &outer;
+ POINT(before_loop);
+
+ for (int i = 0; i < count; ++i) {
+ if (i % 2 == 0) {
+ MyObj s_even;
+ p = &s_even;
+ POINT(in_even_iter);
+ continue;
+ }
+ MyObj s_odd;
+ p = &s_odd;
+ POINT(in_odd_iter);
+ }
+ POINT(after_loop);
+ }
+ )");
+ EXPECT_THAT(NoLoans(), AreExpiredAt("before_loop"));
+ EXPECT_THAT(LoansTo({"s_odd"}), AreExpiredAt("in_even_iter"));
+ EXPECT_THAT(LoansTo({"s_even"}), AreExpiredAt("in_odd_iter"));
+ EXPECT_THAT(LoansTo({"s_even", "s_odd"}), AreExpiredAt("after_loop"));
+}
+
+TEST_F(LifetimeAnalysisTest, ReassignedPointerThenOriginalExpires) {
+ SetupTest(R"(
+ void target() {
+ MyObj* p = nullptr;
+ {
+ MyObj s1;
+ p = &s1;
+ POINT(p_has_s1);
+ {
+ MyObj s2;
+ p = &s2;
+ POINT(p_has_s2);
+ }
+ POINT(p_after_s2_expires);
+ } // s1 expires here.
+ POINT(p_after_s1_expires);
+ }
+ )");
+ EXPECT_THAT(NoLoans(), AreExpiredAt("p_has_s1"));
+ EXPECT_THAT(NoLoans(), AreExpiredAt("p_has_s2"));
+ EXPECT_THAT(LoansTo({"s2"}), AreExpiredAt("p_after_s2_expires"));
+ EXPECT_THAT(LoansTo({"s1", "s2"}), AreExpiredAt("p_after_s1_expires"));
+}
+
+} // anonymous namespace
+} // namespace clang::lifetimes::internal
diff --git a/clang/unittests/Format/BracesInserterTest.cpp b/clang/unittests/Format/BracesInserterTest.cpp
index e0c447d..572e53e 100644
--- a/clang/unittests/Format/BracesInserterTest.cpp
+++ b/clang/unittests/Format/BracesInserterTest.cpp
@@ -257,9 +257,9 @@ TEST_F(BracesInserterTest, InsertBracesRange) {
FormatStyle Style = getLLVMStyle();
Style.InsertBraces = true;
- const StringRef Code("while (a)\n"
- " if (b)\n"
- " return;");
+ constexpr StringRef Code("while (a)\n"
+ " if (b)\n"
+ " return;");
verifyFormat("while (a) {\n"
" if (b)\n"
diff --git a/clang/unittests/Format/ConfigParseTest.cpp b/clang/unittests/Format/ConfigParseTest.cpp
index 65d8b36..9de3cca 100644
--- a/clang/unittests/Format/ConfigParseTest.cpp
+++ b/clang/unittests/Format/ConfigParseTest.cpp
@@ -249,6 +249,7 @@ TEST(ConfigParseTest, ParsesConfigurationBools) {
CHECK_PARSE_NESTED_BOOL(SpaceBeforeParensOptions,
AfterFunctionDefinitionName);
CHECK_PARSE_NESTED_BOOL(SpaceBeforeParensOptions, AfterIfMacros);
+ CHECK_PARSE_NESTED_BOOL(SpaceBeforeParensOptions, AfterNot);
CHECK_PARSE_NESTED_BOOL(SpaceBeforeParensOptions, AfterOverloadedOperator);
CHECK_PARSE_NESTED_BOOL(SpaceBeforeParensOptions, AfterPlacementOperator);
CHECK_PARSE_NESTED_BOOL(SpaceBeforeParensOptions, BeforeNonEmptyParentheses);
diff --git a/clang/unittests/Format/FormatTest.cpp b/clang/unittests/Format/FormatTest.cpp
index 0bc1c6d..c20d099 100644
--- a/clang/unittests/Format/FormatTest.cpp
+++ b/clang/unittests/Format/FormatTest.cpp
@@ -3185,7 +3185,7 @@ TEST_F(FormatTest, FormatsLabels) {
// The opening brace may either be on the same unwrapped line as the colon or
// on a separate one. The formatter should recognize both.
Style = getLLVMStyle();
- Style.BreakBeforeBraces = FormatStyle::BraceBreakingStyle::BS_Allman;
+ Style.BreakBeforeBraces = FormatStyle::BS_Allman;
verifyFormat("{\n"
" some_code();\n"
"test_label:\n"
@@ -3206,7 +3206,7 @@ TEST_F(FormatTest, FormatsLabels) {
TEST_F(FormatTest, MultiLineControlStatements) {
FormatStyle Style = getLLVMStyleWithColumns(20);
- Style.BreakBeforeBraces = FormatStyle::BraceBreakingStyle::BS_Custom;
+ Style.BreakBeforeBraces = FormatStyle::BS_Custom;
Style.BraceWrapping.AfterControlStatement = FormatStyle::BWACS_MultiLine;
// Short lines should keep opening brace on same line.
verifyFormat("if (foo) {\n"
@@ -3441,7 +3441,7 @@ TEST_F(FormatTest, MultiLineControlStatements) {
TEST_F(FormatTest, BeforeWhile) {
FormatStyle Style = getLLVMStyle();
- Style.BreakBeforeBraces = FormatStyle::BraceBreakingStyle::BS_Custom;
+ Style.BreakBeforeBraces = FormatStyle::BS_Custom;
verifyFormat("do {\n"
" foo();\n"
@@ -4803,12 +4803,13 @@ TEST_F(FormatTest, FormatsInlineASM) {
"int i;");
auto Style = getLLVMStyleWithColumns(0);
- const StringRef Code1{"asm(\"xyz\" : \"=a\"(a), \"=d\"(b) : \"a\"(data));"};
- const StringRef Code2{"asm(\"xyz\"\n"
- " : \"=a\"(a), \"=d\"(b)\n"
- " : \"a\"(data));"};
- const StringRef Code3{"asm(\"xyz\" : \"=a\"(a), \"=d\"(b)\n"
- " : \"a\"(data));"};
+ constexpr StringRef Code1(
+ "asm(\"xyz\" : \"=a\"(a), \"=d\"(b) : \"a\"(data));");
+ constexpr StringRef Code2("asm(\"xyz\"\n"
+ " : \"=a\"(a), \"=d\"(b)\n"
+ " : \"a\"(data));");
+ constexpr StringRef Code3("asm(\"xyz\" : \"=a\"(a), \"=d\"(b)\n"
+ " : \"a\"(data));");
Style.BreakBeforeInlineASMColon = FormatStyle::BBIAS_OnlyMultiline;
verifyFormat(Code1, Style);
@@ -6681,6 +6682,17 @@ TEST_F(FormatTest, EscapedNewlines) {
" int x(int a);",
AlignLeft);
+ // Escaped with a trigraph. The program just has to avoid crashing.
+ verifyNoCrash("#define A \?\?/\n"
+ "int i;\?\?/\n"
+ " int j;");
+ verifyNoCrash("#define A \?\?/\r\n"
+ "int i;\?\?/\r\n"
+ " int j;");
+ verifyNoCrash("#define A \?\?/\n"
+ "int i;",
+ getGoogleStyle(FormatStyle::LK_CSharp));
+
// CRLF line endings
verifyFormat("#define A \\\r\n int i; \\\r\n int j;",
"#define A \\\r\nint i;\\\r\n int j;", Narrow);
@@ -6693,16 +6705,16 @@ TEST_F(FormatTest, EscapedNewlines) {
" int x(int a);",
AlignLeft);
- constexpr StringRef Code{"#define A \\\n"
+ constexpr StringRef Code("#define A \\\n"
" int a123; \\\n"
" int a; \\\n"
- " int a1234;"};
+ " int a1234;");
verifyFormat(Code, AlignLeft);
- constexpr StringRef Code2{"#define A \\\n"
+ constexpr StringRef Code2("#define A \\\n"
" int a123; \\\n"
" int a; \\\n"
- " int a1234;"};
+ " int a1234;");
auto LastLine = getLLVMStyle();
LastLine.AlignEscapedNewlines = FormatStyle::ENAS_LeftWithLastLine;
verifyFormat(Code2, LastLine);
@@ -7771,6 +7783,16 @@ TEST_F(FormatTest, ConstructorInitializers) {
"Constructor() :\n"
" // Comment forcing unwanted break.\n"
" aaaa(aaaa) {}");
+
+ // Braced initializers with trailing commas.
+ verifyFormat("MyClass::MyClass()\n"
+ " : aaaa{\n"
+ " 0,\n"
+ " },\n"
+ " bbbb{\n"
+ " 0,\n"
+ " } {}",
+ "MyClass::MyClass():aaaa{0,},bbbb{0,}{}");
}
TEST_F(FormatTest, AllowAllConstructorInitializersOnNextLine) {
@@ -12091,15 +12113,17 @@ TEST_F(FormatTest, UnderstandsFunctionRefQualification) {
Prefix = "void a() const &;\n"
"void b() const &;\n";
verifyFormat(Prefix + "int *x;", Prefix + "int* x;", DerivePointerAlignment);
+
+ verifyGoogleFormat("MACRO(int*, std::function<void() &&>);");
}
TEST_F(FormatTest, PointerAlignmentFallback) {
FormatStyle Style = getLLVMStyle();
Style.DerivePointerAlignment = true;
- const StringRef Code("int* p;\n"
- "int *q;\n"
- "int * r;");
+ constexpr StringRef Code("int* p;\n"
+ "int *q;\n"
+ "int * r;");
EXPECT_EQ(Style.PointerAlignment, FormatStyle::PAS_Right);
verifyFormat("int *p;\n"
@@ -15014,7 +15038,7 @@ TEST_F(FormatTest, PullTrivialFunctionDefinitionsIntoSingleLine) {
" aaaaaaaaaaaaaaaaaa,\n"
" aaaaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb) {}");
- constexpr StringRef Code{"void foo() { /* Empty */ }"};
+ constexpr StringRef Code("void foo() { /* Empty */ }");
verifyFormat(Code);
verifyFormat(Code, "void foo() { /* Empty */\n"
"}");
@@ -17652,6 +17676,12 @@ TEST_F(FormatTest, ConfigurableSpaceBeforeParens) {
verifyFormat("int x = int (y);", SomeSpace2);
verifyFormat("auto lambda = []() { return 0; };", SomeSpace2);
+ auto Style = getLLVMStyle();
+ Style.SpaceBeforeParens = FormatStyle::SBPO_Custom;
+ EXPECT_FALSE(Style.SpaceBeforeParensOptions.AfterNot);
+ Style.SpaceBeforeParensOptions.AfterNot = true;
+ verifyFormat("return not (a || b);", Style);
+
FormatStyle SpaceAfterOverloadedOperator = getLLVMStyle();
SpaceAfterOverloadedOperator.SpaceBeforeParens = FormatStyle::SBPO_Custom;
SpaceAfterOverloadedOperator.SpaceBeforeParensOptions
@@ -23779,7 +23809,7 @@ TEST_F(FormatTest, FormatsLambdas) {
LLVMWithBeforeLambdaBody.BreakBeforeBraces = FormatStyle::BS_Custom;
LLVMWithBeforeLambdaBody.BraceWrapping.BeforeLambdaBody = true;
LLVMWithBeforeLambdaBody.AllowShortLambdasOnASingleLine =
- FormatStyle::ShortLambdaStyle::SLS_None;
+ FormatStyle::SLS_None;
verifyFormat("FctWithOneNestedLambdaInline_SLS_None(\n"
" []()\n"
" {\n"
@@ -23815,7 +23845,7 @@ TEST_F(FormatTest, FormatsLambdas) {
LLVMWithBeforeLambdaBody);
LLVMWithBeforeLambdaBody.AllowShortLambdasOnASingleLine =
- FormatStyle::ShortLambdaStyle::SLS_Empty;
+ FormatStyle::SLS_Empty;
verifyFormat("FctWithOneNestedLambdaInline_SLS_Empty(\n"
" []()\n"
" {\n"
@@ -23862,7 +23892,7 @@ TEST_F(FormatTest, FormatsLambdas) {
LLVMWithBeforeLambdaBody);
LLVMWithBeforeLambdaBody.AllowShortLambdasOnASingleLine =
- FormatStyle::ShortLambdaStyle::SLS_Inline;
+ FormatStyle::SLS_Inline;
verifyFormat("FctWithOneNestedLambdaInline_SLS_Inline([]() { return 17; });",
LLVMWithBeforeLambdaBody);
verifyFormat("FctWithOneNestedLambdaEmpty_SLS_Inline([]() {});",
@@ -23893,7 +23923,7 @@ TEST_F(FormatTest, FormatsLambdas) {
LLVMWithBeforeLambdaBody);
LLVMWithBeforeLambdaBody.AllowShortLambdasOnASingleLine =
- FormatStyle::ShortLambdaStyle::SLS_All;
+ FormatStyle::SLS_All;
verifyFormat("FctWithOneNestedLambdaInline_SLS_All([]() { return 17; });",
LLVMWithBeforeLambdaBody);
verifyFormat("FctWithOneNestedLambdaEmpty_SLS_All([]() {});",
@@ -24025,7 +24055,7 @@ TEST_F(FormatTest, FormatsLambdas) {
LLVMWithBeforeLambdaBody);
LLVMWithBeforeLambdaBody.AllowShortLambdasOnASingleLine =
- FormatStyle::ShortLambdaStyle::SLS_None;
+ FormatStyle::SLS_None;
verifyFormat("auto select = [this]() -> const Library::Object *\n"
"{\n"
@@ -24273,7 +24303,7 @@ TEST_F(FormatTest, LambdaWithLineComments) {
LLVMWithBeforeLambdaBody.BreakBeforeBraces = FormatStyle::BS_Custom;
LLVMWithBeforeLambdaBody.BraceWrapping.BeforeLambdaBody = true;
LLVMWithBeforeLambdaBody.AllowShortLambdasOnASingleLine =
- FormatStyle::ShortLambdaStyle::SLS_All;
+ FormatStyle::SLS_All;
verifyFormat("auto k = []() { return; }", LLVMWithBeforeLambdaBody);
verifyFormat("auto k = []() // comment\n"
@@ -27244,7 +27274,7 @@ TEST_F(FormatTest, IndentAccessModifiers) {
TEST_F(FormatTest, LimitlessStringsAndComments) {
auto Style = getLLVMStyleWithColumns(0);
- constexpr StringRef Code =
+ constexpr StringRef Code(
"/**\n"
" * This is a multiline comment with quite some long lines, at least for "
"the LLVM Style.\n"
@@ -27265,7 +27295,7 @@ TEST_F(FormatTest, LimitlessStringsAndComments) {
" const std::string SmallString = \"Hello World\";\n"
" // Small line comment\n"
" return String.size() > SmallString.size();\n"
- "}";
+ "}");
verifyNoChange(Code, Style);
}
@@ -28371,10 +28401,15 @@ TEST_F(FormatTest, BreakAfterAttributes) {
"Foo &operator-(Foo &);",
Style);
- Style.ReferenceAlignment = FormatStyle::ReferenceAlignmentStyle::RAS_Left;
+ Style.ReferenceAlignment = FormatStyle::RAS_Left;
verifyFormat("[[nodiscard]]\n"
"Foo& operator-(Foo&);",
Style);
+
+ Style.BreakBeforeBinaryOperators = FormatStyle::BOS_All;
+ verifyFormat("[[deprecated]]\n"
+ "void f() = delete;",
+ Style);
}
TEST_F(FormatTest, InsertNewlineAtEOF) {
@@ -28384,9 +28419,9 @@ TEST_F(FormatTest, InsertNewlineAtEOF) {
verifyNoChange("int i;\n", Style);
verifyFormat("int i;\n", "int i;", Style);
- constexpr StringRef Code{"namespace {\n"
+ constexpr StringRef Code("namespace {\n"
"int i;\n"
- "} // namespace"};
+ "} // namespace");
verifyFormat(Code.str() + '\n', Code, Style,
{tooling::Range(19, 13)}); // line 3
}
@@ -28395,7 +28430,7 @@ TEST_F(FormatTest, KeepEmptyLinesAtEOF) {
FormatStyle Style = getLLVMStyle();
Style.KeepEmptyLines.AtEndOfFile = true;
- const StringRef Code{"int i;\n\n"};
+ constexpr StringRef Code("int i;\n\n");
verifyNoChange(Code, Style);
verifyFormat(Code, "int i;\n\n\n", Style);
}
@@ -28628,8 +28663,8 @@ TEST_F(FormatTest, PPDirectivesAndCommentsInBracedInit) {
}
TEST_F(FormatTest, BreakAdjacentStringLiterals) {
- constexpr StringRef Code{
- "return \"Code\" \"\\0\\52\\26\\55\\55\\0\" \"x013\" \"\\02\\xBA\";"};
+ constexpr StringRef Code(
+ "return \"Code\" \"\\0\\52\\26\\55\\55\\0\" \"x013\" \"\\02\\xBA\";");
verifyFormat("return \"Code\"\n"
" \"\\0\\52\\26\\55\\55\\0\"\n"
@@ -29040,9 +29075,9 @@ TEST_F(FormatTest, KeepFormFeed) {
auto Style = getLLVMStyle();
Style.KeepFormFeed = true;
- constexpr StringRef NoFormFeed{"int i;\n"
+ constexpr StringRef NoFormFeed("int i;\n"
"\n"
- "void f();"};
+ "void f();");
verifyFormat(NoFormFeed,
"int i;\n"
" \f\n"
@@ -29064,9 +29099,9 @@ TEST_F(FormatTest, KeepFormFeed) {
"void f();\f",
Style);
- constexpr StringRef FormFeed{"int i;\n"
+ constexpr StringRef FormFeed("int i;\n"
"\f\n"
- "void f();"};
+ "void f();");
verifyNoChange(FormFeed, Style);
Style.LineEnding = FormatStyle::LE_LF;
@@ -29076,10 +29111,10 @@ TEST_F(FormatTest, KeepFormFeed) {
"void f();",
Style);
- constexpr StringRef FormFeedBeforeEmptyLine{"int i;\n"
+ constexpr StringRef FormFeedBeforeEmptyLine("int i;\n"
"\f\n"
"\n"
- "void f();"};
+ "void f();");
Style.MaxEmptyLinesToKeep = 2;
verifyFormat(FormFeedBeforeEmptyLine,
"int i;\n"
diff --git a/clang/unittests/Format/FormatTestComments.cpp b/clang/unittests/Format/FormatTestComments.cpp
index 8870755..69026bc 100644
--- a/clang/unittests/Format/FormatTestComments.cpp
+++ b/clang/unittests/Format/FormatTestComments.cpp
@@ -1120,11 +1120,11 @@ TEST_F(FormatTestComments, KeepsLevelOfCommentBeforePPDirective) {
" }\n"
"}"));
- const StringRef Code("void func() {\n"
- " // clang-format off\n"
- " #define KV(value) #value, value\n"
- " // clang-format on\n"
- "}");
+ constexpr StringRef Code("void func() {\n"
+ " // clang-format off\n"
+ " #define KV(value) #value, value\n"
+ " // clang-format on\n"
+ "}");
verifyNoChange(Code);
auto Style = getLLVMStyle();
diff --git a/clang/unittests/Format/FormatTestJava.cpp b/clang/unittests/Format/FormatTestJava.cpp
index ca5aba0..1275564 100644
--- a/clang/unittests/Format/FormatTestJava.cpp
+++ b/clang/unittests/Format/FormatTestJava.cpp
@@ -631,17 +631,17 @@ TEST_F(FormatTestJava, SwitchExpression) {
"});",
Style);
- constexpr StringRef Code1{"i = switch (day) {\n"
+ constexpr StringRef Code1("i = switch (day) {\n"
" case THURSDAY, SATURDAY -> 8;\n"
" case WEDNESDAY -> 9;\n"
" default -> 0;\n"
- "};"};
+ "};");
verifyFormat(Code1, Style);
Style.IndentCaseLabels = true;
verifyFormat(Code1, Style);
- constexpr StringRef Code2{"i = switch (day) {\n"
+ constexpr StringRef Code2("i = switch (day) {\n"
" case THURSDAY, SATURDAY -> {\n"
" foo();\n"
" yield 8;\n"
@@ -653,17 +653,17 @@ TEST_F(FormatTestJava, SwitchExpression) {
" default -> {\n"
" yield 0;\n"
" }\n"
- "};"};
+ "};");
verifyFormat(Code2, Style);
Style.IndentCaseLabels = false;
verifyFormat(Code2, Style);
- constexpr StringRef Code3{"switch (day) {\n"
+ constexpr StringRef Code3("switch (day) {\n"
"case THURSDAY, SATURDAY -> i = 8;\n"
"case WEDNESDAY -> i = 9;\n"
"default -> i = 0;\n"
- "};"};
+ "};");
verifyFormat(Code3, Style);
Style.IndentCaseLabels = true;
diff --git a/clang/unittests/Format/FormatTestSelective.cpp b/clang/unittests/Format/FormatTestSelective.cpp
index 0b7ac21..1a01153 100644
--- a/clang/unittests/Format/FormatTestSelective.cpp
+++ b/clang/unittests/Format/FormatTestSelective.cpp
@@ -672,15 +672,14 @@ TEST_F(FormatTestSelective, FormatMacroRegardlessOfPreviousIndent) {
// need to be adapted.
Style = getLLVMStyle();
- const StringRef Code{" class Foo {\n"
- " void test() {\n"
- " #ifdef 1\n"
- " #define some\n" // format this line
- " #endif\n"
- " }};"};
-
- EXPECT_EQ(Style.IndentPPDirectives,
- FormatStyle::PPDirectiveIndentStyle::PPDIS_None);
+ constexpr StringRef Code(" class Foo {\n"
+ " void test() {\n"
+ " #ifdef 1\n"
+ " #define some\n" // format this line
+ " #endif\n"
+ " }};");
+
+ EXPECT_EQ(Style.IndentPPDirectives, FormatStyle::PPDIS_None);
EXPECT_EQ(" class Foo {\n"
" void test() {\n"
" #ifdef 1\n"
@@ -689,8 +688,7 @@ TEST_F(FormatTestSelective, FormatMacroRegardlessOfPreviousIndent) {
" }};", // Ditto: Bug?
format(Code, 57, 0));
- Style.IndentPPDirectives =
- FormatStyle::PPDirectiveIndentStyle::PPDIS_BeforeHash;
+ Style.IndentPPDirectives = FormatStyle::PPDIS_BeforeHash;
EXPECT_EQ(" class Foo {\n"
" void test() {\n"
" #ifdef 1\n"
@@ -699,8 +697,7 @@ TEST_F(FormatTestSelective, FormatMacroRegardlessOfPreviousIndent) {
" }};",
format(Code, 57, 0));
- Style.IndentPPDirectives =
- FormatStyle::PPDirectiveIndentStyle::PPDIS_AfterHash;
+ Style.IndentPPDirectives = FormatStyle::PPDIS_AfterHash;
EXPECT_EQ(" class Foo {\n"
" void test() {\n"
" #ifdef 1\n"
diff --git a/clang/unittests/Format/IntegerLiteralSeparatorTest.cpp b/clang/unittests/Format/IntegerLiteralSeparatorTest.cpp
index b1e42e9..8681c3d 100644
--- a/clang/unittests/Format/IntegerLiteralSeparatorTest.cpp
+++ b/clang/unittests/Format/IntegerLiteralSeparatorTest.cpp
@@ -24,7 +24,7 @@ TEST_F(IntegerLiteralSeparatorTest, SingleQuoteAsSeparator) {
EXPECT_EQ(Style.IntegerLiteralSeparator.Decimal, 0);
EXPECT_EQ(Style.IntegerLiteralSeparator.Hex, 0);
- const StringRef Binary("b = 0b10011'11'0110'1u;");
+ constexpr StringRef Binary("b = 0b10011'11'0110'1u;");
verifyFormat(Binary, Style);
Style.IntegerLiteralSeparator.Binary = -1;
verifyFormat("b = 0b100111101101u;", Binary, Style);
@@ -33,14 +33,14 @@ TEST_F(IntegerLiteralSeparatorTest, SingleQuoteAsSeparator) {
Style.IntegerLiteralSeparator.Binary = 4;
verifyFormat("b = 0b1001'1110'1101u;", Binary, Style);
- const StringRef Decimal("d = 184467'440737'0'95505'92Ull;");
+ constexpr StringRef Decimal("d = 184467'440737'0'95505'92Ull;");
verifyFormat(Decimal, Style);
Style.IntegerLiteralSeparator.Decimal = -1;
verifyFormat("d = 18446744073709550592Ull;", Decimal, Style);
Style.IntegerLiteralSeparator.Decimal = 3;
verifyFormat("d = 18'446'744'073'709'550'592Ull;", Decimal, Style);
- const StringRef Hex("h = 0xDEAD'BEEF'DE'AD'BEE'Fuz;");
+ constexpr StringRef Hex("h = 0xDEAD'BEEF'DE'AD'BEE'Fuz;");
verifyFormat(Hex, Style);
Style.IntegerLiteralSeparator.Hex = -1;
verifyFormat("h = 0xDEADBEEFDEADBEEFuz;", Hex, Style);
@@ -87,9 +87,9 @@ TEST_F(IntegerLiteralSeparatorTest, SingleQuoteAsSeparator) {
TEST_F(IntegerLiteralSeparatorTest, UnderscoreAsSeparator) {
FormatStyle Style = getLLVMStyle();
- const StringRef Binary("B = 0B10011_11_0110_1;");
- const StringRef Decimal("d = 184467_440737_0_95505_92;");
- const StringRef Hex("H = 0XDEAD_BEEF_DE_AD_BEE_F;");
+ constexpr StringRef Binary("B = 0B10011_11_0110_1;");
+ constexpr StringRef Decimal("d = 184467_440737_0_95505_92;");
+ constexpr StringRef Hex("H = 0XDEAD_BEEF_DE_AD_BEE_F;");
auto TestUnderscore = [&](auto Language) {
Style.Language = Language;
@@ -173,16 +173,16 @@ TEST_F(IntegerLiteralSeparatorTest, FixRanges) {
FormatStyle Style = getLLVMStyle();
Style.IntegerLiteralSeparator.Decimal = 3;
- const StringRef Code("i = -12'34;\n"
- "// clang-format off\n"
- "j = 123'4;\n"
- "// clang-format on\n"
- "k = +1'23'4;");
- const StringRef Expected("i = -1'234;\n"
+ constexpr StringRef Code("i = -12'34;\n"
"// clang-format off\n"
"j = 123'4;\n"
"// clang-format on\n"
- "k = +1'234;");
+ "k = +1'23'4;");
+ constexpr StringRef Expected("i = -1'234;\n"
+ "// clang-format off\n"
+ "j = 123'4;\n"
+ "// clang-format on\n"
+ "k = +1'234;");
verifyFormat(Expected, Code, Style);
diff --git a/clang/unittests/Format/SortIncludesTest.cpp b/clang/unittests/Format/SortIncludesTest.cpp
index 5194d65..48ecd5d 100644
--- a/clang/unittests/Format/SortIncludesTest.cpp
+++ b/clang/unittests/Format/SortIncludesTest.cpp
@@ -1084,10 +1084,10 @@ TEST_F(SortIncludesTest, DoNotSortLikelyXml) {
}
TEST_F(SortIncludesTest, DoNotSortCSharp) {
- constexpr StringRef Code{"const string expectedDataStruct = @\"\n"
+ constexpr StringRef Code("const string expectedDataStruct = @\"\n"
" #include <b.h>\n"
" #include <a.h>\n"
- " \";"};
+ " \";");
FmtStyle.Language = FormatStyle::LK_CSharp;
EXPECT_TRUE(sortIncludes(FmtStyle, Code, GetCodeRange(Code), "a.cs").empty());
}
diff --git a/clang/unittests/Format/TokenAnnotatorTest.cpp b/clang/unittests/Format/TokenAnnotatorTest.cpp
index ce7787e..7f99655 100644
--- a/clang/unittests/Format/TokenAnnotatorTest.cpp
+++ b/clang/unittests/Format/TokenAnnotatorTest.cpp
@@ -618,7 +618,7 @@ TEST_F(TokenAnnotatorTest, UnderstandsStructs) {
EXPECT_TOKEN(Tokens[19], tok::l_brace, TT_StructLBrace);
EXPECT_TOKEN(Tokens[20], tok::r_brace, TT_StructRBrace);
- constexpr StringRef Code{"struct EXPORT StructName {};"};
+ constexpr StringRef Code("struct EXPORT StructName {};");
Tokens = annotate(Code);
ASSERT_EQ(Tokens.size(), 7u) << Tokens;
@@ -3958,7 +3958,7 @@ TEST_F(TokenAnnotatorTest, SplitPenalty) {
}
TEST_F(TokenAnnotatorTest, TemplateName) {
- constexpr StringRef Code{"return Foo < A || B > (C ^ D);"};
+ constexpr StringRef Code("return Foo < A || B > (C ^ D);");
auto Tokens = annotate(Code);
ASSERT_EQ(Tokens.size(), 14u) << Tokens;
diff --git a/clang/unittests/Interpreter/InterpreterTest.cpp b/clang/unittests/Interpreter/InterpreterTest.cpp
index fb9e98d..768058b 100644
--- a/clang/unittests/Interpreter/InterpreterTest.cpp
+++ b/clang/unittests/Interpreter/InterpreterTest.cpp
@@ -160,12 +160,12 @@ TEST_F(InterpreterTest, UndoCommand) {
// Fail to undo.
auto Err1 = Interp->Undo();
- EXPECT_EQ("Operation failed. Too many undos",
+ EXPECT_EQ("Operation failed. No input left to undo",
llvm::toString(std::move(Err1)));
auto Err2 = Interp->Parse("int foo = 42;");
EXPECT_TRUE(!!Err2);
auto Err3 = Interp->Undo(2);
- EXPECT_EQ("Operation failed. Too many undos",
+ EXPECT_EQ("Operation failed. Wanted to undo 2 inputs, only have 1.",
llvm::toString(std::move(Err3)));
// Succeed to undo.
@@ -392,9 +392,6 @@ TEST_F(InterpreterTest, Value) {
EXPECT_EQ(V9.getKind(), Value::K_PtrOrObj);
EXPECT_TRUE(V9.isManuallyAlloc());
- if (llvm::Triple(llvm::sys::getDefaultTargetTriple()).isSystemZ())
- GTEST_SKIP(); // Enum printing is broken for unknown reasons on SystemZ.
-
Value V10;
llvm::cantFail(Interp->ParseAndExecute(
"enum D : unsigned int {Zero = 0, One}; One", &V10));
diff --git a/clang/unittests/Parse/ParseHLSLRootSignatureTest.cpp b/clang/unittests/Parse/ParseHLSLRootSignatureTest.cpp
index 4b24800..4d08f8d 100644
--- a/clang/unittests/Parse/ParseHLSLRootSignatureTest.cpp
+++ b/clang/unittests/Parse/ParseHLSLRootSignatureTest.cpp
@@ -136,15 +136,13 @@ TEST_F(ParseHLSLRootSignatureTest, ValidParseEmptyTest) {
TrivialModuleLoader ModLoader;
auto PP = createPP(Source, ModLoader);
- SmallVector<RootSignatureElement> Elements;
- hlsl::RootSignatureParser Parser(RootSignatureVersion::V1_1, Elements,
- Signature, *PP);
+ hlsl::RootSignatureParser Parser(RootSignatureVersion::V1_1, Signature, *PP);
// Test no diagnostics produced
Consumer->setNoDiag();
ASSERT_FALSE(Parser.parse());
- ASSERT_EQ((int)Elements.size(), 0);
+ ASSERT_EQ((int)Parser.getElements().size(), 0);
ASSERT_TRUE(Consumer->isSatisfied());
}
@@ -172,15 +170,14 @@ TEST_F(ParseHLSLRootSignatureTest, ValidParseDTClausesTest) {
TrivialModuleLoader ModLoader;
auto PP = createPP(Source, ModLoader);
- SmallVector<RootSignatureElement> Elements;
- hlsl::RootSignatureParser Parser(RootSignatureVersion::V1_1, Elements,
- Signature, *PP);
+ hlsl::RootSignatureParser Parser(RootSignatureVersion::V1_1, Signature, *PP);
// Test no diagnostics produced
Consumer->setNoDiag();
ASSERT_FALSE(Parser.parse());
+ auto Elements = Parser.getElements();
// First Descriptor Table with 4 elements
RootElement Elem = Elements[0].getElement();
ASSERT_TRUE(std::holds_alternative<DescriptorTableClause>(Elem));
@@ -277,15 +274,14 @@ TEST_F(ParseHLSLRootSignatureTest, ValidParseStaticSamplerTest) {
TrivialModuleLoader ModLoader;
auto PP = createPP(Source, ModLoader);
- SmallVector<RootSignatureElement> Elements;
- hlsl::RootSignatureParser Parser(RootSignatureVersion::V1_1, Elements,
- Signature, *PP);
+ hlsl::RootSignatureParser Parser(RootSignatureVersion::V1_1, Signature, *PP);
// Test no diagnostics produced
Consumer->setNoDiag();
ASSERT_FALSE(Parser.parse());
+ auto Elements = Parser.getElements();
ASSERT_EQ(Elements.size(), 2u);
// Check default values are as expected
@@ -364,15 +360,14 @@ TEST_F(ParseHLSLRootSignatureTest, ValidParseFloatsTest) {
TrivialModuleLoader ModLoader;
auto PP = createPP(Source, ModLoader);
- SmallVector<RootSignatureElement> Elements;
- hlsl::RootSignatureParser Parser(RootSignatureVersion::V1_1, Elements,
- Signature, *PP);
+ hlsl::RootSignatureParser Parser(RootSignatureVersion::V1_1, Signature, *PP);
// Test no diagnostics produced
Consumer->setNoDiag();
ASSERT_FALSE(Parser.parse());
+ auto Elements = Parser.getElements();
RootElement Elem = Elements[0].getElement();
ASSERT_TRUE(std::holds_alternative<StaticSampler>(Elem));
ASSERT_FLOAT_EQ(std::get<StaticSampler>(Elem).MipLODBias, 0.f);
@@ -441,15 +436,14 @@ TEST_F(ParseHLSLRootSignatureTest, ValidSamplerFlagsTest) {
TrivialModuleLoader ModLoader;
auto PP = createPP(Source, ModLoader);
- SmallVector<RootSignatureElement> Elements;
- hlsl::RootSignatureParser Parser(RootSignatureVersion::V1_1, Elements,
- Signature, *PP);
+ hlsl::RootSignatureParser Parser(RootSignatureVersion::V1_1, Signature, *PP);
// Test no diagnostics produced
Consumer->setNoDiag();
ASSERT_FALSE(Parser.parse());
+ auto Elements = Parser.getElements();
RootElement Elem = Elements[0].getElement();
ASSERT_TRUE(std::holds_alternative<DescriptorTableClause>(Elem));
ASSERT_EQ(std::get<DescriptorTableClause>(Elem).Type, ClauseType::Sampler);
@@ -474,15 +468,14 @@ TEST_F(ParseHLSLRootSignatureTest, ValidParseRootConsantsTest) {
TrivialModuleLoader ModLoader;
auto PP = createPP(Source, ModLoader);
- SmallVector<RootSignatureElement> Elements;
- hlsl::RootSignatureParser Parser(RootSignatureVersion::V1_1, Elements,
- Signature, *PP);
+ hlsl::RootSignatureParser Parser(RootSignatureVersion::V1_1, Signature, *PP);
// Test no diagnostics produced
Consumer->setNoDiag();
ASSERT_FALSE(Parser.parse());
+ auto Elements = Parser.getElements();
ASSERT_EQ(Elements.size(), 2u);
RootElement Elem = Elements[0].getElement();
@@ -533,15 +526,14 @@ TEST_F(ParseHLSLRootSignatureTest, ValidParseRootFlagsTest) {
TrivialModuleLoader ModLoader;
auto PP = createPP(Source, ModLoader);
- SmallVector<RootSignatureElement> Elements;
- hlsl::RootSignatureParser Parser(RootSignatureVersion::V1_1, Elements,
- Signature, *PP);
+ hlsl::RootSignatureParser Parser(RootSignatureVersion::V1_1, Signature, *PP);
// Test no diagnostics produced
Consumer->setNoDiag();
ASSERT_FALSE(Parser.parse());
+ auto Elements = Parser.getElements();
ASSERT_EQ(Elements.size(), 3u);
RootElement Elem = Elements[0].getElement();
@@ -588,15 +580,14 @@ TEST_F(ParseHLSLRootSignatureTest, ValidParseRootDescriptorsTest) {
TrivialModuleLoader ModLoader;
auto PP = createPP(Source, ModLoader);
- SmallVector<RootSignatureElement> Elements;
- hlsl::RootSignatureParser Parser(RootSignatureVersion::V1_1, Elements,
- Signature, *PP);
+ hlsl::RootSignatureParser Parser(RootSignatureVersion::V1_1, Signature, *PP);
// Test no diagnostics produced
Consumer->setNoDiag();
ASSERT_FALSE(Parser.parse());
+ auto Elements = Parser.getElements();
ASSERT_EQ(Elements.size(), 4u);
RootElement Elem = Elements[0].getElement();
@@ -664,9 +655,7 @@ TEST_F(ParseHLSLRootSignatureTest, ValidTrailingCommaTest) {
TrivialModuleLoader ModLoader;
auto PP = createPP(Source, ModLoader);
- SmallVector<RootSignatureElement> Elements;
- hlsl::RootSignatureParser Parser(RootSignatureVersion::V1_1, Elements,
- Signature, *PP);
+ hlsl::RootSignatureParser Parser(RootSignatureVersion::V1_1, Signature, *PP);
// Test no diagnostics produced
Consumer->setNoDiag();
@@ -697,15 +686,14 @@ TEST_F(ParseHLSLRootSignatureTest, ValidVersion10Test) {
TrivialModuleLoader ModLoader;
auto PP = createPP(Source, ModLoader);
- SmallVector<RootSignatureElement> Elements;
- hlsl::RootSignatureParser Parser(RootSignatureVersion::V1_0, Elements,
- Signature, *PP);
+ hlsl::RootSignatureParser Parser(RootSignatureVersion::V1_0, Signature, *PP);
// Test no diagnostics produced
Consumer->setNoDiag();
ASSERT_FALSE(Parser.parse());
+ auto Elements = Parser.getElements();
auto DefRootDescriptorFlag = llvm::dxbc::RootDescriptorFlags::DataVolatile;
RootElement Elem = Elements[0].getElement();
ASSERT_TRUE(std::holds_alternative<RootDescriptor>(Elem));
@@ -770,15 +758,14 @@ TEST_F(ParseHLSLRootSignatureTest, ValidVersion11Test) {
TrivialModuleLoader ModLoader;
auto PP = createPP(Source, ModLoader);
- SmallVector<RootSignatureElement> Elements;
- hlsl::RootSignatureParser Parser(RootSignatureVersion::V1_1, Elements,
- Signature, *PP);
+ hlsl::RootSignatureParser Parser(RootSignatureVersion::V1_1, Signature, *PP);
// Test no diagnostics produced
Consumer->setNoDiag();
ASSERT_FALSE(Parser.parse());
+ auto Elements = Parser.getElements();
RootElement Elem = Elements[0].getElement();
ASSERT_TRUE(std::holds_alternative<RootDescriptor>(Elem));
ASSERT_EQ(std::get<RootDescriptor>(Elem).Type, DescriptorType::CBuffer);
@@ -838,9 +825,7 @@ TEST_F(ParseHLSLRootSignatureTest, InvalidParseUnexpectedTokenTest) {
TrivialModuleLoader ModLoader;
auto PP = createPP(Source, ModLoader);
- SmallVector<RootSignatureElement> Elements;
- hlsl::RootSignatureParser Parser(RootSignatureVersion::V1_1, Elements,
- Signature, *PP);
+ hlsl::RootSignatureParser Parser(RootSignatureVersion::V1_1, Signature, *PP);
// Test correct diagnostic produced
Consumer->setExpected(diag::err_expected_either);
@@ -860,9 +845,7 @@ TEST_F(ParseHLSLRootSignatureTest, InvalidParseInvalidTokenTest) {
TrivialModuleLoader ModLoader;
auto PP = createPP(Source, ModLoader);
- SmallVector<RootSignatureElement> Elements;
- hlsl::RootSignatureParser Parser(RootSignatureVersion::V1_1, Elements,
- Signature, *PP);
+ hlsl::RootSignatureParser Parser(RootSignatureVersion::V1_1, Signature, *PP);
// Test correct diagnostic produced - invalid token
Consumer->setExpected(diag::err_hlsl_invalid_token);
@@ -882,9 +865,7 @@ TEST_F(ParseHLSLRootSignatureTest, InvalidParseUnexpectedEndOfStreamTest) {
TrivialModuleLoader ModLoader;
auto PP = createPP(Source, ModLoader);
- SmallVector<RootSignatureElement> Elements;
- hlsl::RootSignatureParser Parser(RootSignatureVersion::V1_1, Elements,
- Signature, *PP);
+ hlsl::RootSignatureParser Parser(RootSignatureVersion::V1_1, Signature, *PP);
// Test correct diagnostic produced - expected '(' after DescriptorTable
Consumer->setExpected(diag::err_expected_after);
@@ -909,9 +890,7 @@ TEST_F(ParseHLSLRootSignatureTest, InvalidMissingDTParameterTest) {
TrivialModuleLoader ModLoader;
auto PP = createPP(Source, ModLoader);
- SmallVector<RootSignatureElement> Elements;
- hlsl::RootSignatureParser Parser(RootSignatureVersion::V1_1, Elements,
- Signature, *PP);
+ hlsl::RootSignatureParser Parser(RootSignatureVersion::V1_1, Signature, *PP);
// Test correct diagnostic produced
Consumer->setExpected(diag::err_hlsl_rootsig_missing_param);
@@ -933,9 +912,7 @@ TEST_F(ParseHLSLRootSignatureTest, InvalidMissingRDParameterTest) {
TrivialModuleLoader ModLoader;
auto PP = createPP(Source, ModLoader);
- SmallVector<RootSignatureElement> Elements;
- hlsl::RootSignatureParser Parser(RootSignatureVersion::V1_1, Elements,
- Signature, *PP);
+ hlsl::RootSignatureParser Parser(RootSignatureVersion::V1_1, Signature, *PP);
// Test correct diagnostic produced
Consumer->setExpected(diag::err_hlsl_rootsig_missing_param);
@@ -957,9 +934,7 @@ TEST_F(ParseHLSLRootSignatureTest, InvalidMissingRCParameterTest) {
TrivialModuleLoader ModLoader;
auto PP = createPP(Source, ModLoader);
- SmallVector<RootSignatureElement> Elements;
- hlsl::RootSignatureParser Parser(RootSignatureVersion::V1_1, Elements,
- Signature, *PP);
+ hlsl::RootSignatureParser Parser(RootSignatureVersion::V1_1, Signature, *PP);
// Test correct diagnostic produced
Consumer->setExpected(diag::err_hlsl_rootsig_missing_param);
@@ -983,9 +958,7 @@ TEST_F(ParseHLSLRootSignatureTest, InvalidRepeatedMandatoryDTParameterTest) {
TrivialModuleLoader ModLoader;
auto PP = createPP(Source, ModLoader);
- SmallVector<RootSignatureElement> Elements;
- hlsl::RootSignatureParser Parser(RootSignatureVersion::V1_1, Elements,
- Signature, *PP);
+ hlsl::RootSignatureParser Parser(RootSignatureVersion::V1_1, Signature, *PP);
// Test correct diagnostic produced
Consumer->setExpected(diag::err_hlsl_rootsig_repeat_param);
@@ -1007,9 +980,7 @@ TEST_F(ParseHLSLRootSignatureTest, InvalidRepeatedMandatoryRCParameterTest) {
TrivialModuleLoader ModLoader;
auto PP = createPP(Source, ModLoader);
- SmallVector<RootSignatureElement> Elements;
- hlsl::RootSignatureParser Parser(RootSignatureVersion::V1_1, Elements,
- Signature, *PP);
+ hlsl::RootSignatureParser Parser(RootSignatureVersion::V1_1, Signature, *PP);
// Test correct diagnostic produced
Consumer->setExpected(diag::err_hlsl_rootsig_repeat_param);
@@ -1033,9 +1004,7 @@ TEST_F(ParseHLSLRootSignatureTest, InvalidRepeatedOptionalDTParameterTest) {
TrivialModuleLoader ModLoader;
auto PP = createPP(Source, ModLoader);
- SmallVector<RootSignatureElement> Elements;
- hlsl::RootSignatureParser Parser(RootSignatureVersion::V1_1, Elements,
- Signature, *PP);
+ hlsl::RootSignatureParser Parser(RootSignatureVersion::V1_1, Signature, *PP);
// Test correct diagnostic produced
Consumer->setExpected(diag::err_hlsl_rootsig_repeat_param);
@@ -1061,9 +1030,7 @@ TEST_F(ParseHLSLRootSignatureTest, InvalidRepeatedOptionalRCParameterTest) {
TrivialModuleLoader ModLoader;
auto PP = createPP(Source, ModLoader);
- SmallVector<RootSignatureElement> Elements;
- hlsl::RootSignatureParser Parser(RootSignatureVersion::V1_1, Elements,
- Signature, *PP);
+ hlsl::RootSignatureParser Parser(RootSignatureVersion::V1_1, Signature, *PP);
// Test correct diagnostic produced
Consumer->setExpected(diag::err_hlsl_rootsig_repeat_param);
@@ -1086,9 +1053,7 @@ TEST_F(ParseHLSLRootSignatureTest, InvalidLexOverflowedNumberTest) {
TrivialModuleLoader ModLoader;
auto PP = createPP(Source, ModLoader);
- SmallVector<RootSignatureElement> Elements;
- hlsl::RootSignatureParser Parser(RootSignatureVersion::V1_1, Elements,
- Signature, *PP);
+ hlsl::RootSignatureParser Parser(RootSignatureVersion::V1_1, Signature, *PP);
// Test correct diagnostic produced
Consumer->setExpected(diag::err_hlsl_number_literal_overflow);
@@ -1110,9 +1075,7 @@ TEST_F(ParseHLSLRootSignatureTest, InvalidParseOverflowedNegativeNumberTest) {
TrivialModuleLoader ModLoader;
auto PP = createPP(Source, ModLoader);
- SmallVector<RootSignatureElement> Elements;
- hlsl::RootSignatureParser Parser(RootSignatureVersion::V1_1, Elements,
- Signature, *PP);
+ hlsl::RootSignatureParser Parser(RootSignatureVersion::V1_1, Signature, *PP);
// Test correct diagnostic produced
Consumer->setExpected(diag::err_hlsl_number_literal_overflow);
@@ -1133,9 +1096,7 @@ TEST_F(ParseHLSLRootSignatureTest, InvalidLexOverflowedFloatTest) {
TrivialModuleLoader ModLoader;
auto PP = createPP(Source, ModLoader);
- SmallVector<RootSignatureElement> Elements;
- hlsl::RootSignatureParser Parser(RootSignatureVersion::V1_1, Elements,
- Signature, *PP);
+ hlsl::RootSignatureParser Parser(RootSignatureVersion::V1_1, Signature, *PP);
// Test correct diagnostic produced
Consumer->setExpected(diag::err_hlsl_number_literal_overflow);
@@ -1156,9 +1117,7 @@ TEST_F(ParseHLSLRootSignatureTest, InvalidLexNegOverflowedFloatTest) {
TrivialModuleLoader ModLoader;
auto PP = createPP(Source, ModLoader);
- SmallVector<RootSignatureElement> Elements;
- hlsl::RootSignatureParser Parser(RootSignatureVersion::V1_1, Elements,
- Signature, *PP);
+ hlsl::RootSignatureParser Parser(RootSignatureVersion::V1_1, Signature, *PP);
// Test correct diagnostic produced
Consumer->setExpected(diag::err_hlsl_number_literal_overflow);
@@ -1179,9 +1138,7 @@ TEST_F(ParseHLSLRootSignatureTest, InvalidLexOverflowedDoubleTest) {
TrivialModuleLoader ModLoader;
auto PP = createPP(Source, ModLoader);
- SmallVector<RootSignatureElement> Elements;
- hlsl::RootSignatureParser Parser(RootSignatureVersion::V1_1, Elements,
- Signature, *PP);
+ hlsl::RootSignatureParser Parser(RootSignatureVersion::V1_1, Signature, *PP);
// Test correct diagnostic produced
Consumer->setExpected(diag::err_hlsl_number_literal_overflow);
@@ -1202,9 +1159,7 @@ TEST_F(ParseHLSLRootSignatureTest, InvalidLexUnderflowFloatTest) {
TrivialModuleLoader ModLoader;
auto PP = createPP(Source, ModLoader);
- SmallVector<RootSignatureElement> Elements;
- hlsl::RootSignatureParser Parser(RootSignatureVersion::V1_1, Elements,
- Signature, *PP);
+ hlsl::RootSignatureParser Parser(RootSignatureVersion::V1_1, Signature, *PP);
// Test correct diagnostic produced
Consumer->setExpected(diag::err_hlsl_number_literal_underflow);
@@ -1228,9 +1183,7 @@ TEST_F(ParseHLSLRootSignatureTest, InvalidNonZeroFlagsTest) {
TrivialModuleLoader ModLoader;
auto PP = createPP(Source, ModLoader);
- SmallVector<RootSignatureElement> Elements;
- hlsl::RootSignatureParser Parser(RootSignatureVersion::V1_1, Elements,
- Signature, *PP);
+ hlsl::RootSignatureParser Parser(RootSignatureVersion::V1_1, Signature, *PP);
// Test correct diagnostic produced
Consumer->setExpected(diag::err_hlsl_rootsig_non_zero_flag);
@@ -1253,9 +1206,7 @@ TEST_F(ParseHLSLRootSignatureTest, InvalidRootElementMissingCommaTest) {
TrivialModuleLoader ModLoader;
auto PP = createPP(Source, ModLoader);
- SmallVector<RootSignatureElement> Elements;
- hlsl::RootSignatureParser Parser(RootSignatureVersion::V1_1, Elements,
- Signature, *PP);
+ hlsl::RootSignatureParser Parser(RootSignatureVersion::V1_1, Signature, *PP);
// Test correct diagnostic produced
Consumer->setExpected(diag::err_expected_either);
@@ -1280,9 +1231,7 @@ TEST_F(ParseHLSLRootSignatureTest, InvalidDescriptorTableMissingCommaTest) {
TrivialModuleLoader ModLoader;
auto PP = createPP(Source, ModLoader);
- SmallVector<RootSignatureElement> Elements;
- hlsl::RootSignatureParser Parser(RootSignatureVersion::V1_1, Elements,
- Signature, *PP);
+ hlsl::RootSignatureParser Parser(RootSignatureVersion::V1_1, Signature, *PP);
// Test correct diagnostic produced
Consumer->setExpected(diag::err_expected_either);
@@ -1307,9 +1256,7 @@ TEST_F(ParseHLSLRootSignatureTest, InvalidRootConstantParamsCommaTest) {
TrivialModuleLoader ModLoader;
auto PP = createPP(Source, ModLoader);
- SmallVector<RootSignatureElement> Elements;
- hlsl::RootSignatureParser Parser(RootSignatureVersion::V1_1, Elements,
- Signature, *PP);
+ hlsl::RootSignatureParser Parser(RootSignatureVersion::V1_1, Signature, *PP);
// Test correct diagnostic produced
Consumer->setExpected(diag::err_expected_either);
@@ -1334,9 +1281,7 @@ TEST_F(ParseHLSLRootSignatureTest, InvalidRootDescriptorParamsCommaTest) {
TrivialModuleLoader ModLoader;
auto PP = createPP(Source, ModLoader);
- SmallVector<RootSignatureElement> Elements;
- hlsl::RootSignatureParser Parser(RootSignatureVersion::V1_1, Elements,
- Signature, *PP);
+ hlsl::RootSignatureParser Parser(RootSignatureVersion::V1_1, Signature, *PP);
// Test correct diagnostic produced
Consumer->setExpected(diag::err_expected_either);
@@ -1363,9 +1308,7 @@ TEST_F(ParseHLSLRootSignatureTest, InvalidDescriptorClauseParamsCommaTest) {
TrivialModuleLoader ModLoader;
auto PP = createPP(Source, ModLoader);
- SmallVector<RootSignatureElement> Elements;
- hlsl::RootSignatureParser Parser(RootSignatureVersion::V1_1, Elements,
- Signature, *PP);
+ hlsl::RootSignatureParser Parser(RootSignatureVersion::V1_1, Signature, *PP);
// Test correct diagnostic produced
Consumer->setExpected(diag::err_expected_either);
@@ -1390,9 +1333,7 @@ TEST_F(ParseHLSLRootSignatureTest, InvalidStaticSamplerCommaTest) {
TrivialModuleLoader ModLoader;
auto PP = createPP(Source, ModLoader);
- SmallVector<RootSignatureElement> Elements;
- hlsl::RootSignatureParser Parser(RootSignatureVersion::V1_1, Elements,
- Signature, *PP);
+ hlsl::RootSignatureParser Parser(RootSignatureVersion::V1_1, Signature, *PP);
// Test correct diagnostic produced
Consumer->setExpected(diag::err_expected_either);
@@ -1414,9 +1355,7 @@ TEST_F(ParseHLSLRootSignatureTest, InvalidRootDescriptorParamTest) {
TrivialModuleLoader ModLoader;
auto PP = createPP(Source, ModLoader);
- SmallVector<RootSignatureElement> Elements;
- hlsl::RootSignatureParser Parser(RootSignatureVersion::V1_1, Elements,
- Signature, *PP);
+ hlsl::RootSignatureParser Parser(RootSignatureVersion::V1_1, Signature, *PP);
// Test correct diagnostic produced
Consumer->setExpected(diag::err_hlsl_invalid_token);
@@ -1441,9 +1380,7 @@ TEST_F(ParseHLSLRootSignatureTest, InvalidDescriptorTableParamTest) {
TrivialModuleLoader ModLoader;
auto PP = createPP(Source, ModLoader);
- SmallVector<RootSignatureElement> Elements;
- hlsl::RootSignatureParser Parser(RootSignatureVersion::V1_1, Elements,
- Signature, *PP);
+ hlsl::RootSignatureParser Parser(RootSignatureVersion::V1_1, Signature, *PP);
// Test correct diagnostic produced
Consumer->setExpected(diag::err_hlsl_invalid_token);
@@ -1467,9 +1404,7 @@ TEST_F(ParseHLSLRootSignatureTest, InvalidDescriptorTableClauseParamTest) {
TrivialModuleLoader ModLoader;
auto PP = createPP(Source, ModLoader);
- SmallVector<RootSignatureElement> Elements;
- hlsl::RootSignatureParser Parser(RootSignatureVersion::V1_1, Elements,
- Signature, *PP);
+ hlsl::RootSignatureParser Parser(RootSignatureVersion::V1_1, Signature, *PP);
// Test correct diagnostic produced
Consumer->setExpected(diag::err_hlsl_invalid_token);
@@ -1496,9 +1431,7 @@ TEST_F(ParseHLSLRootSignatureTest, InvalidStaticSamplerParamTest) {
TrivialModuleLoader ModLoader;
auto PP = createPP(Source, ModLoader);
- SmallVector<RootSignatureElement> Elements;
- hlsl::RootSignatureParser Parser(RootSignatureVersion::V1_1, Elements,
- Signature, *PP);
+ hlsl::RootSignatureParser Parser(RootSignatureVersion::V1_1, Signature, *PP);
// Test correct diagnostic produced
Consumer->setExpected(diag::err_hlsl_invalid_token);
@@ -1523,9 +1456,7 @@ TEST_F(ParseHLSLRootSignatureTest, InvalidVisibilityValueTest) {
TrivialModuleLoader ModLoader;
auto PP = createPP(Source, ModLoader);
- SmallVector<RootSignatureElement> Elements;
- hlsl::RootSignatureParser Parser(RootSignatureVersion::V1_1, Elements,
- Signature, *PP);
+ hlsl::RootSignatureParser Parser(RootSignatureVersion::V1_1, Signature, *PP);
// Test correct diagnostic produced
Consumer->setExpected(diag::err_hlsl_invalid_token);
@@ -1549,9 +1480,7 @@ TEST_F(ParseHLSLRootSignatureTest, InvalidRegisterValueTest) {
TrivialModuleLoader ModLoader;
auto PP = createPP(Source, ModLoader);
- SmallVector<RootSignatureElement> Elements;
- hlsl::RootSignatureParser Parser(RootSignatureVersion::V1_1, Elements,
- Signature, *PP);
+ hlsl::RootSignatureParser Parser(RootSignatureVersion::V1_1, Signature, *PP);
// Test correct diagnostic produced
Consumer->setExpected(diag::err_hlsl_invalid_token);
@@ -1576,9 +1505,7 @@ TEST_F(ParseHLSLRootSignatureTest, InvalidFilterValueTest) {
TrivialModuleLoader ModLoader;
auto PP = createPP(Source, ModLoader);
- SmallVector<RootSignatureElement> Elements;
- hlsl::RootSignatureParser Parser(RootSignatureVersion::V1_1, Elements,
- Signature, *PP);
+ hlsl::RootSignatureParser Parser(RootSignatureVersion::V1_1, Signature, *PP);
// Test correct diagnostic produced
Consumer->setExpected(diag::err_hlsl_invalid_token);
@@ -1603,9 +1530,7 @@ TEST_F(ParseHLSLRootSignatureTest, InvalidTextureAddressModeValueTest) {
TrivialModuleLoader ModLoader;
auto PP = createPP(Source, ModLoader);
- SmallVector<RootSignatureElement> Elements;
- hlsl::RootSignatureParser Parser(RootSignatureVersion::V1_1, Elements,
- Signature, *PP);
+ hlsl::RootSignatureParser Parser(RootSignatureVersion::V1_1, Signature, *PP);
// Test correct diagnostic produced
Consumer->setExpected(diag::err_hlsl_invalid_token);
@@ -1630,9 +1555,7 @@ TEST_F(ParseHLSLRootSignatureTest, InvalidComparisonFuncValueTest) {
TrivialModuleLoader ModLoader;
auto PP = createPP(Source, ModLoader);
- SmallVector<RootSignatureElement> Elements;
- hlsl::RootSignatureParser Parser(RootSignatureVersion::V1_1, Elements,
- Signature, *PP);
+ hlsl::RootSignatureParser Parser(RootSignatureVersion::V1_1, Signature, *PP);
// Test correct diagnostic produced
Consumer->setExpected(diag::err_hlsl_invalid_token);
@@ -1657,9 +1580,7 @@ TEST_F(ParseHLSLRootSignatureTest, InvalidStaticBorderColorValueTest) {
TrivialModuleLoader ModLoader;
auto PP = createPP(Source, ModLoader);
- SmallVector<RootSignatureElement> Elements;
- hlsl::RootSignatureParser Parser(RootSignatureVersion::V1_1, Elements,
- Signature, *PP);
+ hlsl::RootSignatureParser Parser(RootSignatureVersion::V1_1, Signature, *PP);
// Test correct diagnostic produced
Consumer->setExpected(diag::err_hlsl_invalid_token);
@@ -1681,9 +1602,7 @@ TEST_F(ParseHLSLRootSignatureTest, InvalidRootFlagsValueTest) {
TrivialModuleLoader ModLoader;
auto PP = createPP(Source, ModLoader);
- SmallVector<RootSignatureElement> Elements;
- hlsl::RootSignatureParser Parser(RootSignatureVersion::V1_1, Elements,
- Signature, *PP);
+ hlsl::RootSignatureParser Parser(RootSignatureVersion::V1_1, Signature, *PP);
// Test correct diagnostic produced
Consumer->setExpected(diag::err_hlsl_invalid_token);
@@ -1705,9 +1624,7 @@ TEST_F(ParseHLSLRootSignatureTest, InvalidRootDescriptorFlagsValueTest) {
TrivialModuleLoader ModLoader;
auto PP = createPP(Source, ModLoader);
- SmallVector<RootSignatureElement> Elements;
- hlsl::RootSignatureParser Parser(RootSignatureVersion::V1_1, Elements,
- Signature, *PP);
+ hlsl::RootSignatureParser Parser(RootSignatureVersion::V1_1, Signature, *PP);
// Test correct diagnostic produced
Consumer->setExpected(diag::err_hlsl_invalid_token);
@@ -1733,9 +1650,7 @@ TEST_F(ParseHLSLRootSignatureTest, InvalidDescriptorRangeFlagsValueTest) {
TrivialModuleLoader ModLoader;
auto PP = createPP(Source, ModLoader);
- SmallVector<RootSignatureElement> Elements;
- hlsl::RootSignatureParser Parser(RootSignatureVersion::V1_1, Elements,
- Signature, *PP);
+ hlsl::RootSignatureParser Parser(RootSignatureVersion::V1_1, Signature, *PP);
// Test correct diagnostic produced
Consumer->setExpected(diag::err_hlsl_invalid_token);
diff --git a/clang/unittests/StaticAnalyzer/BlockEntranceCallbackTest.cpp b/clang/unittests/StaticAnalyzer/BlockEntranceCallbackTest.cpp
index 0f05c39..d15bec0 100644
--- a/clang/unittests/StaticAnalyzer/BlockEntranceCallbackTest.cpp
+++ b/clang/unittests/StaticAnalyzer/BlockEntranceCallbackTest.cpp
@@ -91,8 +91,7 @@ void addBlockEntranceTester(AnalysisASTConsumer &AnalysisConsumer,
AnalysisConsumer.AddCheckerRegistrationFn([](CheckerRegistry &Registry) {
Registry.addChecker(&registerChecker<BlockEntranceCallbackTester>,
&shouldAlwaysRegister, "test.BlockEntranceTester",
- "EmptyDescription", "EmptyDocsUri",
- /*IsHidden=*/false);
+ "EmptyDescription");
});
}
@@ -102,8 +101,7 @@ void addBranchConditionTester(AnalysisASTConsumer &AnalysisConsumer,
AnalysisConsumer.AddCheckerRegistrationFn([](CheckerRegistry &Registry) {
Registry.addChecker(&registerChecker<BranchConditionCallbackTester>,
&shouldAlwaysRegister, "test.BranchConditionTester",
- "EmptyDescription", "EmptyDocsUri",
- /*IsHidden=*/false);
+ "EmptyDescription");
});
}
diff --git a/clang/unittests/StaticAnalyzer/BugReportInterestingnessTest.cpp b/clang/unittests/StaticAnalyzer/BugReportInterestingnessTest.cpp
index 0ef63b0..fc50f00 100644
--- a/clang/unittests/StaticAnalyzer/BugReportInterestingnessTest.cpp
+++ b/clang/unittests/StaticAnalyzer/BugReportInterestingnessTest.cpp
@@ -120,7 +120,7 @@ public:
std::move(ExpectedDiags), Compiler.getSourceManager()));
AnalysisConsumer->AddCheckerRegistrationFn([](CheckerRegistry &Registry) {
Registry.addChecker<InterestingnessTestChecker>("test.Interestingness",
- "Description", "");
+ "MockDescription");
});
Compiler.getAnalyzerOpts().CheckersAndPackages = {
{"test.Interestingness", true}};
diff --git a/clang/unittests/StaticAnalyzer/CallDescriptionTest.cpp b/clang/unittests/StaticAnalyzer/CallDescriptionTest.cpp
index 4cb6bd3..e2007a9 100644
--- a/clang/unittests/StaticAnalyzer/CallDescriptionTest.cpp
+++ b/clang/unittests/StaticAnalyzer/CallDescriptionTest.cpp
@@ -616,8 +616,8 @@ void addCallDescChecker(AnalysisASTConsumer &AnalysisConsumer,
AnalyzerOptions &AnOpts) {
AnOpts.CheckersAndPackages = {{"test.CallDescChecker", true}};
AnalysisConsumer.AddCheckerRegistrationFn([](CheckerRegistry &Registry) {
- Registry.addChecker<CallDescChecker>("test.CallDescChecker", "Description",
- "");
+ Registry.addChecker<CallDescChecker>("test.CallDescChecker",
+ "MockDescription");
});
}
diff --git a/clang/unittests/StaticAnalyzer/CallEventTest.cpp b/clang/unittests/StaticAnalyzer/CallEventTest.cpp
index 2843572e..8b5289e 100644
--- a/clang/unittests/StaticAnalyzer/CallEventTest.cpp
+++ b/clang/unittests/StaticAnalyzer/CallEventTest.cpp
@@ -56,7 +56,7 @@ void addCXXDeallocatorChecker(AnalysisASTConsumer &AnalysisConsumer,
AnOpts.CheckersAndPackages = {{"test.CXXDeallocator", true}};
AnalysisConsumer.AddCheckerRegistrationFn([](CheckerRegistry &Registry) {
Registry.addChecker<CXXDeallocatorChecker>("test.CXXDeallocator",
- "Description", "");
+ "MockDescription");
});
}
diff --git a/clang/unittests/StaticAnalyzer/ConflictingEvalCallsTest.cpp b/clang/unittests/StaticAnalyzer/ConflictingEvalCallsTest.cpp
index e410cca..cffdbf1 100644
--- a/clang/unittests/StaticAnalyzer/ConflictingEvalCallsTest.cpp
+++ b/clang/unittests/StaticAnalyzer/ConflictingEvalCallsTest.cpp
@@ -33,10 +33,8 @@ void addEvalFooCheckers(AnalysisASTConsumer &AnalysisConsumer,
AnOpts.CheckersAndPackages = {{"test.EvalFoo1", true},
{"test.EvalFoo2", true}};
AnalysisConsumer.AddCheckerRegistrationFn([](CheckerRegistry &Registry) {
- Registry.addChecker<EvalCallFoo1>("test.EvalFoo1", "EmptyDescription",
- "EmptyDocsUri");
- Registry.addChecker<EvalCallFoo2>("test.EvalFoo2", "EmptyDescription",
- "EmptyDocsUri");
+ Registry.addChecker<EvalCallFoo1>("test.EvalFoo1", "MockDescription");
+ Registry.addChecker<EvalCallFoo2>("test.EvalFoo2", "MockDescription");
});
}
} // namespace
diff --git a/clang/unittests/StaticAnalyzer/ExprEngineVisitTest.cpp b/clang/unittests/StaticAnalyzer/ExprEngineVisitTest.cpp
index b6eeb9c..12be228 100644
--- a/clang/unittests/StaticAnalyzer/ExprEngineVisitTest.cpp
+++ b/clang/unittests/StaticAnalyzer/ExprEngineVisitTest.cpp
@@ -78,7 +78,7 @@ void addExprEngineVisitPreChecker(AnalysisASTConsumer &AnalysisConsumer,
AnOpts.CheckersAndPackages = {{"ExprEngineVisitPreChecker", true}};
AnalysisConsumer.AddCheckerRegistrationFn([](CheckerRegistry &Registry) {
Registry.addChecker<ExprEngineVisitPreChecker>("ExprEngineVisitPreChecker",
- "Desc", "DocsURI");
+ "MockDescription");
});
}
@@ -87,7 +87,7 @@ void addExprEngineVisitPostChecker(AnalysisASTConsumer &AnalysisConsumer,
AnOpts.CheckersAndPackages = {{"ExprEngineVisitPostChecker", true}};
AnalysisConsumer.AddCheckerRegistrationFn([](CheckerRegistry &Registry) {
Registry.addChecker<ExprEngineVisitPostChecker>(
- "ExprEngineVisitPostChecker", "Desc", "DocsURI");
+ "ExprEngineVisitPostChecker", "MockDescription");
});
}
@@ -95,8 +95,8 @@ void addMemAccessChecker(AnalysisASTConsumer &AnalysisConsumer,
AnalyzerOptions &AnOpts) {
AnOpts.CheckersAndPackages = {{"MemAccessChecker", true}};
AnalysisConsumer.AddCheckerRegistrationFn([](CheckerRegistry &Registry) {
- Registry.addChecker<MemAccessChecker>("MemAccessChecker", "Desc",
- "DocsURI");
+ Registry.addChecker<MemAccessChecker>("MemAccessChecker",
+ "MockDescription");
});
}
diff --git a/clang/unittests/StaticAnalyzer/FalsePositiveRefutationBRVisitorTest.cpp b/clang/unittests/StaticAnalyzer/FalsePositiveRefutationBRVisitorTest.cpp
index 8f0a96d..146797f 100644
--- a/clang/unittests/StaticAnalyzer/FalsePositiveRefutationBRVisitorTest.cpp
+++ b/clang/unittests/StaticAnalyzer/FalsePositiveRefutationBRVisitorTest.cpp
@@ -92,8 +92,8 @@ void addFalsePositiveGenerator(AnalysisASTConsumer &AnalysisConsumer,
AnOpts.CheckersAndPackages = {{"test.FalsePositiveGenerator", true},
{"debug.ViewExplodedGraph", false}};
AnalysisConsumer.AddCheckerRegistrationFn([](CheckerRegistry &Registry) {
- Registry.addChecker<FalsePositiveGenerator>(
- "test.FalsePositiveGenerator", "EmptyDescription", "EmptyDocsUri");
+ Registry.addChecker<FalsePositiveGenerator>("test.FalsePositiveGenerator",
+ "MockDescription");
});
}
diff --git a/clang/unittests/StaticAnalyzer/MemRegionDescriptiveNameTest.cpp b/clang/unittests/StaticAnalyzer/MemRegionDescriptiveNameTest.cpp
index 0f6e49b..7b837f3 100644
--- a/clang/unittests/StaticAnalyzer/MemRegionDescriptiveNameTest.cpp
+++ b/clang/unittests/StaticAnalyzer/MemRegionDescriptiveNameTest.cpp
@@ -46,7 +46,7 @@ void addDescriptiveNameChecker(AnalysisASTConsumer &AnalysisConsumer,
AnOpts.CheckersAndPackages = {{"DescriptiveNameChecker", true}};
AnalysisConsumer.AddCheckerRegistrationFn([](CheckerRegistry &Registry) {
Registry.addChecker<DescriptiveNameChecker>("DescriptiveNameChecker",
- "Desc", "DocsURI");
+ "MockDescription");
});
}
diff --git a/clang/unittests/StaticAnalyzer/NoStateChangeFuncVisitorTest.cpp b/clang/unittests/StaticAnalyzer/NoStateChangeFuncVisitorTest.cpp
index a903342..68d2678 100644
--- a/clang/unittests/StaticAnalyzer/NoStateChangeFuncVisitorTest.cpp
+++ b/clang/unittests/StaticAnalyzer/NoStateChangeFuncVisitorTest.cpp
@@ -140,7 +140,7 @@ void addNonThoroughStatefulChecker(AnalysisASTConsumer &AnalysisConsumer,
AnalysisConsumer.AddCheckerRegistrationFn([](CheckerRegistry &Registry) {
Registry
.addChecker<StatefulChecker<NonThoroughErrorNotPreventedFuncVisitor>>(
- "test.StatefulChecker", "Description", "");
+ "test.StatefulChecker", "MockDescription");
});
}
@@ -233,7 +233,7 @@ void addThoroughStatefulChecker(AnalysisASTConsumer &AnalysisConsumer,
AnOpts.CheckersAndPackages = {{"test.StatefulChecker", true}};
AnalysisConsumer.AddCheckerRegistrationFn([](CheckerRegistry &Registry) {
Registry.addChecker<StatefulChecker<ThoroughErrorNotPreventedFuncVisitor>>(
- "test.StatefulChecker", "Description", "");
+ "test.StatefulChecker", "MockDescription");
});
}
diff --git a/clang/unittests/StaticAnalyzer/ObjcBug-124477.cpp b/clang/unittests/StaticAnalyzer/ObjcBug-124477.cpp
index 51bd332..ab78090 100644
--- a/clang/unittests/StaticAnalyzer/ObjcBug-124477.cpp
+++ b/clang/unittests/StaticAnalyzer/ObjcBug-124477.cpp
@@ -37,7 +37,7 @@ void addFlagFlipperChecker(AnalysisASTConsumer &AnalysisConsumer,
AnOpts.CheckersAndPackages = {{"test.FlipFlagOnCheckLocation", true}};
AnalysisConsumer.AddCheckerRegistrationFn([](CheckerRegistry &Registry) {
Registry.addChecker<FlipFlagOnCheckLocation>("test.FlipFlagOnCheckLocation",
- "Description", "");
+ "MockDescription");
});
}
diff --git a/clang/unittests/StaticAnalyzer/RegisterCustomCheckersTest.cpp b/clang/unittests/StaticAnalyzer/RegisterCustomCheckersTest.cpp
index 454eee9..e17d107 100644
--- a/clang/unittests/StaticAnalyzer/RegisterCustomCheckersTest.cpp
+++ b/clang/unittests/StaticAnalyzer/RegisterCustomCheckersTest.cpp
@@ -44,7 +44,7 @@ void addCustomChecker(AnalysisASTConsumer &AnalysisConsumer,
AnalyzerOptions &AnOpts) {
AnOpts.CheckersAndPackages = {{"test.CustomChecker", true}};
AnalysisConsumer.AddCheckerRegistrationFn([](CheckerRegistry &Registry) {
- Registry.addChecker<CustomChecker>("test.CustomChecker", "Description", "");
+ Registry.addChecker<CustomChecker>("test.CustomChecker", "MockDescription");
});
}
@@ -73,8 +73,8 @@ void addLocIncDecChecker(AnalysisASTConsumer &AnalysisConsumer,
AnalyzerOptions &AnOpts) {
AnOpts.CheckersAndPackages = {{"test.LocIncDecChecker", true}};
AnalysisConsumer.AddCheckerRegistrationFn([](CheckerRegistry &Registry) {
- Registry.addChecker<CustomChecker>("test.LocIncDecChecker", "Description",
- "");
+ Registry.addChecker<CustomChecker>("test.LocIncDecChecker",
+ "MockDescription");
});
}
@@ -119,10 +119,10 @@ bool shouldRegisterCheckerRegistrationOrderPrinter(const CheckerManager &mgr) {
void addCheckerRegistrationOrderPrinter(CheckerRegistry &Registry) {
Registry.addChecker(registerCheckerRegistrationOrderPrinter,
shouldRegisterCheckerRegistrationOrderPrinter,
- "test.RegistrationOrder", "Description", "", false);
+ "test.RegistrationOrder", "Description");
}
-#define UNITTEST_CHECKER(CHECKER_NAME, DIAG_MSG) \
+#define UNITTEST_CHECKER(CHECKER_NAME) \
class CHECKER_NAME : public Checker<check::PreStmt<DeclStmt>> { \
public: \
void checkPreStmt(const DeclStmt *DS, CheckerContext &C) const {} \
@@ -137,11 +137,11 @@ void addCheckerRegistrationOrderPrinter(CheckerRegistry &Registry) {
} \
void add##CHECKER_NAME(CheckerRegistry &Registry) { \
Registry.addChecker(register##CHECKER_NAME, shouldRegister##CHECKER_NAME, \
- "test." #CHECKER_NAME, "Description", "", false); \
+ "test." #CHECKER_NAME, "Description"); \
}
-UNITTEST_CHECKER(StrongDep, "Strong")
-UNITTEST_CHECKER(Dep, "Dep")
+UNITTEST_CHECKER(StrongDep)
+UNITTEST_CHECKER(Dep)
bool shouldRegisterStrongFALSE(const CheckerManager &mgr) {
return false;
@@ -154,7 +154,7 @@ void addDep(AnalysisASTConsumer &AnalysisConsumer,
{"test.RegistrationOrder", true}};
AnalysisConsumer.AddCheckerRegistrationFn([](CheckerRegistry &Registry) {
Registry.addChecker(registerStrongDep, shouldRegisterStrongFALSE,
- "test.Strong", "Description", "", false);
+ "test.Strong", "Description");
addStrongDep(Registry);
addDep(Registry);
addCheckerRegistrationOrderPrinter(Registry);
@@ -172,7 +172,7 @@ TEST(RegisterDeps, UnsatisfiedDependency) {
// Weak checker dependencies.
//===----------------------------------------------------------------------===//
-UNITTEST_CHECKER(WeakDep, "Weak")
+UNITTEST_CHECKER(WeakDep)
void addWeakDepCheckerBothEnabled(AnalysisASTConsumer &AnalysisConsumer,
AnalyzerOptions &AnOpts) {
@@ -225,8 +225,8 @@ void addWeakDepCheckerDepUnspecified(AnalysisASTConsumer &AnalysisConsumer,
});
}
-UNITTEST_CHECKER(WeakDep2, "Weak2")
-UNITTEST_CHECKER(Dep2, "Dep2")
+UNITTEST_CHECKER(WeakDep2)
+UNITTEST_CHECKER(Dep2)
void addWeakDepHasWeakDep(AnalysisASTConsumer &AnalysisConsumer,
AnalyzerOptions &AnOpts) {
diff --git a/clang/unittests/StaticAnalyzer/SValSimplifyerTest.cpp b/clang/unittests/StaticAnalyzer/SValSimplifyerTest.cpp
index 85cfe2c..4331ffc 100644
--- a/clang/unittests/StaticAnalyzer/SValSimplifyerTest.cpp
+++ b/clang/unittests/StaticAnalyzer/SValSimplifyerTest.cpp
@@ -68,8 +68,7 @@ static void addSimplifyChecker(AnalysisASTConsumer &AnalysisConsumer,
AnalyzerOptions &AnOpts) {
AnOpts.CheckersAndPackages = {{"SimplifyChecker", true}};
AnalysisConsumer.AddCheckerRegistrationFn([](CheckerRegistry &Registry) {
- Registry.addChecker<SimplifyChecker>("SimplifyChecker", "EmptyDescription",
- "EmptyDocsUri");
+ Registry.addChecker<SimplifyChecker>("SimplifyChecker", "MockDescription");
});
}
diff --git a/clang/unittests/StaticAnalyzer/SValTest.cpp b/clang/unittests/StaticAnalyzer/SValTest.cpp
index d8897b0..58e9a8d 100644
--- a/clang/unittests/StaticAnalyzer/SValTest.cpp
+++ b/clang/unittests/StaticAnalyzer/SValTest.cpp
@@ -139,10 +139,10 @@ class SValTest : public testing::TestWithParam<TestClangConfig> {};
\
void add##NAME##SValCollector(AnalysisASTConsumer &AnalysisConsumer, \
AnalyzerOptions &AnOpts) { \
- AnOpts.CheckersAndPackages = {{"test.##NAME##SValCollector", true}}; \
+ AnOpts.CheckersAndPackages = {{"test." #NAME "SValColl", true}}; \
AnalysisConsumer.AddCheckerRegistrationFn([](CheckerRegistry &Registry) { \
- Registry.addChecker<NAME##SValCollector>("test.##NAME##SValCollector", \
- "Description", ""); \
+ Registry.addChecker<NAME##SValCollector>("test." #NAME "SValColl", \
+ "MockDescription"); \
}); \
} \
\
diff --git a/clang/unittests/StaticAnalyzer/TestReturnValueUnderConstruction.cpp b/clang/unittests/StaticAnalyzer/TestReturnValueUnderConstruction.cpp
index 5fc084a..0cb3c59 100644
--- a/clang/unittests/StaticAnalyzer/TestReturnValueUnderConstruction.cpp
+++ b/clang/unittests/StaticAnalyzer/TestReturnValueUnderConstruction.cpp
@@ -49,9 +49,9 @@ void addTestReturnValueUnderConstructionChecker(
AnOpts.CheckersAndPackages =
{{"test.TestReturnValueUnderConstruction", true}};
AnalysisConsumer.AddCheckerRegistrationFn([](CheckerRegistry &Registry) {
- Registry.addChecker<TestReturnValueUnderConstructionChecker>(
- "test.TestReturnValueUnderConstruction", "", "");
- });
+ Registry.addChecker<TestReturnValueUnderConstructionChecker>(
+ "test.TestReturnValueUnderConstruction", "MockDescription");
+ });
}
TEST(TestReturnValueUnderConstructionChecker,