316 files changed, 6291 insertions, 3124 deletions
diff --git a/clang/cmake/caches/Release.cmake b/clang/cmake/caches/Release.cmake
index fb12dfc..a523cc5 100644
--- a/clang/cmake/caches/Release.cmake
+++ b/clang/cmake/caches/Release.cmake
@@ -102,7 +102,7 @@ if (LLVM_RELEASE_ENABLE_LTO)
   # FIXME: We can't use LLVM_ENABLE_LTO=Thin here, because it causes the CMake
   # step for the libcxx build to fail.  CMAKE_INTERPROCEDURAL_OPTIMIZATION does
   # enable ThinLTO, though.
-  set(RUNTIMES_CMAKE_ARGS "-DCMAKE_INTERPROCEDURAL_OPTIMIZATION=ON -DLLVM_ENABLE_LLD=ON" CACHE STRING "")
+  set(RUNTIMES_CMAKE_ARGS "-DCMAKE_INTERPROCEDURAL_OPTIMIZATION=ON -DLLVM_ENABLE_LLD=ON -DLLVM_ENABLE_FATLTO=ON" CACHE STRING "")
 endif()
 
 # Stage 1 Common Config
@@ -144,3 +144,7 @@ set_final_stage_var(CPACK_GENERATOR "TXZ" STRING)
 set_final_stage_var(CPACK_ARCHIVE_THREADS "0" STRING)
 
 set_final_stage_var(LLVM_USE_STATIC_ZSTD "ON" BOOL)
+if (LLVM_RELEASE_ENABLE_LTO)
+  set_final_stage_var(LLVM_ENABLE_FATLTO "ON" BOOL)
+  set_final_stage_var(CPACK_PRE_BUILD_SCRIPTS "${CMAKE_CURRENT_LIST_DIR}/release_cpack_pre_build_strip_lto.cmake" STRING)
+endif()
diff --git a/clang/cmake/caches/release_cpack_pre_build_strip_lto.cmake b/clang/cmake/caches/release_cpack_pre_build_strip_lto.cmake
new file mode 100644
index 0000000..743b64f
--- /dev/null
+++ b/clang/cmake/caches/release_cpack_pre_build_strip_lto.cmake
@@ -0,0 +1,5 @@
+file(GLOB files ${CPACK_TEMPORARY_INSTALL_DIRECTORY}/lib/*.a)
+
+foreach(file ${files})
+  execute_process(COMMAND ${CPACK_TEMPORARY_INSTALL_DIRECTORY}/bin/llvm-strip --no-strip-all -R .llvm.lto ${file})
+endforeach()
diff --git a/clang/docs/InternalsManual.rst b/clang/docs/InternalsManual.rst
index 8a44db7..f33950b 100644
--- a/clang/docs/InternalsManual.rst
+++ b/clang/docs/InternalsManual.rst
@@ -312,7 +312,7 @@ Description:
   * number: A simple decimal number matches if the argument is the same as the
     number.  Example: ``"%plural{1:mouse|:mice}0"``
   * range: A range in square brackets matches if the argument is within the
-    range.  Then range is inclusive on both ends.  Example:
+    range.  The range is inclusive on both ends.  Example:
     ``"%plural{0:none|1:one|[2,5]:some|:many}0"``
   * modulo: A modulo operator is followed by a number, and equals sign and
     either a number or a range.  The tests are the same as for plain numbers
@@ -341,7 +341,7 @@ Example:
 Class:
   Integers
 Description:
-  This is a formatter which represents the argument number in a human readable
+  This is a formatter which represents the argument number in a human-readable
   format: the value ``123`` stays ``123``, ``12345`` becomes ``12.34k``,
   ``6666666` becomes ``6.67M``, and so on for 'G' and 'T'.
 
@@ -561,7 +561,7 @@ documentation for the ``-verify`` mode can be found at
 
 There are many other possible implementations of this interface, and this is
 why we prefer diagnostics to pass down rich structured information in
-arguments.  For example, an HTML output might want declaration names be
+arguments.  For example, an HTML output might want declaration names to be
 linkified to where they come from in the source.  Another example is that a GUI
 might let you click on typedefs to expand them.  This application would want to
 pass significantly more information about types through to the GUI than a
@@ -846,7 +846,7 @@ Option Marshalling Infrastructure
 The option marshalling infrastructure automates the parsing of the Clang
 ``-cc1`` frontend command line arguments into ``CompilerInvocation`` and their
 generation from ``CompilerInvocation``. The system replaces lots of repetitive
-C++ code with simple, declarative tablegen annotations and it's being used for
+C++ code with simple, declarative tablegen annotations and is being used for
 the majority of the ``-cc1`` command line interface. This section provides an
 overview of the system.
 
@@ -986,7 +986,7 @@ line.
     NegFlag<SetFalse, [], [], "Use the new pass manager in LLVM">,
     BothFlags<[], [ClangOption, CC1Option]>>;
 
-With most such pair of flags, the ``-cc1`` frontend accepts only the flag that
+With most such pairs of flags, the ``-cc1`` frontend accepts only the flag that
 changes the default key path value. The Clang driver is responsible for
 accepting both and either forwarding the changing flag or discarding the flag
 that would just set the key path to its default.
@@ -1042,8 +1042,8 @@ and the result is assigned to the key path on success.
 The key path defaults to the value specified in ``MarshallingInfoEnum`` prefixed
 by the contents of ``NormalizedValuesScope`` and ``::``. This ensures correct
 reference to an enum case is formed even if the enum resides in different
-namespace or is an enum class. If the value present on command line does not
-match any of the comma-separated values from ``Values``, an error diagnostics is
+namespace or is an enum class. If the value present on the command line does not
+match any of the comma-separated values from ``Values``, an error diagnostic is
 issued. Otherwise, the corresponding element from ``NormalizedValues`` at the
 same index is assigned to the key path (also correctly scoped). The number of
 comma-separated string values and elements of the array within
@@ -1111,7 +1111,7 @@ The Token class
 ---------------
 
 The ``Token`` class is used to represent a single lexed token.  Tokens are
-intended to be used by the lexer/preprocess and parser libraries, but are not
+intended to be used by the lexer/preprocessor and parser libraries, but are not
 intended to live beyond them (for example, they should not live in the ASTs).
 
 Tokens most often live on the stack (or some other location that is efficient
@@ -1253,7 +1253,7 @@ In order to do this, whenever the parser expects a ``tok::identifier`` or
 ``tok::coloncolon``, it should call the ``TryAnnotateTypeOrScopeToken`` or
 ``TryAnnotateCXXScopeToken`` methods to form the annotation token.  These
 methods will maximally form the specified annotation tokens and replace the
-current token with them, if applicable.  If the current tokens is not valid for
+current token with them, if applicable.  If the current token is not valid for
 an annotation token, it will remain an identifier or "``::``" token.
 
 .. _Lexer:
@@ -1276,7 +1276,7 @@ The lexer has a couple of interesting modal features:
   This mode is used for lexing within an "``#if 0``" block, for example.
 * The lexer can capture and return comments as tokens.  This is required to
   support the ``-C`` preprocessor mode, which passes comments through, and is
-  used by the diagnostic checker to identifier expect-error annotations.
+  used by the diagnostic checker to identify expect-error annotations.
 * The lexer can be in ``ParsingFilename`` mode, which happens when
   preprocessing after reading a ``#include`` directive.  This mode changes the
   parsing of "``<``" to return an "angled string" instead of a bunch of tokens
@@ -1308,7 +1308,7 @@ The ``TokenLexer`` class
 ------------------------
 
 The ``TokenLexer`` class is a token provider that returns tokens from a list of
-tokens that came from somewhere else.  It typically used for two things: 1)
+tokens that came from somewhere else.  It is typically used for two things: 1)
 returning tokens from a macro definition as it is being expanded 2) returning
 tokens from an arbitrary buffer of tokens.  The later use is used by
 ``_Pragma`` and will most likely be used to handle unbounded look-ahead for the
@@ -1509,7 +1509,7 @@ type checker must verify that the operand has a pointer type.  It would not be
 correct to check that with "``isa<PointerType>(SubExpr->getType())``", because
 this predicate would fail if the subexpression had a typedef type.
 
-The solution to this problem are a set of helper methods on ``Type``, used to
+The solution to this problem is a set of helper methods on ``Type``, used to
 check their properties.  In this case, it would be correct to use
 "``SubExpr->getType()->isPointerType()``" to do the check.  This predicate will
 return true if the *canonical type is a pointer*, which is true any time the
@@ -1632,7 +1632,7 @@ the names are inside the ``DeclarationName`` class).
 
 ``CXXLiteralOperatorName``
 
-  The name is a C++11 user defined literal operator.  User defined
+  The name is a C++11 user-defined literal operator.  User-defined
   Literal operators are named according to the suffix they define,
   e.g., "``_foo``" for "``operator "" _foo``".  Use
   ``N.getCXXLiteralIdentifier()`` to retrieve the corresponding
@@ -2215,7 +2215,7 @@ Consequently, we must either set the virtual flag for the definition (but then
 we create a malformed AST which the parser would never create), or we import
 the whole redeclaration chain of the function. The most recent version of the
 ``ASTImporter`` uses the latter mechanism. We do import all function
-declarations - regardless if they are definitions or prototypes - in the order
+declarations - regardless of whether they are definitions or prototypes - in the order
 as they appear in the "from" context.
 
 .. One definition
@@ -2338,7 +2338,7 @@ library receive an Error object, which they must check.
 During import of a specific declaration, it may happen that some AST nodes had
 already been created before we recognize an error. In this case, we signal back
 the error to the caller, but the "to" context remains polluted with those nodes
-which had been created. Ideally, those nodes should not had been created, but
+which had been created. Ideally, those nodes should not have been created, but
 that time we did not know about the error, the error happened later. Since the
 AST is immutable (most of the cases we can't remove existing nodes) we choose
 to mark these nodes as erroneous.
@@ -2579,7 +2579,7 @@ that there are global declarations which collide with declarations from other
 translation units, but they are not referenced outside from their translation
 unit. These declarations should be in an unnamed namespace ideally. If we treat
 these collisions liberally then CTU analysis can find more results. Note, the
-feature be able to choose between name conflict handling strategies is still an
+feature to be able to choose between name conflict handling strategies is still an
 ongoing work.
 
 .. _CFG:
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 4a5e184..4a2edae 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -134,6 +134,11 @@ Bug Fixes in This Version
 -------------------------
 - Fix a crash when marco name is empty in ``#pragma push_macro("")`` or
   ``#pragma pop_macro("")``. (#GH149762).
+- `-Wunreachable-code`` now diagnoses tautological or contradictory
+  comparisons such as ``x != 0 || x != 1.0`` and ``x == 0 && x == 1.0`` on
+  targets that treat ``_Float16``/``__fp16`` as native scalar types. Previously
+  the warning was silently lost because the operands differed only by an implicit
+  cast chain. (#GH149967).
 
 Bug Fixes to Compiler Builtins
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -149,6 +154,8 @@ Bug Fixes to C++ Support
 - Diagnose binding a reference to ``*nullptr`` during constant evaluation. (#GH48665)
 - Suppress ``-Wdeprecated-declarations`` in implicitly generated functions. (#GH147293)
 - Fix a crash when deleting a pointer to an incomplete array (#GH150359).
+- Fix an assertion failure when expression in assumption attribute
+  (``[[assume(expr)]]``) creates temporary objects.
 
 Bug Fixes to AST Handling
 ^^^^^^^^^^^^^^^^^^^^^^^^^
diff --git a/clang/docs/ShadowCallStack.rst b/clang/docs/ShadowCallStack.rst
index fc8bea8..9b104cc 100644
--- a/clang/docs/ShadowCallStack.rst
+++ b/clang/docs/ShadowCallStack.rst
@@ -61,7 +61,7 @@ The instrumentation makes use of the platform register ``x18`` on AArch64,
 ``x3`` (``gp``) on RISC-V with software shadow stack and ``ssp`` on RISC-V with
 hardware shadow stack, which needs `Zicfiss`_ and ``-fcf-protection=return``.
 Users can choose between the software and hardware based shadow stack
-implementation on RISC-V backend by passing ``-fsanitize=shadowcallstack``
+implementation on RISC-V backend by passing ``-fsanitize=shadow-call-stack``
 or ``Zicfiss`` with ``-fcf-protection=return``.
 For simplicity we will refer to this as the ``SCSReg``. On some platforms,
 ``SCSReg`` is reserved, and on others, it is designated as a scratch register.
diff --git a/clang/docs/analyzer/user-docs/CrossTranslationUnit.rst b/clang/docs/analyzer/user-docs/CrossTranslationUnit.rst
index 700dac0..a04b9f8 100644
--- a/clang/docs/analyzer/user-docs/CrossTranslationUnit.rst
+++ b/clang/docs/analyzer/user-docs/CrossTranslationUnit.rst
@@ -132,7 +132,7 @@ Once we have set up the `PATH` environment variable and we activated the python
 
 .. code-block:: bash
 
-  $ CodeChecker analyze --ctu compile_commands.json -o reports
+  $ CodeChecker analyze --ctu --ctu-ast-mode load-from-pch compile_commands.json -o reports
   $ ls -F
   compile_commands.json  foo.cpp  foo.cpp.ast  main.cpp  reports/
   $ tree reports
@@ -318,7 +318,7 @@ Once we have set up the `PATH` environment variable and we activated the python
 
 .. code-block:: bash
 
-  $ CodeChecker analyze --ctu --ctu-ast-loading-mode on-demand compile_commands.json -o reports
+  $ CodeChecker analyze --ctu compile_commands.json -o reports
   $ ls -F
   compile_commands.json  foo.cpp main.cpp  reports/
   $ tree reports
diff --git a/clang/include/clang/AST/ASTContext.h b/clang/include/clang/AST/ASTContext.h
index 0273109..3b98274a 100644
--- a/clang/include/clang/AST/ASTContext.h
+++ b/clang/include/clang/AST/ASTContext.h
@@ -1818,7 +1818,7 @@ public:
         NumPositiveBits = std::max({NumPositiveBits, ActiveBits, 1u});
       } else {
         NumNegativeBits =
-            std::max(NumNegativeBits, (unsigned)InitVal.getSignificantBits());
+            std::max(NumNegativeBits, InitVal.getSignificantBits());
       }
 
       MembersRepresentableByInt &= isRepresentableIntegerValue(InitVal, IntTy);
diff --git a/clang/include/clang/AST/Expr.h b/clang/include/clang/AST/Expr.h
index 708c6e2..6c124aa 100644
--- a/clang/include/clang/AST/Expr.h
+++ b/clang/include/clang/AST/Expr.h
@@ -16,7 +16,6 @@
 #include "clang/AST/APNumericStorage.h"
 #include "clang/AST/APValue.h"
 #include "clang/AST/ASTVector.h"
-#include "clang/AST/Attr.h"
 #include "clang/AST/ComputeDependence.h"
 #include "clang/AST/Decl.h"
 #include "clang/AST/DeclAccessPair.h"
@@ -58,6 +57,7 @@ namespace clang {
   class StringLiteral;
   class TargetInfo;
   class ValueDecl;
+  class WarnUnusedResultAttr;
 
 /// A simple array of base specifiers.
 typedef SmallVector<CXXBaseSpecifier*, 4> CXXCastPath;
diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td
index 224cb6a..b3ff45b 100644
--- a/clang/include/clang/Basic/Attr.td
+++ b/clang/include/clang/Basic/Attr.td
@@ -1642,7 +1642,7 @@ def DeviceKernel : DeclOrTypeAttr {
 }
 
 def SYCLKernelEntryPoint : InheritableAttr {
-  let Spellings = [Clang<"sycl_kernel_entry_point">];
+  let Spellings = [CXX11<"clang", "sycl_kernel_entry_point">];
   let Args = [
     // KernelName is required and specifies the kernel name type.
     TypeArgument<"KernelName">,
diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def
index 945e11b..bb3953e 100644
--- a/clang/include/clang/Basic/BuiltinsAMDGPU.def
+++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def
@@ -651,6 +651,16 @@ TARGET_BUILTIN(__builtin_amdgcn_global_load_monitor_b128, "V4iV4i*1Ii", "nc", "g
 TARGET_BUILTIN(__builtin_amdgcn_flat_load_monitor_b32, "ii*0Ii", "nc", "gfx1250-insts")
 TARGET_BUILTIN(__builtin_amdgcn_flat_load_monitor_b64, "V2iV2i*0Ii", "nc", "gfx1250-insts")
 TARGET_BUILTIN(__builtin_amdgcn_flat_load_monitor_b128, "V4iV4i*0Ii", "nc", "gfx1250-insts")
+TARGET_BUILTIN(__builtin_amdgcn_global_load_async_to_lds_b8, "vc*1c*3IiIi", "nc", "gfx1250-insts")
+TARGET_BUILTIN(__builtin_amdgcn_global_load_async_to_lds_b32, "vi*1i*3IiIi", "nc", "gfx1250-insts")
+TARGET_BUILTIN(__builtin_amdgcn_global_load_async_to_lds_b64, "vV2i*1V2i*3IiIi", "nc", "gfx1250-insts")
+TARGET_BUILTIN(__builtin_amdgcn_global_load_async_to_lds_b128, "vV4i*1V4i*3IiIi", "nc", "gfx1250-insts")
+TARGET_BUILTIN(__builtin_amdgcn_global_store_async_from_lds_b8, "vc*1c*3IiIi", "nc", "gfx1250-insts")
+TARGET_BUILTIN(__builtin_amdgcn_global_store_async_from_lds_b32, "vi*1i*3IiIi", "nc", "gfx1250-insts")
+TARGET_BUILTIN(__builtin_amdgcn_global_store_async_from_lds_b64, "vV2i*1V2i*3IiIi", "nc", "gfx1250-insts")
+TARGET_BUILTIN(__builtin_amdgcn_global_store_async_from_lds_b128, "vV4i*1V4i*3IiIi", "nc", "gfx1250-insts")
+TARGET_BUILTIN(__builtin_amdgcn_ds_atomic_async_barrier_arrive_b64, "vLi*3", "nc", "gfx1250-insts")
+TARGET_BUILTIN(__builtin_amdgcn_ds_atomic_barrier_arrive_rtn_b64, "LiLi*3Li", "nc", "gfx1250-insts")
 
 TARGET_BUILTIN(__builtin_amdgcn_tensor_load_to_lds, "vV4iV8iV4iV4iIi", "nc", "gfx1250-insts")
 TARGET_BUILTIN(__builtin_amdgcn_tensor_load_to_lds_d2, "vV4iV8iIi", "nc", "gfx1250-insts")
@@ -670,9 +680,6 @@ TARGET_BUILTIN(__builtin_amdgcn_ds_load_tr16_b128_v8i16, "V8sV8s*3", "nc", "gfx1
 TARGET_BUILTIN(__builtin_amdgcn_ds_load_tr16_b128_v8f16, "V8hV8h*3", "nc", "gfx1250-insts,wavefrontsize32")
 TARGET_BUILTIN(__builtin_amdgcn_ds_load_tr16_b128_v8bf16, "V8yV8y*3", "nc", "gfx1250-insts,wavefrontsize32")
 
-TARGET_BUILTIN(__builtin_amdgcn_ds_atomic_async_barrier_arrive_b64, "vLi*3", "nc", "gfx1250-insts")
-TARGET_BUILTIN(__builtin_amdgcn_ds_atomic_barrier_arrive_rtn_b64, "LiLi*3Li", "nc", "gfx1250-insts")
-
 TARGET_BUILTIN(__builtin_amdgcn_s_setprio_inc_wg, "vIs", "n", "setprio-inc-wg-inst")
 TARGET_BUILTIN(__builtin_amdgcn_s_monitor_sleep,  "vIs", "n", "gfx1250-insts")
 
@@ -690,10 +697,27 @@ TARGET_BUILTIN(__builtin_amdgcn_exp2_bf16, "yy", "nc", "bf16-trans-insts")
 TARGET_BUILTIN(__builtin_amdgcn_sin_bf16, "yy", "nc", "bf16-trans-insts")
 TARGET_BUILTIN(__builtin_amdgcn_cos_bf16, "yy", "nc", "bf16-trans-insts")
 
+TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_pk_bf16_f32, "V2yffi", "nc", "bf16-cvt-insts")
+TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_pk_f16_f32, "V2hffi", "nc", "gfx1250-insts")
 TARGET_BUILTIN(__builtin_amdgcn_cvt_f16_fp8, "hiIi", "nc", "gfx1250-insts")
 TARGET_BUILTIN(__builtin_amdgcn_cvt_f16_bf8, "hiIi", "nc", "gfx1250-insts")
 TARGET_BUILTIN(__builtin_amdgcn_cvt_pk_f16_fp8, "V2hs", "nc", "gfx1250-insts")
 TARGET_BUILTIN(__builtin_amdgcn_cvt_pk_f16_bf8, "V2hs", "nc", "gfx1250-insts")
+TARGET_BUILTIN(__builtin_amdgcn_cvt_pk_fp8_f16, "sV2h", "nc", "gfx1250-insts")
+TARGET_BUILTIN(__builtin_amdgcn_cvt_pk_bf8_f16, "sV2h", "nc", "gfx1250-insts")
+TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_fp8_f16, "ihiUiIi", "nc", "gfx1250-insts")
+TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_bf8_f16, "ihiUiIi", "nc", "gfx1250-insts")
+TARGET_BUILTIN(__builtin_amdgcn_cvt_scale_pk8_f16_fp8, "V8hV2UiUiIUi", "nc", "gfx1250-insts")
+TARGET_BUILTIN(__builtin_amdgcn_cvt_scale_pk8_bf16_fp8, "V8yV2UiUiIUi", "nc", "gfx1250-insts")
+TARGET_BUILTIN(__builtin_amdgcn_cvt_scale_pk8_f16_bf8, "V8hV2UiUiIUi", "nc", "gfx1250-insts")
+TARGET_BUILTIN(__builtin_amdgcn_cvt_scale_pk8_bf16_bf8, "V8yV2UiUiIUi", "nc", "gfx1250-insts")
+TARGET_BUILTIN(__builtin_amdgcn_cvt_scale_pk8_f16_fp4, "V8hUiUiIUi", "nc", "gfx1250-insts")
+TARGET_BUILTIN(__builtin_amdgcn_cvt_scale_pk8_bf16_fp4, "V8yUiUiIUi", "nc", "gfx1250-insts")
+TARGET_BUILTIN(__builtin_amdgcn_cvt_scale_pk8_f32_fp8, "V8fV2UiUiIUi", "nc", "gfx1250-insts")
+TARGET_BUILTIN(__builtin_amdgcn_cvt_scale_pk8_f32_bf8, "V8fV2UiUiIUi", "nc", "gfx1250-insts")
+TARGET_BUILTIN(__builtin_amdgcn_cvt_scale_pk8_f32_fp4, "V8fUiUiIUi", "nc", "gfx1250-insts")
+TARGET_BUILTIN(__builtin_amdgcn_cvt_pk_fp8_f32_e5m3, "iffiIb", "nc", "fp8e5m3-insts")
+TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_fp8_f32_e5m3, "ifiiIi", "nc", "fp8e5m3-insts")
 TARGET_BUILTIN(__builtin_amdgcn_sat_pk4_i4_i8, "UsUi", "nc", "gfx1250-insts")
 TARGET_BUILTIN(__builtin_amdgcn_sat_pk4_u4_u8, "UsUi", "nc", "gfx1250-insts")
 
diff --git a/clang/include/clang/Basic/BuiltinsNVPTX.td b/clang/include/clang/Basic/BuiltinsNVPTX.td
index 6e531ef..2d6fa17 100644
--- a/clang/include/clang/Basic/BuiltinsNVPTX.td
+++ b/clang/include/clang/Basic/BuiltinsNVPTX.td
@@ -21,13 +21,17 @@ class SM<string version, list<SMFeatures> newer_list> : SMFeatures {
                         !strconcat(f, "|", newer.Features));
 }
 
+let Features = "sm_121a" in def SM_121a : SMFeatures;
 let Features = "sm_120a" in def SM_120a : SMFeatures;
+let Features = "sm_103a" in def SM_103a : SMFeatures;
 let Features = "sm_101a" in def SM_101a : SMFeatures;
 let Features = "sm_100a" in def SM_100a : SMFeatures;
 let Features = "sm_90a" in def SM_90a : SMFeatures;
 
-def SM_120 : SM<"120", [SM_120a]>;
-def SM_101 : SM<"101", [SM_101a, SM_120]>;
+def SM_121 : SM<"121", [SM_121a]>;
+def SM_120 : SM<"120", [SM_120a, SM_121]>;
+def SM_103 : SM<"103", [SM_103a, SM_120]>;
+def SM_101 : SM<"101", [SM_101a, SM_103]>;
 def SM_100 : SM<"100", [SM_100a, SM_101]>;
 def SM_90 : SM<"90", [SM_90a, SM_100]>;
 def SM_89 : SM<"89", [SM_90]>;
@@ -50,8 +54,9 @@ class PTX<string version, PTXFeatures newer> : PTXFeatures {
   let Features = !strconcat("ptx", version, "|", newer.Features);
 }
 
-let Features = "ptx87" in def PTX87 : PTXFeatures;
+let Features = "ptx88" in def PTX88 : PTXFeatures;
 
+def PTX87 : PTX<"87", PTX88>;
 def PTX86 : PTX<"86", PTX87>;
 def PTX85 : PTX<"85", PTX86>;
 def PTX84 : PTX<"84", PTX85>;
diff --git a/clang/include/clang/Basic/Cuda.h b/clang/include/clang/Basic/Cuda.h
index d6a22a7..81a792d 100644
--- a/clang/include/clang/Basic/Cuda.h
+++ b/clang/include/clang/Basic/Cuda.h
@@ -47,9 +47,10 @@ enum class CudaVersion {
   CUDA_125,
   CUDA_126,
   CUDA_128,
-  FULLY_SUPPORTED = CUDA_123,
+  CUDA_129,
+  FULLY_SUPPORTED = CUDA_128,
   PARTIALLY_SUPPORTED =
-      CUDA_128, // Partially supported. Proceed with a warning.
+      CUDA_129, // Partially supported. Proceed with a warning.
   NEW = 10000,  // Too new. Issue a warning, but allow using it.
 };
 const char *CudaVersionToString(CudaVersion V);
diff --git a/clang/include/clang/Basic/CustomizableOptional.h b/clang/include/clang/Basic/CustomizableOptional.h
index 2d6ae6a..8559eaa 100644
--- a/clang/include/clang/Basic/CustomizableOptional.h
+++ b/clang/include/clang/Basic/CustomizableOptional.h
@@ -70,15 +70,6 @@ public:
 
   void reset() { Storage.reset(); }
 
-  LLVM_DEPRECATED("Use &*X instead.", "&*X")
-  constexpr const T *getPointer() const { return &Storage.value(); }
-  LLVM_DEPRECATED("Use &*X instead.", "&*X")
-  T *getPointer() { return &Storage.value(); }
-  LLVM_DEPRECATED("std::optional::value is throwing. Use *X instead", "*X")
-  constexpr const T &value() const & { return Storage.value(); }
-  LLVM_DEPRECATED("std::optional::value is throwing. Use *X instead", "*X")
-  T &value() & { return Storage.value(); }
-
   constexpr explicit operator bool() const { return has_value(); }
   constexpr bool has_value() const { return Storage.has_value(); }
   constexpr const T *operator->() const { return &Storage.value(); }
@@ -90,8 +81,6 @@ public:
     return has_value() ? operator*() : std::forward<U>(alt);
   }
 
-  LLVM_DEPRECATED("std::optional::value is throwing. Use *X instead", "*X")
-  T &&value() && { return std::move(Storage.value()); }
   T &&operator*() && { return std::move(Storage.value()); }
 
   template <typename U> T value_or(U &&alt) && {
diff --git a/clang/include/clang/Basic/DiagnosticASTKinds.td b/clang/include/clang/Basic/DiagnosticASTKinds.td
index 071a38f..46d04b0 100644
--- a/clang/include/clang/Basic/DiagnosticASTKinds.td
+++ b/clang/include/clang/Basic/DiagnosticASTKinds.td
@@ -511,6 +511,14 @@ def note_odr_number_of_bases : Note<
   "class has %0 base %plural{1:class|:classes}0">;
 def note_odr_enumerator : Note<"enumerator %0 with value %1 here">;
 def note_odr_missing_enumerator : Note<"no corresponding enumerator here">;
+def note_odr_incompatible_fixed_underlying_type : Note<
+  "enumeration %0 declared with incompatible fixed underlying types (%1 vs. "
+  "%2)">;
+def note_odr_fixed_underlying_type : Note<
+  "enumeration %0 has fixed underlying type here">;
+def note_odr_missing_fixed_underlying_type : Note<
+  "enumeration %0 missing fixed underlying type here">;
+
 def err_odr_field_type_inconsistent : Error<
   "field %0 declared with incompatible types in different "
   "translation units (%1 vs. %2)">;
diff --git a/clang/include/clang/Basic/DiagnosticIDs.h b/clang/include/clang/Basic/DiagnosticIDs.h
index f07a003..b21a3b6 100644
--- a/clang/include/clang/Basic/DiagnosticIDs.h
+++ b/clang/include/clang/Basic/DiagnosticIDs.h
@@ -272,6 +272,11 @@ public:
   DiagnosticIDs();
   ~DiagnosticIDs();
 
+  // Convenience method to construct a new refcounted DiagnosticIDs.
+  static llvm::IntrusiveRefCntPtr<DiagnosticIDs> create() {
+    return llvm::makeIntrusiveRefCnt<DiagnosticIDs>();
+  }
+
   /// Return an ID for a diagnostic with the specified format string and
   /// level.
   ///
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index da4216d..94b174c 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -1812,10 +1812,7 @@ def note_unsatisfied_trait_reason
            "%DeletedAssign{has a deleted %select{copy|move}1 "
            "assignment operator}|"
            "%UnionWithUserDeclaredSMF{is a union with a user-declared "
-           "%sub{select_special_member_kind}1}|"
-           "%FunctionType{is a function type}|"
-           "%CVVoidType{is a cv void type}|"
-           "%IncompleteArrayType{is an incomplete array type}"
+           "%sub{select_special_member_kind}1}"
            "}0">;
 
 def warn_consteval_if_always_true : Warning<
@@ -9331,8 +9328,28 @@ def err_atomic_builtin_pointer_size : Error<
   "address argument to atomic builtin must be a pointer to 1,2,4,8 or 16 byte "
   "type (%0 invalid)">;
 def err_atomic_exclusive_builtin_pointer_size : Error<
-  "address argument to load or store exclusive builtin must be a pointer to"
-  " 1,2,4 or 8 byte type (%0 invalid)">;
+  "address argument to load or store exclusive builtin must be a pointer to "
+  // Because the range of legal sizes for load/store exclusive varies with the
+  // Arm architecture version, this error message wants to be able to specify
+  // various different subsets of the sizes 1, 2, 4, 8. Rather than make a
+  // separate diagnostic for each subset, I've arranged here that _this_ error
+  // can display any combination of the sizes. For each size there are two
+  // %select parameters: the first chooses whether you need a "," or " or " to
+  // separate the number from a previous one (or neither), and the second
+  // parameter indicates whether to display the number itself.
+  //
+  // (The very first of these parameters isn't really necessary, since you
+  // never want to start with "," or " or " before the first number in the
+  // list, but it keeps it simple to make it look exactly like the other cases,
+  // and also allows a loop constructing this diagnostic to handle every case
+  // exactly the same.)
+  "%select{|,| or }1%select{|1}2"
+  "%select{|,| or }3%select{|2}4"
+  "%select{|,| or }5%select{|4}6"
+  "%select{|,| or }7%select{|8}8"
+  " byte type (%0 invalid)">;
+def err_atomic_exclusive_builtin_pointer_size_none : Error<
+  "load and store exclusive builtins are not available on this architecture">;
 def err_atomic_builtin_ext_int_size : Error<
   "atomic memory operand must have a power-of-two size">;
 def err_atomic_builtin_bit_int_prohibit : Error<
@@ -12919,31 +12936,29 @@ def err_sycl_special_type_num_init_method : Error<
 
 // SYCL kernel entry point diagnostics
 def err_sycl_entry_point_invalid : Error<
-  "'sycl_kernel_entry_point' attribute cannot be applied to a"
+  "the %0 attribute cannot be applied to a"
   " %select{non-static member function|variadic function|deleted function|"
            "defaulted function|constexpr function|consteval function|"
            "function declared with the 'noreturn' attribute|coroutine|"
-           "function defined with a function try block}0">;
+           "function defined with a function try block}1">;
 def err_sycl_entry_point_invalid_redeclaration : Error<
-  "'sycl_kernel_entry_point' kernel name argument does not match prior"
-  " declaration%diff{: $ vs $|}0,1">;
+  "the %0 kernel name argument does not match prior"
+  " declaration%diff{: $ vs $|}1,2">;
 def err_sycl_kernel_name_conflict : Error<
-  "'sycl_kernel_entry_point' kernel name argument conflicts with a previous"
-  " declaration">;
+  "the %0 kernel name argument conflicts with a previous declaration">;
 def warn_sycl_kernel_name_not_a_class_type : Warning<
   "%0 is not a valid SYCL kernel name type; a non-union class type is required">,
   InGroup<DiagGroup<"nonportable-sycl">>, DefaultError;
 def warn_sycl_entry_point_redundant_declaration : Warning<
-  "redundant 'sycl_kernel_entry_point' attribute">, InGroup<RedundantAttribute>;
+  "redundant %0 attribute">, InGroup<RedundantAttribute>;
 def err_sycl_entry_point_after_definition : Error<
-  "'sycl_kernel_entry_point' attribute cannot be added to a function after the"
-  " function is defined">;
+  "the %0 attribute cannot be added to a function after the function is"
+  " defined">;
 def err_sycl_entry_point_return_type : Error<
-  "'sycl_kernel_entry_point' attribute only applies to functions with a"
-  " 'void' return type">;
+  "the %0 attribute only applies to functions with a 'void' return type">;
 def err_sycl_entry_point_deduced_return_type : Error<
-  "'sycl_kernel_entry_point' attribute only applies to functions with a"
-  " non-deduced 'void' return type">;
+  "the %0 attribute only applies to functions with a non-deduced 'void' return"
+  " type">;
 
 def warn_cuda_maxclusterrank_sm_90 : Warning<
   "maxclusterrank requires sm_90 or higher, CUDA arch provided: %0, ignoring "
diff --git a/clang/include/clang/Basic/FileManager.h b/clang/include/clang/Basic/FileManager.h
index e83a61d..337911e 100644
--- a/clang/include/clang/Basic/FileManager.h
+++ b/clang/include/clang/Basic/FileManager.h
@@ -237,11 +237,6 @@ public:
   FileEntryRef getVirtualFileRef(StringRef Filename, off_t Size,
                                  time_t ModificationTime);
 
-  LLVM_DEPRECATED("Functions returning FileEntry are deprecated.",
-                  "getVirtualFileRef()")
-  const FileEntry *getVirtualFile(StringRef Filename, off_t Size,
-                                  time_t ModificationTime);
-
   /// Retrieve a FileEntry that bypasses VFE, which is expected to be a virtual
   /// file entry, to access the real file.  The returned FileEntry will have
   /// the same filename as FE but a different identity and its own stat.
diff --git a/clang/include/clang/Basic/OffloadArch.h b/clang/include/clang/Basic/OffloadArch.h
index 4dda3ec..387a684 100644
--- a/clang/include/clang/Basic/OffloadArch.h
+++ b/clang/include/clang/Basic/OffloadArch.h
@@ -45,8 +45,12 @@ enum class OffloadArch {
   SM_100a,
   SM_101,
   SM_101a,
+  SM_103,
+  SM_103a,
   SM_120,
   SM_120a,
+  SM_121,
+  SM_121a,
   GFX600,
   GFX601,
   GFX602,
diff --git a/clang/include/clang/Basic/TargetInfo.h b/clang/include/clang/Basic/TargetInfo.h
index abfbdfa..ce4677e 100644
--- a/clang/include/clang/Basic/TargetInfo.h
+++ b/clang/include/clang/Basic/TargetInfo.h
@@ -1071,6 +1071,17 @@ public:
   /// as Custom Datapath.
   uint32_t getARMCDECoprocMask() const { return ARMCDECoprocMask; }
 
+  /// For ARM targets returns a mask defining which data sizes are suitable for
+  /// __builtin_arm_ldrex and __builtin_arm_strex.
+  enum {
+    ARM_LDREX_B = (1 << 0), /// byte (8-bit)
+    ARM_LDREX_H = (1 << 1), /// half (16-bit)
+    ARM_LDREX_W = (1 << 2), /// word (32-bit)
+    ARM_LDREX_D = (1 << 3), /// double (64-bit)
+  };
+
+  virtual unsigned getARMLDREXMask() const { return 0; }
+
   /// Returns whether the passed in string is a valid clobber in an
   /// inline asm statement.
   ///
diff --git a/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h b/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h
index 5c04d59..b26e558 100644
--- a/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h
+++ b/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h
@@ -75,6 +75,12 @@ public:
     return getConstant(loc, cir::IntAttr::get(ty, value));
   }
 
+  mlir::Value getSignedInt(mlir::Location loc, int64_t val, unsigned numBits) {
+    auto type = cir::IntType::get(getContext(), numBits, /*isSigned=*/true);
+    return getConstAPInt(loc, type,
+                         llvm::APInt(numBits, val, /*isSigned=*/true));
+  }
+
   mlir::Value getUnsignedInt(mlir::Location loc, uint64_t val,
                              unsigned numBits) {
     auto type = cir::IntType::get(getContext(), numBits, /*isSigned=*/false);
@@ -441,6 +447,10 @@ public:
     return create<cir::CmpOp>(loc, getBoolTy(), kind, lhs, rhs);
   }
 
+  mlir::Value createIsNaN(mlir::Location loc, mlir::Value operand) {
+    return createCompare(loc, cir::CmpOpKind::ne, operand, operand);
+  }
+
   mlir::Value createShift(mlir::Location loc, mlir::Value lhs, mlir::Value rhs,
                           bool isShiftLeft) {
     return create<cir::ShiftOp>(loc, lhs.getType(), lhs, rhs, isShiftLeft);
diff --git a/clang/include/clang/CIR/Dialect/IR/CIRAttrs.td b/clang/include/clang/CIR/Dialect/IR/CIRAttrs.td
index 9961544..588fb0d 100644
--- a/clang/include/clang/CIR/Dialect/IR/CIRAttrs.td
+++ b/clang/include/clang/CIR/Dialect/IR/CIRAttrs.td
@@ -42,7 +42,7 @@ class CIR_TypedAttr<string name, string attrMnemonic, list<Trait> traits = []>
   let assemblyFormat = [{}];
 }
 
-class CIRUnitAttr<string name, string attrMnemonic, list<Trait> traits = []>
+class CIR_UnitAttr<string name, string attrMnemonic, list<Trait> traits = []>
     : CIR_Attr<name, attrMnemonic, traits> {
   let returnType = "bool";
   let defaultValue = "false";
@@ -127,7 +127,7 @@ def CIR_BoolAttr : CIR_Attr<"Bool", "bool", [TypedAttrInterface]> {
 // ZeroAttr
 //===----------------------------------------------------------------------===//
 
-def ZeroAttr : CIR_TypedAttr<"Zero", "zero"> {
+def CIR_ZeroAttr : CIR_TypedAttr<"Zero", "zero"> {
   let summary = "Attribute to represent zero initialization";
   let description = [{
     The ZeroAttr is used to indicate zero initialization on structs.
@@ -138,7 +138,7 @@ def ZeroAttr : CIR_TypedAttr<"Zero", "zero"> {
 // UndefAttr
 //===----------------------------------------------------------------------===//
 
-def UndefAttr : CIR_TypedAttr<"Undef", "undef"> {
+def CIR_UndefAttr : CIR_TypedAttr<"Undef", "undef"> {
   let summary = "Represent an undef constant";
   let description = [{
     The UndefAttr represents an undef constant, corresponding to LLVM's notion
@@ -264,7 +264,9 @@ def CIR_FPAttr : CIR_Attr<"FP", "fp", [TypedAttrInterface]> {
 // ConstArrayAttr
 //===----------------------------------------------------------------------===//
 
-def ConstArrayAttr : CIR_Attr<"ConstArray", "const_array", [TypedAttrInterface]> {
+def CIR_ConstArrayAttr : CIR_Attr<"ConstArray", "const_array", [
+  TypedAttrInterface
+]> {
   let summary = "A constant array from ArrayAttr or StringRefAttr";
   let description = [{
     An CIR array attribute is an array of literals of the specified attr types.
@@ -310,8 +312,9 @@ def ConstArrayAttr : CIR_Attr<"ConstArray", "const_array", [TypedAttrInterface]>
 // ConstVectorAttr
 //===----------------------------------------------------------------------===//
 
-def ConstVectorAttr : CIR_Attr<"ConstVector", "const_vector",
-                               [TypedAttrInterface]> {
+def CIR_ConstVectorAttr : CIR_Attr<"ConstVector", "const_vector", [
+  TypedAttrInterface
+]> {
   let summary = "A constant vector from ArrayAttr";
   let description = [{
     A CIR vector attribute is an array of literals of the specified attribute
@@ -342,7 +345,7 @@ def ConstVectorAttr : CIR_Attr<"ConstVector", "const_vector",
 // ConstPtrAttr
 //===----------------------------------------------------------------------===//
 
-def ConstPtrAttr : CIR_Attr<"ConstPtr", "ptr", [TypedAttrInterface]> {
+def CIR_ConstPtrAttr : CIR_Attr<"ConstPtr", "ptr", [TypedAttrInterface]> {
   let summary = "Holds a constant pointer value";
   let parameters = (ins
     AttributeSelfTypeParameter<"", "::cir::PointerType">:$type,
@@ -371,8 +374,9 @@ def ConstPtrAttr : CIR_Attr<"ConstPtr", "ptr", [TypedAttrInterface]> {
 // ConstComplexAttr
 //===----------------------------------------------------------------------===//
 
-def ConstComplexAttr : CIR_Attr<"ConstComplex", "const_complex",
-                                [TypedAttrInterface]> {
+def CIR_ConstComplexAttr : CIR_Attr<"ConstComplex", "const_complex", [
+  TypedAttrInterface
+]> {
   let summary = "An attribute that contains a constant complex value";
   let description = [{
     The `#cir.const_complex` attribute contains a constant value of complex
@@ -454,7 +458,7 @@ def CIR_VisibilityAttr : CIR_EnumAttr<CIR_VisibilityKind, "visibility"> {
 // BitfieldInfoAttr
 //===----------------------------------------------------------------------===//
 
-def BitfieldInfoAttr : CIR_Attr<"BitfieldInfo", "bitfield_info"> {
+def CIR_BitfieldInfoAttr : CIR_Attr<"BitfieldInfo", "bitfield_info"> {
   let summary = "Represents info for a bit-field member";
   let description = [{
     Holds the following information about bitfields: name, storage type, size
@@ -512,5 +516,4 @@ def BitfieldInfoAttr : CIR_Attr<"BitfieldInfo", "bitfield_info"> {
   ];
 }
 
-
 #endif // CLANG_CIR_DIALECT_IR_CIRATTRS_TD
diff --git a/clang/include/clang/CIR/Dialect/IR/CIROps.td b/clang/include/clang/CIR/Dialect/IR/CIROps.td
index e2ddbd1..5ef5b60 100644
--- a/clang/include/clang/CIR/Dialect/IR/CIROps.td
+++ b/clang/include/clang/CIR/Dialect/IR/CIROps.td
@@ -607,8 +607,8 @@ def CIR_ConditionOp : CIR_Op<"condition", [
 //===----------------------------------------------------------------------===//
 
 defvar CIR_YieldableScopes = [
-  "ArrayCtor", "CaseOp", "DoWhileOp", "ForOp", "IfOp", "ScopeOp", "SwitchOp",
-  "TernaryOp", "WhileOp"
+  "ArrayCtor", "ArrayDtor", "CaseOp", "DoWhileOp", "ForOp", "IfOp", "ScopeOp",
+  "SwitchOp", "TernaryOp", "WhileOp"
 ];
 
 def CIR_YieldOp : CIR_Op<"yield", [
@@ -1747,7 +1747,7 @@ def CIR_SetBitfieldOp : CIR_Op<"set_bitfield"> {
   let arguments = (ins
     Arg<CIR_PointerType, "the address to store the value", [MemWrite]>:$addr,
     CIR_AnyType:$src,
-    BitfieldInfoAttr:$bitfield_info,
+    CIR_BitfieldInfoAttr:$bitfield_info,
     DefaultValuedOptionalAttr<I64Attr, "0">:$alignment,
     UnitAttr:$is_volatile
   );
@@ -1834,7 +1834,7 @@ def CIR_GetBitfieldOp : CIR_Op<"get_bitfield"> {
 
   let arguments = (ins
     Arg<CIR_PointerType, "the address to load from", [MemRead]>:$addr,
-    BitfieldInfoAttr:$bitfield_info,
+    CIR_BitfieldInfoAttr:$bitfield_info,
     DefaultValuedOptionalAttr<I64Attr, "0">:$alignment,
     UnitAttr:$is_volatile
     );
@@ -1946,6 +1946,10 @@ def CIR_FuncOp : CIR_Op<"func", [
     The function linkage information is specified by `linkage`, as defined by
     `GlobalLinkageKind` attribute.
 
+    The `no_proto` keyword is used to identify functions that were declared
+    without a prototype and, consequently, may contain calls with invalid
+    arguments and undefined behavior.
+
     Example:
 
     ```mlir
@@ -1964,6 +1968,7 @@ def CIR_FuncOp : CIR_Op<"func", [
   let arguments = (ins SymbolNameAttr:$sym_name,
                        CIR_VisibilityAttr:$global_visibility,
                        TypeAttrOf<CIR_FuncType>:$function_type,
+                       UnitAttr:$no_proto,
                        UnitAttr:$dso_local,
                        DefaultValuedAttr<CIR_GlobalLinkageKind,
                                          "cir::GlobalLinkageKind::ExternalLinkage">:$linkage,
@@ -2005,13 +2010,6 @@ def CIR_FuncOp : CIR_Op<"func", [
        return getFunctionType().getReturnTypes();
     }
 
-    // TODO(cir): this should be an operand attribute, but for now we just hard-
-    // wire this as a function. Will later add a $no_proto argument to this op.
-    bool getNoProto() {
-      assert(!cir::MissingFeatures::opFuncNoProto());
-      return false;
-    }
-
     //===------------------------------------------------------------------===//
     // SymbolOpInterface Methods
     //===------------------------------------------------------------------===//
@@ -2229,7 +2227,7 @@ def CIR_TrapOp : CIR_Op<"trap", [Terminator]> {
 }
 
 //===----------------------------------------------------------------------===//
-// ArrayCtor
+// ArrayCtor & ArrayDtor
 //===----------------------------------------------------------------------===//
 
 class CIR_ArrayInitDestroy<string mnemonic> : CIR_Op<mnemonic> {
@@ -2260,7 +2258,9 @@ def CIR_ArrayCtor : CIR_ArrayInitDestroy<"array.ctor"> {
   let description = [{
     Initialize each array element using the same C++ constructor. This
     operation has one region, with one single block. The block has an
-    incoming argument for the current array index to initialize.
+    incoming argument for the current array element to initialize.
+
+    Example:
 
     ```mlir
     cir.array.ctor(%0 : !cir.ptr<!cir.array<!rec_S x 42>>) {
@@ -2272,6 +2272,25 @@ def CIR_ArrayCtor : CIR_ArrayInitDestroy<"array.ctor"> {
   }];
 }
 
+def CIR_ArrayDtor : CIR_ArrayInitDestroy<"array.dtor"> {
+  let summary = "Destroy array elements with C++ dtors";
+  let description = [{
+    Destroy each array element using the same C++ destructor. This
+    operation has one region, with one single block. The block has an
+    incoming argument for the current array element to destruct.
+
+    Example:
+
+    ```mlir
+    cir.array.dtor(%0 : !cir.ptr<!cir.array<!rec_S x 42>>) {
+      ^bb0(%arg0: !cir.ptr<!rec_S>):
+        cir.call @some_dtor(%arg0) : (!cir.ptr<!rec_S>) -> ()
+        cir.yield
+    }
+    ```
+  }];
+}
+
 //===----------------------------------------------------------------------===//
 // VecCreate
 //===----------------------------------------------------------------------===//
@@ -2804,6 +2823,53 @@ def CIR_ComplexSubOp : CIR_Op<"complex.sub", [
   }];
 }
 
+//===----------------------------------------------------------------------===//
+// ComplexMulOp
+//===----------------------------------------------------------------------===//
+
+def CIR_ComplexRangeKind : CIR_I32EnumAttr<
+  "ComplexRangeKind", "complex multiplication and division implementation", [
+    I32EnumAttrCase<"Full", 0, "full">,
+    I32EnumAttrCase<"Improved", 1, "improved">,
+    I32EnumAttrCase<"Promoted", 2, "promoted">,
+    I32EnumAttrCase<"Basic", 3, "basic">,
+]>;
+
+def CIR_ComplexMulOp : CIR_Op<"complex.mul", [
+  Pure, SameOperandsAndResultType
+]> {
+  let summary = "Complex multiplication";
+  let description = [{
+    The `cir.complex.mul` operation takes two complex numbers and returns
+    their product.
+
+    Range is used to select the implementation used when the operation
+    is lowered to the LLVM dialect. For multiplication, 'improved',
+    'promoted', and 'basic' are all handled equivalently, producing the
+    algebraic formula with no special handling for NaN value. If 'full' is
+    used, a runtime-library function is called if one of the intermediate
+    calculations produced a NaN value.
+
+    Example:
+
+    ```mlir
+    %2 = cir.complex.mul %0, %1 range(basic) : !cir.complex<!cir.float>
+    %2 = cir.complex.mul %0, %1 range(full) : !cir.complex<!cir.float>
+    ```
+  }];
+
+  let arguments = (ins
+    CIR_ComplexType:$lhs,
+    CIR_ComplexType:$rhs,
+    CIR_ComplexRangeKind:$range
+  );
+
+  let results = (outs CIR_ComplexType:$result);
+
+  let assemblyFormat = [{
+    $lhs `,` $rhs `range` `(` $range `)` `:` qualified(type($result)) attr-dict
+  }];
+}
 
 //===----------------------------------------------------------------------===//
 // Bit Manipulation Operations
@@ -2911,6 +2977,28 @@ def CIR_BitCtzOp : CIR_BitZeroCountOpBase<"ctz",
   }];
 }
 
+def CIR_BitFfsOp : CIR_BitOpBase<"ffs", CIR_SIntOfWidths<[32, 64]>> {
+  let summary = "Get the position of the least significant 1-bit in input";
+  let description = [{
+    Compute the 1-based position of the least significant 1-bit of the input.
+
+    The input integer must be a signed integer. The `cir.ffs` operation returns
+    one plus the index of the least significant 1-bit of the input signed
+    integer. If the input integer is 0, `cir.ffs` yields 0.
+
+    Example:
+
+    ```mlir
+    !s32i = !cir.int<s, 32>
+
+    // %0 = 0x0010_1000
+    %0 = cir.const #cir.int<40> : !s32i
+    // #1 will be 4 since the 4th least significant bit is 1.
+    %1 = cir.ffs %0 : !s32i
+    ```
+  }];
+}
+
 def CIR_BitParityOp : CIR_BitOpBase<"parity", CIR_UIntOfWidths<[32, 64]>> {
   let summary = "Get the parity of input";
   let description = [{
diff --git a/clang/include/clang/CIR/MissingFeatures.h b/clang/include/clang/CIR/MissingFeatures.h
index e1a5c3d..adc7b5f 100644
--- a/clang/include/clang/CIR/MissingFeatures.h
+++ b/clang/include/clang/CIR/MissingFeatures.h
@@ -73,14 +73,16 @@ struct MissingFeatures {
   // FuncOp handling
   static bool opFuncOpenCLKernelMetadata() { return false; }
   static bool opFuncAstDeclAttr() { return false; }
+  static bool opFuncAttributesForDefinition() { return false; }
   static bool opFuncCallingConv() { return false; }
-  static bool opFuncExtraAttrs() { return false; }
-  static bool opFuncNoProto() { return false; }
   static bool opFuncCPUAndFeaturesAttributes() { return false; }
-  static bool opFuncSection() { return false; }
-  static bool opFuncMultipleReturnVals() { return false; }
-  static bool opFuncAttributesForDefinition() { return false; }
+  static bool opFuncExceptions() { return false; }
+  static bool opFuncExtraAttrs() { return false; }
   static bool opFuncMaybeHandleStaticInExternC() { return false; }
+  static bool opFuncMultipleReturnVals() { return false; }
+  static bool opFuncOperandBundles() { return false; }
+  static bool opFuncParameterAttributes() { return false; }
+  static bool opFuncSection() { return false; }
   static bool setLLVMFunctionFEnvAttributes() { return false; }
   static bool setFunctionAttributes() { return false; }
 
@@ -96,7 +98,6 @@ struct MissingFeatures {
   static bool opCallReturn() { return false; }
   static bool opCallArgEvaluationOrder() { return false; }
   static bool opCallCallConv() { return false; }
-  static bool opCallNoPrototypeFunc() { return false; }
   static bool opCallMustTail() { return false; }
   static bool opCallVirtual() { return false; }
   static bool opCallInAlloca() { return false; }
@@ -109,6 +110,7 @@ struct MissingFeatures {
   static bool opCallCIRGenFuncInfoExtParamInfo() { return false; }
   static bool opCallLandingPad() { return false; }
   static bool opCallContinueBlock() { return false; }
+  static bool opCallChain() { return false; }
 
   // CXXNewExpr
   static bool exprNewNullCheck() { return false; }
@@ -215,6 +217,7 @@ struct MissingFeatures {
   static bool intrinsics() { return false; }
   static bool isMemcpyEquivalentSpecialMember() { return false; }
   static bool isTrivialCtorOrDtor() { return false; }
+  static bool lambdaCaptures() { return false; }
   static bool lambdaFieldToName() { return false; }
   static bool loopInfoStack() { return false; }
   static bool lowerAggregateLoadStore() { return false; }
@@ -224,7 +227,6 @@ struct MissingFeatures {
   static bool moduleNameHash() { return false; }
   static bool msabi() { return false; }
   static bool needsGlobalCtorDtor() { return false; }
-  static bool nonFineGrainedBitfields() { return false; }
   static bool objCBlocks() { return false; }
   static bool objCGC() { return false; }
   static bool objCLifetime() { return false; }
diff --git a/clang/include/clang/Driver/CudaInstallationDetector.h b/clang/include/clang/Driver/CudaInstallationDetector.h
new file mode 100644
index 0000000..0fecbfe
--- /dev/null
+++ b/clang/include/clang/Driver/CudaInstallationDetector.h
@@ -0,0 +1,76 @@
+//===-- CudaInstallationDetector.h - Cuda Instalation Detector --*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_DRIVER_CUDAINSTALLATIONDETECTOR_H
+#define LLVM_CLANG_DRIVER_CUDAINSTALLATIONDETECTOR_H
+
+#include "clang/Basic/Cuda.h"
+#include "clang/Driver/Driver.h"
+#include <bitset>
+
+namespace clang {
+namespace driver {
+
+/// A class to find a viable CUDA installation
+class CudaInstallationDetector {
+private:
+  const Driver &D;
+  bool IsValid = false;
+  CudaVersion Version = CudaVersion::UNKNOWN;
+  std::string InstallPath;
+  std::string BinPath;
+  std::string LibDevicePath;
+  std::string IncludePath;
+  llvm::StringMap<std::string> LibDeviceMap;
+
+  // CUDA architectures for which we have raised an error in
+  // CheckCudaVersionSupportsArch.
+  mutable std::bitset<(int)OffloadArch::LAST> ArchsWithBadVersion;
+
+public:
+  CudaInstallationDetector(const Driver &D, const llvm::Triple &HostTriple,
+                           const llvm::opt::ArgList &Args);
+
+  void AddCudaIncludeArgs(const llvm::opt::ArgList &DriverArgs,
+                          llvm::opt::ArgStringList &CC1Args) const;
+
+  /// Emit an error if Version does not support the given Arch.
+  ///
+  /// If either Version or Arch is unknown, does not emit an error.  Emits at
+  /// most one error per Arch.
+  void CheckCudaVersionSupportsArch(OffloadArch Arch) const;
+
+  /// Check whether we detected a valid Cuda install.
+  bool isValid() const { return IsValid; }
+  /// Print information about the detected CUDA installation.
+  void print(raw_ostream &OS) const;
+
+  /// Get the detected Cuda install's version.
+  CudaVersion version() const {
+    return Version == CudaVersion::NEW ? CudaVersion::PARTIALLY_SUPPORTED
+                                       : Version;
+  }
+  /// Get the detected Cuda installation path.
+  StringRef getInstallPath() const { return InstallPath; }
+  /// Get the detected path to Cuda's bin directory.
+  StringRef getBinPath() const { return BinPath; }
+  /// Get the detected Cuda Include path.
+  StringRef getIncludePath() const { return IncludePath; }
+  /// Get the detected Cuda device library path.
+  StringRef getLibDevicePath() const { return LibDevicePath; }
+  /// Get libdevice file for given architecture
+  std::string getLibDeviceFile(StringRef Gpu) const {
+    return LibDeviceMap.lookup(Gpu);
+  }
+  void WarnIfUnsupportedVersion() const;
+};
+
+} // namespace driver
+} // namespace clang
+
+#endif // LLVM_CLANG_DRIVER_CUDAINSTALLATIONDETECTOR_H
diff --git a/clang/lib/Driver/ToolChains/LazyDetector.h b/clang/include/clang/Driver/LazyDetector.h
index 813d00a..5bd1d01 100644
--- a/clang/lib/Driver/ToolChains/LazyDetector.h
+++ b/clang/include/clang/Driver/LazyDetector.h
@@ -6,8 +6,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#ifndef LLVM_CLANG_LIB_DRIVER_TOOLCHAINS_LAZYDETECTOR_H
-#define LLVM_CLANG_LIB_DRIVER_TOOLCHAINS_LAZYDETECTOR_H
+#ifndef LLVM_CLANG_DRIVER_LAZYDETECTOR_H
+#define LLVM_CLANG_DRIVER_LAZYDETECTOR_H
 
 #include "clang/Driver/Tool.h"
 #include "clang/Driver/ToolChain.h"
@@ -42,4 +42,4 @@ public:
 
 } // end namespace clang
 
-#endif // LLVM_CLANG_LIB_DRIVER_TOOLCHAINS_LAZYDETECTOR_H
+#endif // LLVM_CLANG_DRIVER_LAZYDETECTOR_H
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index eb53821..3c04aeb 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -5312,6 +5312,8 @@ def mextended_const : Flag<["-"], "mextended-const">, Group<m_wasm_Features_Grou
 def mno_extended_const : Flag<["-"], "mno-extended-const">, Group<m_wasm_Features_Group>;
 def mfp16 : Flag<["-"], "mfp16">, Group<m_wasm_Features_Group>;
 def mno_fp16 : Flag<["-"], "mno-fp16">, Group<m_wasm_Features_Group>;
+def mgc : Flag<["-"], "mgc">, Group<m_wasm_Features_Group>;
+def mno_gc : Flag<["-"], "mno-gc">, Group<m_wasm_Features_Group>;
 def mmultimemory : Flag<["-"], "mmultimemory">, Group<m_wasm_Features_Group>;
 def mno_multimemory : Flag<["-"], "mno-multimemory">, Group<m_wasm_Features_Group>;
 def mmultivalue : Flag<["-"], "mmultivalue">, Group<m_wasm_Features_Group>;
diff --git a/clang/lib/Driver/ToolChains/ROCm.h b/clang/include/clang/Driver/RocmInstallationDetector.h
index ebd5443..3a325a6 100644
--- a/clang/lib/Driver/ToolChains/ROCm.h
+++ b/clang/include/clang/Driver/RocmInstallationDetector.h
@@ -1,4 +1,4 @@
-//===--- ROCm.h - ROCm installation detector --------------------*- C++ -*-===//
+//===-- RocmInstallationDetector.h - ROCm Instalation Detector --*- C++ -*-===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
@@ -6,21 +6,10 @@
 //
 //===----------------------------------------------------------------------===//
 
-#ifndef LLVM_CLANG_LIB_DRIVER_TOOLCHAINS_ROCM_H
-#define LLVM_CLANG_LIB_DRIVER_TOOLCHAINS_ROCM_H
+#ifndef LLVM_CLANG_DRIVER_ROCMINSTALLATIONDETECTOR_H
+#define LLVM_CLANG_DRIVER_ROCMINSTALLATIONDETECTOR_H
 
-#include "clang/Basic/Cuda.h"
-#include "clang/Basic/LLVM.h"
-#include "clang/Driver/CommonArgs.h"
 #include "clang/Driver/Driver.h"
-#include "clang/Driver/Options.h"
-#include "clang/Driver/SanitizerArgs.h"
-#include "llvm/ADT/SmallString.h"
-#include "llvm/ADT/StringMap.h"
-#include "llvm/Option/ArgList.h"
-#include "llvm/Support/VersionTuple.h"
-#include "llvm/TargetParser/TargetParser.h"
-#include "llvm/TargetParser/Triple.h"
 
 namespace clang {
 namespace driver {
@@ -308,7 +297,7 @@ public:
   StringRef getHIPVersion() const { return DetectedVersion; }
 };
 
-} // end namespace driver
-} // end namespace clang
+} // namespace driver
+} // namespace clang
 
-#endif // LLVM_CLANG_LIB_DRIVER_TOOLCHAINS_ROCM_H
+#endif // LLVM_CLANG_DRIVER_ROCMINSTALLATIONDETECTOR_H
diff --git a/clang/include/clang/Driver/SyclInstallationDetector.h b/clang/include/clang/Driver/SyclInstallationDetector.h
new file mode 100644
index 0000000..6925ec2
--- /dev/null
+++ b/clang/include/clang/Driver/SyclInstallationDetector.h
@@ -0,0 +1,29 @@
+//===-- SyclInstallationDetector.h - SYCL Instalation Detector --*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_DRIVER_SYCLINSTALLATIONDETECTOR_H
+#define LLVM_CLANG_DRIVER_SYCLINSTALLATIONDETECTOR_H
+
+#include "clang/Driver/Driver.h"
+
+namespace clang {
+namespace driver {
+
+class SYCLInstallationDetector {
+public:
+  SYCLInstallationDetector(const Driver &D, const llvm::Triple &HostTriple,
+                           const llvm::opt::ArgList &Args);
+
+  void addSYCLIncludeArgs(const llvm::opt::ArgList &DriverArgs,
+                          llvm::opt::ArgStringList &CC1Args) const;
+};
+
+} // namespace driver
+} // namespace clang
+
+#endif // LLVM_CLANG_DRIVER_SYCLINSTALLATIONDETECTOR_H
diff --git a/clang/include/clang/Frontend/ASTUnit.h b/clang/include/clang/Frontend/ASTUnit.h
index 1286fe4..7dd9aef 100644
--- a/clang/include/clang/Frontend/ASTUnit.h
+++ b/clang/include/clang/Frontend/ASTUnit.h
@@ -445,6 +445,9 @@ public:
 
   const DiagnosticsEngine &getDiagnostics() const { return *Diagnostics; }
   DiagnosticsEngine &getDiagnostics() { return *Diagnostics; }
+  llvm::IntrusiveRefCntPtr<DiagnosticsEngine> getDiagnosticsPtr() {
+    return Diagnostics;
+  }
 
   const SourceManager &getSourceManager() const { return *SourceMgr; }
   SourceManager &getSourceManager() { return *SourceMgr; }
@@ -918,8 +921,9 @@ public:
                     bool IncludeCodePatterns, bool IncludeBriefComments,
                     CodeCompleteConsumer &Consumer,
                     std::shared_ptr<PCHContainerOperations> PCHContainerOps,
-                    DiagnosticsEngine &Diag, LangOptions &LangOpts,
-                    SourceManager &SourceMgr, FileManager &FileMgr,
+                    llvm::IntrusiveRefCntPtr<DiagnosticsEngine> Diag,
+                    LangOptions &LangOpts, SourceManager &SourceMgr,
+                    FileManager &FileMgr,
                     SmallVectorImpl<StoredDiagnostic> &StoredDiagnostics,
                     SmallVectorImpl<const llvm::MemoryBuffer *> &OwnedBuffers,
                     std::unique_ptr<SyntaxOnlyAction> Act);
diff --git a/clang/include/clang/Frontend/CompilerInstance.h b/clang/include/clang/Frontend/CompilerInstance.h
index 2408367..a24decd 100644
--- a/clang/include/clang/Frontend/CompilerInstance.h
+++ b/clang/include/clang/Frontend/CompilerInstance.h
@@ -361,7 +361,7 @@ public:
   }
 
   /// setDiagnostics - Replace the current diagnostics engine.
-  void setDiagnostics(DiagnosticsEngine *Value);
+  void setDiagnostics(llvm::IntrusiveRefCntPtr<DiagnosticsEngine> Value);
 
   DiagnosticConsumer &getDiagnosticClient() const {
     assert(Diagnostics && Diagnostics->getClient() &&
@@ -420,6 +420,8 @@ public:
   /// @{
 
   llvm::vfs::FileSystem &getVirtualFileSystem() const;
+  llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem>
+  getVirtualFileSystemPtr() const;
 
   /// @}
   /// @name File Manager
diff --git a/clang/include/clang/Frontend/FrontendAction.h b/clang/include/clang/Frontend/FrontendAction.h
index 718684a..08c5fbc 100644
--- a/clang/include/clang/Frontend/FrontendAction.h
+++ b/clang/include/clang/Frontend/FrontendAction.h
@@ -84,6 +84,8 @@ protected:
   /// \return True on success; on failure ExecutionAction() and
   /// EndSourceFileAction() will not be called.
   virtual bool BeginSourceFileAction(CompilerInstance &CI) {
+    if (CurrentInput.isPreprocessed())
+      CI.getPreprocessor().SetMacroExpansionOnlyInDirectives();
     return true;
   }
 
@@ -98,7 +100,11 @@ protected:
   ///
   /// This is guaranteed to only be called following a successful call to
   /// BeginSourceFileAction (and BeginSourceFile).
-  virtual void EndSourceFileAction() {}
+  virtual void EndSourceFileAction() {
+    if (CurrentInput.isPreprocessed())
+      // Reset the preprocessor macro expansion to the default.
+      getCompilerInstance().getPreprocessor().SetEnableMacroExpansion();
+  }
 
   /// Callback at the end of processing a single input, to determine
   /// if the output files should be erased or not.
diff --git a/clang/include/clang/Frontend/PrecompiledPreamble.h b/clang/include/clang/Frontend/PrecompiledPreamble.h
index 624df00..565395b 100644
--- a/clang/include/clang/Frontend/PrecompiledPreamble.h
+++ b/clang/include/clang/Frontend/PrecompiledPreamble.h
@@ -84,7 +84,7 @@ public:
   static llvm::ErrorOr<PrecompiledPreamble>
   Build(const CompilerInvocation &Invocation,
         const llvm::MemoryBuffer *MainFileBuffer, PreambleBounds Bounds,
-        DiagnosticsEngine &Diagnostics,
+        IntrusiveRefCntPtr<DiagnosticsEngine> Diagnostics,
         IntrusiveRefCntPtr<llvm::vfs::FileSystem> VFS,
         std::shared_ptr<PCHContainerOperations> PCHContainerOps,
         bool StoreInMemory, StringRef StoragePath,
diff --git a/clang/include/clang/Lex/Preprocessor.h b/clang/include/clang/Lex/Preprocessor.h
index 4d82e20..71b0f8e 100644
--- a/clang/include/clang/Lex/Preprocessor.h
+++ b/clang/include/clang/Lex/Preprocessor.h
@@ -1847,6 +1847,10 @@ public:
     MacroExpansionInDirectivesOverride = true;
   }
 
+  void SetEnableMacroExpansion() {
+    DisableMacroExpansion = MacroExpansionInDirectivesOverride = false;
+  }
+
   /// Peeks ahead N tokens and returns that token without consuming any
   /// tokens.
   ///
diff --git a/clang/include/clang/Sema/SemaARM.h b/clang/include/clang/Sema/SemaARM.h
index e77d65f..104992e 100644
--- a/clang/include/clang/Sema/SemaARM.h
+++ b/clang/include/clang/Sema/SemaARM.h
@@ -44,8 +44,8 @@ public:
 
   bool CheckImmediateArg(CallExpr *TheCall, unsigned CheckTy, unsigned ArgIdx,
                          unsigned EltBitWidth, unsigned VecBitWidth);
-  bool CheckARMBuiltinExclusiveCall(unsigned BuiltinID, CallExpr *TheCall,
-                                    unsigned MaxWidth);
+  bool CheckARMBuiltinExclusiveCall(const TargetInfo &TI, unsigned BuiltinID,
+                                    CallExpr *TheCall);
   bool CheckNeonBuiltinFunctionCall(const TargetInfo &TI, unsigned BuiltinID,
                                     CallExpr *TheCall);
   bool PerformNeonImmChecks(
diff --git a/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td b/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td
index 6225977..36196cd 100644
--- a/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td
+++ b/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td
@@ -233,15 +233,11 @@ def UndefResultChecker : Checker<"UndefinedBinaryOperatorResult">,
   HelpText<"Check for undefined results of binary operators">,
   Documentation<HasDocumentation>;
 
-def StackAddrEscapeBase : Checker<"StackAddrEscapeBase">,
-  HelpText<"Generate information about stack address escapes.">,
-  Documentation<NotDocumented>,
-  Hidden;
-
-def StackAddrEscapeChecker : Checker<"StackAddressEscape">,
-  HelpText<"Check that addresses to stack memory do not escape the function">,
-  Dependencies<[StackAddrEscapeBase]>,
-  Documentation<HasDocumentation>;
+def StackAddrEscapeChecker
+    : Checker<"StackAddressEscape">,
+      HelpText<
+          "Check that addresses to stack memory do not escape the function">,
+      Documentation<HasDocumentation>;
 
 def DynamicTypePropagation : Checker<"DynamicTypePropagation">,
   HelpText<"Generate dynamic type information">,
@@ -295,10 +291,11 @@ def DynamicTypeChecker
       Dependencies<[DynamicTypePropagation]>,
       Documentation<HasDocumentation>;
 
-def StackAddrAsyncEscapeChecker : Checker<"StackAddressAsyncEscape">,
-  HelpText<"Check that addresses to stack memory do not escape the function">,
-  Dependencies<[StackAddrEscapeBase]>,
-  Documentation<HasDocumentation>;
+def StackAddrAsyncEscapeChecker
+    : Checker<"StackAddressAsyncEscape">,
+      HelpText<
+          "Check that addresses to stack memory do not escape the function">,
+      Documentation<HasDocumentation>;
 
 def PthreadLockBase : Checker<"PthreadLockBase">,
   HelpText<"Helper registering multiple checks.">,
@@ -1047,11 +1044,6 @@ def RetainCountBase : Checker<"RetainCountBase">,
 
 let ParentPackage = OSX in {
 
-def NSOrCFErrorDerefChecker : Checker<"NSOrCFErrorDerefChecker">,
-  HelpText<"Implementation checker for NSErrorChecker and CFErrorChecker">,
-  Documentation<NotDocumented>,
-  Hidden;
-
 def NumberObjectConversionChecker : Checker<"NumberObjectConversion">,
   HelpText<"Check for erroneous conversions of objects representing numbers "
            "into numbers">,
@@ -1149,9 +1141,8 @@ def ObjCSuperCallChecker : Checker<"MissingSuperCall">,
   Documentation<NotDocumented>;
 
 def NSErrorChecker : Checker<"NSError">,
-  HelpText<"Check usage of NSError** parameters">,
-  Dependencies<[NSOrCFErrorDerefChecker]>,
-  Documentation<HasDocumentation>;
+                     HelpText<"Check usage of NSError** parameters">,
+                     Documentation<HasDocumentation>;
 
 def RetainCountChecker : Checker<"RetainCount">,
   HelpText<"Check for leaks and improper reference count management">,
@@ -1253,9 +1244,8 @@ def CFRetainReleaseChecker : Checker<"CFRetainRelease">,
   Documentation<HasDocumentation>;
 
 def CFErrorChecker : Checker<"CFError">,
-  HelpText<"Check usage of CFErrorRef* parameters">,
-  Dependencies<[NSOrCFErrorDerefChecker]>,
-  Documentation<HasDocumentation>;
+                     HelpText<"Check usage of CFErrorRef* parameters">,
+                     Documentation<HasDocumentation>;
 
 } // end "osx.coreFoundation"
 
diff --git a/clang/lib/AST/ASTStructuralEquivalence.cpp b/clang/lib/AST/ASTStructuralEquivalence.cpp
index 0f2762d..22bb4cb 100644
--- a/clang/lib/AST/ASTStructuralEquivalence.cpp
+++ b/clang/lib/AST/ASTStructuralEquivalence.cpp
@@ -456,7 +456,9 @@ CheckStructurallyEquivalentAttributes(StructuralEquivalenceContext &Context,
                                       const Decl *D1, const Decl *D2,
                                       const Decl *PrimaryDecl = nullptr) {
   // If either declaration has an attribute on it, we treat the declarations
-  // as not being structurally equivalent.
+  // as not being structurally equivalent unless both declarations are implicit
+  // (ones generated by the compiler like __NSConstantString_tag).
+  //
   // FIXME: this should be handled on a case-by-case basis via tablegen in
   // Attr.td. There are multiple cases to consider: one declaration with the
   // attribute, another without it; different attribute syntax|spellings for
@@ -468,7 +470,7 @@ CheckStructurallyEquivalentAttributes(StructuralEquivalenceContext &Context,
     D1Attr = *D1->getAttrs().begin();
   if (D2->hasAttrs())
     D2Attr = *D2->getAttrs().begin();
-  if (D1Attr || D2Attr) {
+  if ((D1Attr || D2Attr) && !D1->isImplicit() && !D2->isImplicit()) {
     const auto *DiagnoseDecl = cast<TypeDecl>(PrimaryDecl ? PrimaryDecl : D2);
     Context.Diag2(DiagnoseDecl->getLocation(),
                   diag::warn_odr_tag_type_with_attributes)
@@ -870,7 +872,27 @@ static bool IsStructurallyEquivalent(StructuralEquivalenceContext &Context,
     else if (T1->getTypeClass() == Type::FunctionNoProto &&
              T2->getTypeClass() == Type::FunctionProto)
       TC = Type::FunctionNoProto;
-    else
+    else if (Context.LangOpts.C23 && !Context.StrictTypeSpelling &&
+             (T1->getTypeClass() == Type::Enum ||
+              T2->getTypeClass() == Type::Enum)) {
+      // In C23, if not being strict about token equivalence, we need to handle
+      // the case where one type is an enumeration and the other type is an
+      // integral type.
+      //
+      // C23 6.7.3.3p16: The enumerated type is compatible with the underlying
+      // type of the enumeration.
+      //
+      // Treat the enumeration as its underlying type and use the builtin type
+      // class comparison.
+      if (T1->getTypeClass() == Type::Enum) {
+        T1 = T1->getAs<EnumType>()->getDecl()->getIntegerType();
+        assert(T2->isBuiltinType() && !T1.isNull()); // Sanity check
+      } else if (T2->getTypeClass() == Type::Enum) {
+        T2 = T2->getAs<EnumType>()->getDecl()->getIntegerType();
+        assert(T1->isBuiltinType() && !T2.isNull()); // Sanity check
+      }
+      TC = Type::Builtin;
+    } else
       return false;
   }
 
@@ -2071,6 +2093,48 @@ static bool IsStructurallyEquivalent(StructuralEquivalenceContext &Context,
       !CheckStructurallyEquivalentAttributes(Context, D1, D2))
     return false;
 
+  // In C23, if one enumeration has a fixed underlying type, the other shall
+  // have a compatible fixed underlying type (6.2.7).
+  if (Context.LangOpts.C23) {
+    if (D1->isFixed() != D2->isFixed()) {
+      if (Context.Complain) {
+        Context.Diag2(D2->getLocation(),
+                      Context.getApplicableDiagnostic(
+                          diag::err_odr_tag_type_inconsistent))
+            << Context.ToCtx.getTypeDeclType(D2)
+            << (&Context.FromCtx != &Context.ToCtx);
+        Context.Diag1(D1->getLocation(),
+                      D1->isFixed()
+                          ? diag::note_odr_fixed_underlying_type
+                          : diag::note_odr_missing_fixed_underlying_type)
+            << D1;
+        Context.Diag2(D2->getLocation(),
+                      D2->isFixed()
+                          ? diag::note_odr_fixed_underlying_type
+                          : diag::note_odr_missing_fixed_underlying_type)
+            << D2;
+      }
+      return false;
+    }
+    if (D1->isFixed()) {
+      assert(D2->isFixed() && "enums expected to have fixed underlying types");
+      if (!IsStructurallyEquivalent(Context, D1->getIntegerType(),
+                                    D2->getIntegerType())) {
+        if (Context.Complain) {
+          Context.Diag2(D2->getLocation(),
+                        Context.getApplicableDiagnostic(
+                            diag::err_odr_tag_type_inconsistent))
+              << Context.ToCtx.getTypeDeclType(D2)
+              << (&Context.FromCtx != &Context.ToCtx);
+          Context.Diag2(D2->getLocation(),
+                        diag::note_odr_incompatible_fixed_underlying_type)
+              << D2 << D2->getIntegerType() << D1->getIntegerType();
+        }
+        return false;
+      }
+    }
+  }
+
   llvm::SmallVector<const EnumConstantDecl *, 8> D1Enums, D2Enums;
   auto CopyEnumerators =
       [](auto &&Range, llvm::SmallVectorImpl<const EnumConstantDecl *> &Cont) {
diff --git a/clang/lib/AST/ByteCode/Compiler.cpp b/clang/lib/AST/ByteCode/Compiler.cpp
index d0ddb2e..8b9e5e0 100644
--- a/clang/lib/AST/ByteCode/Compiler.cpp
+++ b/clang/lib/AST/ByteCode/Compiler.cpp
@@ -106,25 +106,14 @@ bool InitLink::emit(Compiler<Emitter> *Ctx, const Expr *E) const {
   return true;
 }
 
-/// Scope managing label targets.
-template <class Emitter> class LabelScope {
-public:
-  virtual ~LabelScope() {}
-
-protected:
-  LabelScope(Compiler<Emitter> *Ctx) : Ctx(Ctx) {}
-  /// Compiler instance.
-  Compiler<Emitter> *Ctx;
-};
-
 /// Sets the context for break/continue statements.
-template <class Emitter> class LoopScope final : public LabelScope<Emitter> {
+template <class Emitter> class LoopScope final {
 public:
   using LabelTy = typename Compiler<Emitter>::LabelTy;
   using OptLabelTy = typename Compiler<Emitter>::OptLabelTy;
 
   LoopScope(Compiler<Emitter> *Ctx, LabelTy BreakLabel, LabelTy ContinueLabel)
-      : LabelScope<Emitter>(Ctx), OldBreakLabel(Ctx->BreakLabel),
+      : Ctx(Ctx), OldBreakLabel(Ctx->BreakLabel),
         OldContinueLabel(Ctx->ContinueLabel),
         OldBreakVarScope(Ctx->BreakVarScope),
         OldContinueVarScope(Ctx->ContinueVarScope) {
@@ -142,6 +131,7 @@ public:
   }
 
 private:
+  Compiler<Emitter> *Ctx;
   OptLabelTy OldBreakLabel;
   OptLabelTy OldContinueLabel;
   VariableScope<Emitter> *OldBreakVarScope;
@@ -149,7 +139,7 @@ private:
 };
 
 // Sets the context for a switch scope, mapping labels.
-template <class Emitter> class SwitchScope final : public LabelScope<Emitter> {
+template <class Emitter> class SwitchScope final {
 public:
   using LabelTy = typename Compiler<Emitter>::LabelTy;
   using OptLabelTy = typename Compiler<Emitter>::OptLabelTy;
@@ -157,7 +147,7 @@ public:
 
   SwitchScope(Compiler<Emitter> *Ctx, CaseMap &&CaseLabels, LabelTy BreakLabel,
               OptLabelTy DefaultLabel)
-      : LabelScope<Emitter>(Ctx), OldBreakLabel(Ctx->BreakLabel),
+      : Ctx(Ctx), OldBreakLabel(Ctx->BreakLabel),
         OldDefaultLabel(this->Ctx->DefaultLabel),
         OldCaseLabels(std::move(this->Ctx->CaseLabels)),
         OldLabelVarScope(Ctx->BreakVarScope) {
@@ -175,6 +165,7 @@ public:
   }
 
 private:
+  Compiler<Emitter> *Ctx;
   OptLabelTy OldBreakLabel;
   OptLabelTy OldDefaultLabel;
   CaseMap OldCaseLabels;
@@ -457,13 +448,17 @@ bool Compiler<Emitter>::VisitCastExpr(const CastExpr *CE) {
     assert(isPtrType(*FromT));
     assert(isPtrType(*ToT));
     if (FromT == ToT) {
-      if (CE->getType()->isVoidPointerType())
+      if (CE->getType()->isVoidPointerType() &&
+          !SubExprTy->isFunctionPointerType()) {
         return this->delegate(SubExpr);
+      }
 
       if (!this->visit(SubExpr))
         return false;
-      if (CE->getType()->isFunctionPointerType())
-        return true;
+      if (CE->getType()->isFunctionPointerType() ||
+          SubExprTy->isFunctionPointerType()) {
+        return this->emitFnPtrCast(CE);
+      }
       if (FromT == PT_Ptr)
         return this->emitPtrPtrCast(SubExprTy->isVoidPointerType(), CE);
       return true;
@@ -1763,6 +1758,9 @@ bool Compiler<Emitter>::visitInitList(ArrayRef<const Expr *> Inits,
     if (Inits.size() == 1 && E->getType() == Inits[0]->getType())
       return this->delegate(Inits[0]);
 
+    if (!R)
+      return false;
+
     auto initPrimitiveField = [=](const Record::Field *FieldToInit,
                                   const Expr *Init, PrimType T,
                                   bool Activate = false) -> bool {
diff --git a/clang/lib/AST/ByteCode/Compiler.h b/clang/lib/AST/ByteCode/Compiler.h
index 3a26342..16f108f 100644
--- a/clang/lib/AST/ByteCode/Compiler.h
+++ b/clang/lib/AST/ByteCode/Compiler.h
@@ -21,7 +21,6 @@
 #include "clang/AST/Decl.h"
 #include "clang/AST/Expr.h"
 #include "clang/AST/StmtVisitor.h"
-#include "clang/Basic/TargetInfo.h"
 
 namespace clang {
 class QualType;
diff --git a/clang/lib/AST/ByteCode/Disasm.cpp b/clang/lib/AST/ByteCode/Disasm.cpp
index 74399d1..5049a65 100644
--- a/clang/lib/AST/ByteCode/Disasm.cpp
+++ b/clang/lib/AST/ByteCode/Disasm.cpp
@@ -531,7 +531,7 @@ LLVM_DUMP_METHOD void Block::dump(llvm::raw_ostream &OS) const {
   Desc->dump(OS);
   OS << ")\n";
   unsigned NPointers = 0;
-  for (const Pointer *P = Pointers; P; P = P->Next) {
+  for (const Pointer *P = Pointers; P; P = P->asBlockPointer().Next) {
     ++NPointers;
   }
   OS << "  EvalID: " << EvalID << '\n';
diff --git a/clang/lib/AST/ByteCode/DynamicAllocator.cpp b/clang/lib/AST/ByteCode/DynamicAllocator.cpp
index 4f0f511..169250c 100644
--- a/clang/lib/AST/ByteCode/DynamicAllocator.cpp
+++ b/clang/lib/AST/ByteCode/DynamicAllocator.cpp
@@ -27,7 +27,7 @@ void DynamicAllocator::cleanup() {
       B->invokeDtor();
       if (B->hasPointers()) {
         while (B->Pointers) {
-          Pointer *Next = B->Pointers->Next;
+          Pointer *Next = B->Pointers->asBlockPointer().Next;
           B->Pointers->PointeeStorage.BS.Pointee = nullptr;
           B->Pointers = Next;
         }
diff --git a/clang/lib/AST/ByteCode/Interp.h b/clang/lib/AST/ByteCode/Interp.h
index 9012442..9a325ab 100644
--- a/clang/lib/AST/ByteCode/Interp.h
+++ b/clang/lib/AST/ByteCode/Interp.h
@@ -25,7 +25,6 @@
 #include "InterpStack.h"
 #include "InterpState.h"
 #include "MemberPointer.h"
-#include "Opcode.h"
 #include "PrimType.h"
 #include "Program.h"
 #include "State.h"
@@ -481,13 +480,11 @@ inline bool Mulc(InterpState &S, CodePtr OpPC) {
     Floating RA = S.allocFloat(A.getSemantics());
     RA.copy(ResR);
     Result.elem<Floating>(0) = RA; // Floating(ResR);
-    Result.atIndex(0).initialize();
 
     Floating RI = S.allocFloat(A.getSemantics());
     RI.copy(ResI);
     Result.elem<Floating>(1) = RI; // Floating(ResI);
-    Result.atIndex(1).initialize();
-    Result.initialize();
+    Result.initializeAllElements();
   } else {
     // Integer element type.
     const T &LHSR = LHS.elem<T>(0);
@@ -505,7 +502,6 @@ inline bool Mulc(InterpState &S, CodePtr OpPC) {
       return false;
     if (T::sub(A, B, Bits, &Result.elem<T>(0)))
       return false;
-    Result.atIndex(0).initialize();
 
     // imag(Result) = (real(LHS) * imag(RHS)) + (imag(LHS) * real(RHS))
     if (T::mul(LHSR, RHSI, Bits, &A))
@@ -514,8 +510,8 @@ inline bool Mulc(InterpState &S, CodePtr OpPC) {
       return false;
     if (T::add(A, B, Bits, &Result.elem<T>(1)))
       return false;
-    Result.atIndex(1).initialize();
     Result.initialize();
+    Result.initializeAllElements();
   }
 
   return true;
@@ -541,14 +537,12 @@ inline bool Divc(InterpState &S, CodePtr OpPC) {
     Floating RA = S.allocFloat(A.getSemantics());
     RA.copy(ResR);
     Result.elem<Floating>(0) = RA; // Floating(ResR);
-    Result.atIndex(0).initialize();
 
     Floating RI = S.allocFloat(A.getSemantics());
     RI.copy(ResI);
     Result.elem<Floating>(1) = RI; // Floating(ResI);
-    Result.atIndex(1).initialize();
 
-    Result.initialize();
+    Result.initializeAllElements();
   } else {
     // Integer element type.
     const T &LHSR = LHS.elem<T>(0);
@@ -590,7 +584,6 @@ inline bool Divc(InterpState &S, CodePtr OpPC) {
       return false;
     if (T::div(ResultR, Den, Bits, &ResultR))
       return false;
-    Result.atIndex(0).initialize();
 
     // imag(Result) = ((imag(LHS) * real(RHS)) - (real(LHS) * imag(RHS))) / Den
     if (T::mul(LHSI, RHSR, Bits, &A) || T::mul(LHSR, RHSI, Bits, &B))
@@ -599,8 +592,7 @@ inline bool Divc(InterpState &S, CodePtr OpPC) {
       return false;
     if (T::div(ResultI, Den, Bits, &ResultI))
       return false;
-    Result.atIndex(1).initialize();
-    Result.initialize();
+    Result.initializeAllElements();
   }
 
   return true;
@@ -1138,8 +1130,9 @@ inline bool CmpHelperEQ<Pointer>(InterpState &S, CodePtr OpPC, CompareFn Fn) {
     S.FFDiag(Loc, diag::note_constexpr_pointer_comparison_past_end)
         << LHS.toDiagnosticString(S.getASTContext());
     return false;
-  } else if (RHS.isOnePastEnd() && !LHS.isOnePastEnd() && !LHS.isZero() &&
-             LHS.getOffset() == 0) {
+  }
+  if (RHS.isOnePastEnd() && !LHS.isOnePastEnd() && !LHS.isZero() &&
+      LHS.getOffset() == 0) {
     const SourceInfo &Loc = S.Current->getSource(OpPC);
     S.FFDiag(Loc, diag::note_constexpr_pointer_comparison_past_end)
         << RHS.toDiagnosticString(S.getASTContext());
@@ -1157,8 +1150,9 @@ inline bool CmpHelperEQ<Pointer>(InterpState &S, CodePtr OpPC, CompareFn Fn) {
         const SourceInfo &Loc = S.Current->getSource(OpPC);
         S.FFDiag(Loc, diag::note_constexpr_literal_comparison);
         return false;
-      } else if (const auto *CE = dyn_cast<CallExpr>(E);
-                 CE && IsOpaqueConstantCall(CE)) {
+      }
+      if (const auto *CE = dyn_cast<CallExpr>(E);
+          CE && IsOpaqueConstantCall(CE)) {
         const SourceInfo &Loc = S.Current->getSource(OpPC);
         S.FFDiag(Loc, diag::note_constexpr_opaque_call_comparison)
             << P.toDiagnosticString(S.getASTContext());
@@ -2688,6 +2682,14 @@ static inline bool CastFixedPointIntegral(InterpState &S, CodePtr OpPC) {
   return true;
 }
 
+static inline bool FnPtrCast(InterpState &S, CodePtr OpPC) {
+  const SourceInfo &E = S.Current->getSource(OpPC);
+  S.CCEDiag(E, diag::note_constexpr_invalid_cast)
+      << diag::ConstexprInvalidCastKind::ThisConversionOrReinterpret
+      << S.getLangOpts().CPlusPlus << S.Current->getRange(OpPC);
+  return true;
+}
+
 static inline bool PtrPtrCast(InterpState &S, CodePtr OpPC, bool SrcIsVoidPtr) {
   const auto &Ptr = S.Stk.peek<Pointer>();
 
@@ -3273,7 +3275,8 @@ inline bool InvalidCast(InterpState &S, CodePtr OpPC, CastKind Kind,
     S.CCEDiag(Loc, diag::note_constexpr_invalid_cast)
         << static_cast<unsigned>(Kind) << S.Current->getRange(OpPC);
     return !Fatal;
-  } else if (Kind == CastKind::Volatile) {
+  }
+  if (Kind == CastKind::Volatile) {
     if (!S.checkingPotentialConstantExpression()) {
       const auto *E = cast<CastExpr>(S.Current->getExpr(OpPC));
       if (S.getLangOpts().CPlusPlus)
@@ -3284,7 +3287,8 @@ inline bool InvalidCast(InterpState &S, CodePtr OpPC, CastKind Kind,
     }
 
     return false;
-  } else if (Kind == CastKind::Dynamic) {
+  }
+  if (Kind == CastKind::Dynamic) {
     assert(!S.getLangOpts().CPlusPlus20);
     S.CCEDiag(S.Current->getSource(OpPC), diag::note_constexpr_invalid_cast)
         << diag::ConstexprInvalidCastKind::Dynamic;
diff --git a/clang/lib/AST/ByteCode/InterpBlock.cpp b/clang/lib/AST/ByteCode/InterpBlock.cpp
index f603078..963b54e 100644
--- a/clang/lib/AST/ByteCode/InterpBlock.cpp
+++ b/clang/lib/AST/ByteCode/InterpBlock.cpp
@@ -27,9 +27,9 @@ void Block::addPointer(Pointer *P) {
   assert(!hasPointer(P));
 #endif
   if (Pointers)
-    Pointers->Prev = P;
-  P->Next = Pointers;
-  P->Prev = nullptr;
+    Pointers->PointeeStorage.BS.Prev = P;
+  P->PointeeStorage.BS.Next = Pointers;
+  P->PointeeStorage.BS.Prev = nullptr;
   Pointers = P;
 #ifndef NDEBUG
   assert(hasPointer(P));
@@ -48,13 +48,15 @@ void Block::removePointer(Pointer *P) {
   assert(hasPointer(P));
 #endif
 
+  BlockPointer &BP = P->PointeeStorage.BS;
+
   if (Pointers == P)
-    Pointers = P->Next;
+    Pointers = BP.Next;
 
-  if (P->Prev)
-    P->Prev->Next = P->Next;
-  if (P->Next)
-    P->Next->Prev = P->Prev;
+  if (BP.Prev)
+    BP.Prev->PointeeStorage.BS.Next = BP.Next;
+  if (BP.Next)
+    BP.Next->PointeeStorage.BS.Prev = BP.Prev;
   P->PointeeStorage.BS.Pointee = nullptr;
 #ifndef NDEBUG
   assert(!hasPointer(P));
@@ -68,7 +70,9 @@ void Block::cleanup() {
 
 void Block::replacePointer(Pointer *Old, Pointer *New) {
   assert(Old);
+  assert(Old->isBlockPointer());
   assert(New);
+  assert(New->isBlockPointer());
   assert(Old != New);
   if (IsStatic) {
     assert(!Pointers);
@@ -78,17 +82,20 @@ void Block::replacePointer(Pointer *Old, Pointer *New) {
   assert(hasPointer(Old));
 #endif
 
-  if (Old->Prev)
-    Old->Prev->Next = New;
-  if (Old->Next)
-    Old->Next->Prev = New;
-  New->Prev = Old->Prev;
-  New->Next = Old->Next;
+  BlockPointer &OldBP = Old->PointeeStorage.BS;
+  BlockPointer &NewBP = New->PointeeStorage.BS;
+
+  if (OldBP.Prev)
+    OldBP.Prev->PointeeStorage.BS.Next = New;
+  if (OldBP.Next)
+    OldBP.Next->PointeeStorage.BS.Prev = New;
+  NewBP.Prev = OldBP.Prev;
+  NewBP.Next = OldBP.Next;
   if (Pointers == Old)
     Pointers = New;
 
-  Old->PointeeStorage.BS.Pointee = nullptr;
-  New->PointeeStorage.BS.Pointee = this;
+  OldBP.Pointee = nullptr;
+  NewBP.Pointee = this;
 #ifndef NDEBUG
   assert(!hasPointer(Old));
   assert(hasPointer(New));
@@ -97,7 +104,7 @@ void Block::replacePointer(Pointer *Old, Pointer *New) {
 
 #ifndef NDEBUG
 bool Block::hasPointer(const Pointer *P) const {
-  for (const Pointer *C = Pointers; C; C = C->Next) {
+  for (const Pointer *C = Pointers; C; C = C->asBlockPointer().Next) {
     if (C == P)
       return true;
   }
@@ -120,7 +127,7 @@ DeadBlock::DeadBlock(DeadBlock *&Root, Block *Blk)
 
   // Transfer pointers.
   B.Pointers = Blk->Pointers;
-  for (Pointer *P = Blk->Pointers; P; P = P->Next)
+  for (Pointer *P = Blk->Pointers; P; P = P->asBlockPointer().Next)
     P->PointeeStorage.BS.Pointee = &B;
   Blk->Pointers = nullptr;
 }
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index 3ece7054..f908d02 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -1099,10 +1099,8 @@ static bool interp__builtin_complex(InterpState &S, CodePtr OpPC,
   Pointer &Result = S.Stk.peek<Pointer>();
 
   Result.elem<Floating>(0) = Arg1;
-  Result.atIndex(0).initialize();
   Result.elem<Floating>(1) = Arg2;
-  Result.atIndex(1).initialize();
-  Result.initialize();
+  Result.initializeAllElements();
 
   return true;
 }
@@ -1728,9 +1726,9 @@ static bool interp__builtin_elementwise_popcount(InterpState &S, CodePtr OpPC,
         Dst.elem<T>(I) =
             T::from(Arg.elem<T>(I).toAPSInt().reverseBits().getZExtValue());
       }
-      Dst.atIndex(I).initialize();
     });
   }
+  Dst.initializeAllElements();
 
   return true;
 }
@@ -2314,12 +2312,10 @@ static bool interp__builtin_elementwise_sat(InterpState &S, CodePtr OpPC,
       llvm_unreachable("Wrong builtin ID");
     }
 
-    INT_TYPE_SWITCH_NO_BOOL(ElemT, {
-      const Pointer &E = Dst.atIndex(I);
-      E.deref<T>() = static_cast<T>(Result);
-      E.initialize();
-    });
+    INT_TYPE_SWITCH_NO_BOOL(ElemT,
+                            { Dst.elem<T>(I) = static_cast<T>(Result); });
   }
+  Dst.initializeAllElements();
 
   return true;
 }
diff --git a/clang/lib/AST/ByteCode/InterpState.cpp b/clang/lib/AST/ByteCode/InterpState.cpp
index 7848f29..3010847 100644
--- a/clang/lib/AST/ByteCode/InterpState.cpp
+++ b/clang/lib/AST/ByteCode/InterpState.cpp
@@ -52,7 +52,7 @@ void InterpState::cleanup() {
   // As a last resort, make sure all pointers still pointing to a dead block
   // don't point to it anymore.
   for (DeadBlock *DB = DeadBlocks; DB; DB = DB->Next) {
-    for (Pointer *P = DB->B.Pointers; P; P = P->Next) {
+    for (Pointer *P = DB->B.Pointers; P; P = P->asBlockPointer().Next) {
       P->PointeeStorage.BS.Pointee = nullptr;
     }
   }
diff --git a/clang/lib/AST/ByteCode/Opcodes.td b/clang/lib/AST/ByteCode/Opcodes.td
index abfed77..95a4433 100644
--- a/clang/lib/AST/ByteCode/Opcodes.td
+++ b/clang/lib/AST/ByteCode/Opcodes.td
@@ -412,7 +412,7 @@ def CheckDecl : Opcode {
 
 def CheckEnumValue : Opcode {
   let Args = [ArgEnumDecl];
-  let Types = [FixedSizeIntegralTypeClass];
+  let Types = [IntegralTypeClass];
   let HasGroup = 1;
 }
 
@@ -735,6 +735,8 @@ def PtrPtrCast : Opcode {
 
 }
 
+def FnPtrCast : Opcode;
+
 def DecayPtr : Opcode {
   let Types = [PtrTypeClass, PtrTypeClass];
   let HasGroup = 1;
diff --git a/clang/lib/AST/ByteCode/Pointer.cpp b/clang/lib/AST/ByteCode/Pointer.cpp
index 9341bc1..dec2088 100644
--- a/clang/lib/AST/ByteCode/Pointer.cpp
+++ b/clang/lib/AST/ByteCode/Pointer.cpp
@@ -42,7 +42,7 @@ Pointer::Pointer(Block *Pointee, unsigned Base, uint64_t Offset)
     : Offset(Offset), StorageKind(Storage::Block) {
   assert((Base == RootPtrMark || Base % alignof(void *) == 0) && "wrong base");
 
-  PointeeStorage.BS = {Pointee, Base};
+  PointeeStorage.BS = {Pointee, Base, nullptr, nullptr};
 
   if (Pointee)
     Pointee->addPointer(this);
@@ -89,7 +89,6 @@ Pointer &Pointer::operator=(const Pointer &P) {
 
   if (P.isBlockPointer()) {
     PointeeStorage.BS = P.PointeeStorage.BS;
-    PointeeStorage.BS.Pointee = P.PointeeStorage.BS.Pointee;
 
     if (PointeeStorage.BS.Pointee)
       PointeeStorage.BS.Pointee->addPointer(this);
@@ -127,7 +126,6 @@ Pointer &Pointer::operator=(Pointer &&P) {
 
   if (P.isBlockPointer()) {
     PointeeStorage.BS = P.PointeeStorage.BS;
-    PointeeStorage.BS.Pointee = P.PointeeStorage.BS.Pointee;
 
     if (PointeeStorage.BS.Pointee)
       PointeeStorage.BS.Pointee->addPointer(this);
@@ -495,6 +493,19 @@ void Pointer::initialize() const {
   getInlineDesc()->IsInitialized = true;
 }
 
+void Pointer::initializeAllElements() const {
+  assert(getFieldDesc()->isPrimitiveArray());
+  assert(isArrayRoot());
+
+  InitMapPtr &IM = getInitMap();
+  if (!IM) {
+    IM = std::make_pair(true, nullptr);
+  } else {
+    IM->first = true;
+    IM->second.reset();
+  }
+}
+
 void Pointer::activate() const {
   // Field has its bit in an inline descriptor.
   assert(PointeeStorage.BS.Base != 0 &&
diff --git a/clang/lib/AST/ByteCode/Pointer.h b/clang/lib/AST/ByteCode/Pointer.h
index 059f176..5bafc5b 100644
--- a/clang/lib/AST/ByteCode/Pointer.h
+++ b/clang/lib/AST/ByteCode/Pointer.h
@@ -39,6 +39,10 @@ struct BlockPointer {
   Block *Pointee;
   /// Start of the current subfield.
   unsigned Base;
+  /// Previous link in the pointer chain.
+  Pointer *Prev;
+  /// Next link in the pointer chain.
+  Pointer *Next;
 };
 
 struct IntPointer {
@@ -725,6 +729,10 @@ public:
 
   /// Initializes a field.
   void initialize() const;
+  /// Initialize all elements of a primitive array at once. This can be
+  /// used in situations where we *know* we have initialized *all* elements
+  /// of a primtive array.
+  void initializeAllElements() const;
   /// Activats a field.
   void activate() const;
   /// Deactivates an entire strurcutre.
@@ -828,15 +836,10 @@ private:
   /// Offset into the storage.
   uint64_t Offset = 0;
 
-  /// Previous link in the pointer chain.
-  Pointer *Prev = nullptr;
-  /// Next link in the pointer chain.
-  Pointer *Next = nullptr;
-
   Storage StorageKind = Storage::Int;
   union {
-    BlockPointer BS;
     IntPointer Int;
+    BlockPointer BS;
     FunctionPointer Fn;
     TypeidPointer Typeid;
   } PointeeStorage;
diff --git a/clang/lib/AST/Decl.cpp b/clang/lib/AST/Decl.cpp
index 83fcd87..1588be4 100644
--- a/clang/lib/AST/Decl.cpp
+++ b/clang/lib/AST/Decl.cpp
@@ -5988,11 +5988,10 @@ bool clang::IsArmStreamingFunction(const FunctionDecl *FD,
     if (FD->hasAttr<ArmLocallyStreamingAttr>())
       return true;
 
-  if (const Type *Ty = FD->getType().getTypePtrOrNull())
-    if (const auto *FPT = Ty->getAs<FunctionProtoType>())
-      if (FPT->getAArch64SMEAttributes() &
-          FunctionType::SME_PStateSMEnabledMask)
-        return true;
+  assert(!FD->getType().isNull() && "Expected a valid FunctionDecl");
+  if (const auto *FPT = FD->getType()->getAs<FunctionProtoType>())
+    if (FPT->getAArch64SMEAttributes() & FunctionType::SME_PStateSMEnabledMask)
+      return true;
 
   return false;
 }
diff --git a/clang/lib/AST/Expr.cpp b/clang/lib/AST/Expr.cpp
index d85655b..cd9672d 100644
--- a/clang/lib/AST/Expr.cpp
+++ b/clang/lib/AST/Expr.cpp
@@ -4233,8 +4233,15 @@ bool Expr::isSameComparisonOperand(const Expr* E1, const Expr* E2) {
       // template parameters.
       const auto *DRE1 = cast<DeclRefExpr>(E1);
       const auto *DRE2 = cast<DeclRefExpr>(E2);
-      return DRE1->isPRValue() && DRE2->isPRValue() &&
-             DRE1->getDecl() == DRE2->getDecl();
+
+      if (DRE1->getDecl() != DRE2->getDecl())
+        return false;
+
+      if ((DRE1->isPRValue() && DRE2->isPRValue()) ||
+          (DRE1->isLValue() && DRE2->isLValue()))
+        return true;
+
+      return false;
     }
     case ImplicitCastExprClass: {
       // Peel off implicit casts.
@@ -4244,7 +4251,8 @@ bool Expr::isSameComparisonOperand(const Expr* E1, const Expr* E2) {
         if (!ICE1 || !ICE2)
           return false;
         if (ICE1->getCastKind() != ICE2->getCastKind())
-          return false;
+          return isSameComparisonOperand(ICE1->IgnoreParenImpCasts(),
+                                         ICE2->IgnoreParenImpCasts());
         E1 = ICE1->getSubExpr()->IgnoreParens();
         E2 = ICE2->getSubExpr()->IgnoreParens();
         // The final cast must be one of these types.
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index 9808298..993b64b 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -9741,10 +9741,19 @@ bool PointerExprEvaluator::VisitCastExpr(const CastExpr *E) {
   case CK_AddressSpaceConversion:
     if (!Visit(SubExpr))
       return false;
-    // Bitcasts to cv void* are static_casts, not reinterpret_casts, so are
-    // permitted in constant expressions in C++11. Bitcasts from cv void* are
-    // also static_casts, but we disallow them as a resolution to DR1312.
-    if (!E->getType()->isVoidPointerType()) {
+    if (E->getType()->isFunctionPointerType() ||
+        SubExpr->getType()->isFunctionPointerType()) {
+      // Casting between two function pointer types, or between a function
+      // pointer and an object pointer, is always a reinterpret_cast.
+      CCEDiag(E, diag::note_constexpr_invalid_cast)
+          << diag::ConstexprInvalidCastKind::ThisConversionOrReinterpret
+          << Info.Ctx.getLangOpts().CPlusPlus;
+      Result.Designator.setInvalid();
+    } else if (!E->getType()->isVoidPointerType()) {
+      // Bitcasts to cv void* are static_casts, not reinterpret_casts, so are
+      // permitted in constant expressions in C++11. Bitcasts from cv void* are
+      // also static_casts, but we disallow them as a resolution to DR1312.
+      //
       // In some circumstances, we permit casting from void* to cv1 T*, when the
       // actual pointee object is actually a cv2 T.
       bool HasValidResult = !Result.InvalidBase && !Result.Designator.Invalid &&
diff --git a/clang/lib/AST/RecordLayoutBuilder.cpp b/clang/lib/AST/RecordLayoutBuilder.cpp
index 6a74e98..760b2fc 100644
--- a/clang/lib/AST/RecordLayoutBuilder.cpp
+++ b/clang/lib/AST/RecordLayoutBuilder.cpp
@@ -1953,7 +1953,7 @@ void ItaniumRecordLayoutBuilder::LayoutField(const FieldDecl *D,
           // silently there. For other targets that have ms_struct enabled
           // (most probably via a pragma or attribute), trigger a diagnostic
           // that defaults to an error.
-          if (!Context.getTargetInfo().getTriple().isWindowsGNUEnvironment())
+          if (!Context.getTargetInfo().getTriple().isOSCygMing())
             Diag(D->getLocation(), diag::warn_npot_ms_struct);
         }
         if (TypeSize > FieldAlign &&
diff --git a/clang/lib/Analysis/UnsafeBufferUsage.cpp b/clang/lib/Analysis/UnsafeBufferUsage.cpp
index ac47b12..40dff7e 100644
--- a/clang/lib/Analysis/UnsafeBufferUsage.cpp
+++ b/clang/lib/Analysis/UnsafeBufferUsage.cpp
@@ -25,6 +25,7 @@
 #include "clang/Basic/SourceLocation.h"
 #include "clang/Lex/Lexer.h"
 #include "clang/Lex/Preprocessor.h"
+#include "llvm/ADT/APInt.h"
 #include "llvm/ADT/APSInt.h"
 #include "llvm/ADT/STLFunctionalExtras.h"
 #include "llvm/ADT/SmallSet.h"
@@ -809,28 +810,86 @@ static bool hasUnsafeFormatOrSArg(const CallExpr *Call, const Expr *&UnsafeArg,
     const CallExpr *Call;
     unsigned FmtArgIdx;
     const Expr *&UnsafeArg;
+    ASTContext &Ctx;
+
+    // Returns an `Expr` representing the precision if specified, null
+    // otherwise.
+    // The parameter `Call` is a printf call and the parameter `Precision` is
+    // the precision of a format specifier of the `Call`.
+    //
+    // For example, for the `printf("%d, %.10s", 10, p)` call
+    // `Precision` can be the precision of either "%d" or "%.10s". The former
+    // one will have `NotSpecified` kind.
+    const Expr *
+    getPrecisionAsExpr(const analyze_printf::OptionalAmount &Precision,
+                       const CallExpr *Call) {
+      unsigned PArgIdx = -1;
+
+      if (Precision.hasDataArgument())
+        PArgIdx = Precision.getPositionalArgIndex() + FmtArgIdx;
+      if (0 < PArgIdx && PArgIdx < Call->getNumArgs()) {
+        const Expr *PArg = Call->getArg(PArgIdx);
+
+        // Strip the cast if `PArg` is a cast-to-int expression:
+        if (auto *CE = dyn_cast<CastExpr>(PArg);
+            CE && CE->getType()->isSignedIntegerType())
+          PArg = CE->getSubExpr();
+        return PArg;
+      }
+      if (Precision.getHowSpecified() ==
+          analyze_printf::OptionalAmount::HowSpecified::Constant) {
+        auto SizeTy = Ctx.getSizeType();
+        llvm::APSInt PArgVal = llvm::APSInt(
+            llvm::APInt(Ctx.getTypeSize(SizeTy), Precision.getConstantAmount()),
+            true);
+
+        return IntegerLiteral::Create(Ctx, PArgVal, Ctx.getSizeType(), {});
+      }
+      return nullptr;
+    }
 
   public:
     StringFormatStringHandler(const CallExpr *Call, unsigned FmtArgIdx,
-                              const Expr *&UnsafeArg)
-        : Call(Call), FmtArgIdx(FmtArgIdx), UnsafeArg(UnsafeArg) {}
+                              const Expr *&UnsafeArg, ASTContext &Ctx)
+        : Call(Call), FmtArgIdx(FmtArgIdx), UnsafeArg(UnsafeArg), Ctx(Ctx) {}
 
     bool HandlePrintfSpecifier(const analyze_printf::PrintfSpecifier &FS,
                                const char *startSpecifier,
                                unsigned specifierLen,
                                const TargetInfo &Target) override {
-      if (FS.getConversionSpecifier().getKind() ==
-          analyze_printf::PrintfConversionSpecifier::sArg) {
-        unsigned ArgIdx = FS.getPositionalArgIndex() + FmtArgIdx;
-
-        if (0 < ArgIdx && ArgIdx < Call->getNumArgs())
-          if (!isNullTermPointer(Call->getArg(ArgIdx))) {
-            UnsafeArg = Call->getArg(ArgIdx); // output
-            // returning false stops parsing immediately
-            return false;
-          }
-      }
-      return true; // continue parsing
+      if (FS.getConversionSpecifier().getKind() !=
+          analyze_printf::PrintfConversionSpecifier::sArg)
+        return true; // continue parsing
+
+      unsigned ArgIdx = FS.getPositionalArgIndex() + FmtArgIdx;
+
+      if (!(0 < ArgIdx && ArgIdx < Call->getNumArgs()))
+        // If the `ArgIdx` is invalid, give up.
+        return true; // continue parsing
+
+      const Expr *Arg = Call->getArg(ArgIdx);
+
+      if (isNullTermPointer(Arg))
+        // If Arg is a null-terminated pointer, it is safe anyway.
+        return true; // continue parsing
+
+      // Otherwise, check if the specifier has a precision and if the character
+      // pointer is safely bound by the precision:
+      auto LengthModifier = FS.getLengthModifier();
+      QualType ArgType = Arg->getType();
+      bool IsArgTypeValid = // Is ArgType a character pointer type?
+          ArgType->isPointerType() &&
+          (LengthModifier.getKind() == LengthModifier.AsWideChar
+               ? ArgType->getPointeeType()->isWideCharType()
+               : ArgType->getPointeeType()->isCharType());
+
+      if (auto *Precision = getPrecisionAsExpr(FS.getPrecision(), Call);
+          Precision && IsArgTypeValid)
+        if (isPtrBufferSafe(Arg, Precision, Ctx))
+          return true;
+      // Handle unsafe case:
+      UnsafeArg = Call->getArg(ArgIdx); // output
+      return false; // returning false stops parsing immediately
     }
   };
 
@@ -846,7 +905,7 @@ static bool hasUnsafeFormatOrSArg(const CallExpr *Call, const Expr *&UnsafeArg,
     else
       goto CHECK_UNSAFE_PTR;
 
-    StringFormatStringHandler Handler(Call, FmtArgIdx, UnsafeArg);
+    StringFormatStringHandler Handler(Call, FmtArgIdx, UnsafeArg, Ctx);
 
     return analyze_format_string::ParsePrintfString(
         Handler, FmtStr.begin(), FmtStr.end(), Ctx.getLangOpts(),
diff --git a/clang/lib/Basic/Cuda.cpp b/clang/lib/Basic/Cuda.cpp
index 53b36d3..dc81b71 100644
--- a/clang/lib/Basic/Cuda.cpp
+++ b/clang/lib/Basic/Cuda.cpp
@@ -44,6 +44,7 @@ static const CudaVersionMapEntry CudaNameVersionMap[] = {
     CUDA_ENTRY(12, 5),
     CUDA_ENTRY(12, 6),
     CUDA_ENTRY(12, 8),
+    CUDA_ENTRY(12, 9),
     {"", CudaVersion::NEW, llvm::VersionTuple(std::numeric_limits<int>::max())},
     {"unknown", CudaVersion::UNKNOWN, {}} // End of list tombstone.
 };
@@ -119,6 +120,11 @@ CudaVersion MinVersionForOffloadArch(OffloadArch A) {
   case OffloadArch::SM_120:
   case OffloadArch::SM_120a:
     return CudaVersion::CUDA_128;
+  case OffloadArch::SM_103:
+  case OffloadArch::SM_103a:
+  case OffloadArch::SM_121:
+  case OffloadArch::SM_121a:
+    return CudaVersion::CUDA_129;
   default:
     llvm_unreachable("invalid enum");
   }
diff --git a/clang/lib/Basic/FileManager.cpp b/clang/lib/Basic/FileManager.cpp
index fc4ec78..7481e1e 100644
--- a/clang/lib/Basic/FileManager.cpp
+++ b/clang/lib/Basic/FileManager.cpp
@@ -368,11 +368,6 @@ void FileManager::trackVFSUsage(bool Active) {
   });
 }
 
-const FileEntry *FileManager::getVirtualFile(StringRef Filename, off_t Size,
-                                             time_t ModificationTime) {
-  return &getVirtualFileRef(Filename, Size, ModificationTime).getFileEntry();
-}
-
 FileEntryRef FileManager::getVirtualFileRef(StringRef Filename, off_t Size,
                                             time_t ModificationTime) {
   ++NumFileLookups;
diff --git a/clang/lib/Basic/OffloadArch.cpp b/clang/lib/Basic/OffloadArch.cpp
index dce9ffa..4348178 100644
--- a/clang/lib/Basic/OffloadArch.cpp
+++ b/clang/lib/Basic/OffloadArch.cpp
@@ -33,8 +33,12 @@ static const OffloadArchToStringMap ArchNames[] = {
     SM(100a),                        // Blackwell
     SM(101),                         // Blackwell
     SM(101a),                        // Blackwell
+    SM(103),                         // Blackwell
+    SM(103a),                        // Blackwell
     SM(120),                         // Blackwell
     SM(120a),                        // Blackwell
+    SM(121),                         // Blackwell
+    SM(121a),                        // Blackwell
     GFX(600),  // gfx600
     GFX(601),  // gfx601
     GFX(602),  // gfx602
diff --git a/clang/lib/Basic/SourceManager.cpp b/clang/lib/Basic/SourceManager.cpp
index b2b1488..343c26e 100644
--- a/clang/lib/Basic/SourceManager.cpp
+++ b/clang/lib/Basic/SourceManager.cpp
@@ -2366,23 +2366,21 @@ size_t SourceManager::getDataStructureSizes() const {
 
 SourceManagerForFile::SourceManagerForFile(StringRef FileName,
                                            StringRef Content) {
-  // This is referenced by `FileMgr` and will be released by `FileMgr` when it
-  // is deleted.
-  IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> InMemoryFileSystem(
-      new llvm::vfs::InMemoryFileSystem);
+  auto InMemoryFileSystem =
+      llvm::makeIntrusiveRefCnt<llvm::vfs::InMemoryFileSystem>();
   InMemoryFileSystem->addFile(
       FileName, 0,
       llvm::MemoryBuffer::getMemBuffer(Content, FileName,
                                        /*RequiresNullTerminator=*/false));
   // This is passed to `SM` as reference, so the pointer has to be referenced
   // in `Environment` so that `FileMgr` can out-live this function scope.
-  FileMgr =
-      std::make_unique<FileManager>(FileSystemOptions(), InMemoryFileSystem);
+  FileMgr = std::make_unique<FileManager>(FileSystemOptions(),
+                                          std::move(InMemoryFileSystem));
   DiagOpts = std::make_unique<DiagnosticOptions>();
   // This is passed to `SM` as reference, so the pointer has to be referenced
   // by `Environment` due to the same reason above.
-  Diagnostics = std::make_unique<DiagnosticsEngine>(
-      IntrusiveRefCntPtr<DiagnosticIDs>(new DiagnosticIDs), *DiagOpts);
+  Diagnostics =
+      std::make_unique<DiagnosticsEngine>(DiagnosticIDs::create(), *DiagOpts);
   SourceMgr = std::make_unique<SourceManager>(*Diagnostics, *FileMgr);
   FileEntryRef FE = llvm::cantFail(FileMgr->getFileRef(FileName));
   FileID ID =
diff --git a/clang/lib/Basic/Targets/AMDGPU.cpp b/clang/lib/Basic/Targets/AMDGPU.cpp
index cebcfa3..52cbdbc 100644
--- a/clang/lib/Basic/Targets/AMDGPU.cpp
+++ b/clang/lib/Basic/Targets/AMDGPU.cpp
@@ -266,8 +266,11 @@ AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple,
 
   MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64;
   CUMode = !(GPUFeatures & llvm::AMDGPU::FEATURE_WGP);
-  for (auto F : {"image-insts", "gws", "vmem-to-lds-load-insts"})
-    ReadOnlyFeatures.insert(F);
+
+  for (auto F : {"image-insts", "gws", "vmem-to-lds-load-insts"}) {
+    if (GPUKind != llvm::AMDGPU::GK_NONE)
+      ReadOnlyFeatures.insert(F);
+  }
   HalfArgsAndReturns = true;
 }
 
diff --git a/clang/lib/Basic/Targets/ARM.cpp b/clang/lib/Basic/Targets/ARM.cpp
index 29de34bb..6bec2fa 100644
--- a/clang/lib/Basic/Targets/ARM.cpp
+++ b/clang/lib/Basic/Targets/ARM.cpp
@@ -618,21 +618,21 @@ bool ARMTargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
       LDREX = 0;
     else if (ArchKind == llvm::ARM::ArchKind::ARMV6K ||
              ArchKind == llvm::ARM::ArchKind::ARMV6KZ)
-      LDREX = LDREX_D | LDREX_W | LDREX_H | LDREX_B;
+      LDREX = ARM_LDREX_D | ARM_LDREX_W | ARM_LDREX_H | ARM_LDREX_B;
     else
-      LDREX = LDREX_W;
+      LDREX = ARM_LDREX_W;
     break;
   case 7:
   case 8:
     if (ArchProfile == llvm::ARM::ProfileKind::M)
-      LDREX = LDREX_W | LDREX_H | LDREX_B;
+      LDREX = ARM_LDREX_W | ARM_LDREX_H | ARM_LDREX_B;
     else
-      LDREX = LDREX_D | LDREX_W | LDREX_H | LDREX_B;
+      LDREX = ARM_LDREX_D | ARM_LDREX_W | ARM_LDREX_H | ARM_LDREX_B;
     break;
   case 9:
     assert(ArchProfile != llvm::ARM::ProfileKind::M &&
            "No Armv9-M architectures defined");
-    LDREX = LDREX_D | LDREX_W | LDREX_H | LDREX_B;
+    LDREX = ARM_LDREX_D | ARM_LDREX_W | ARM_LDREX_H | ARM_LDREX_B;
   }
 
   if (!(FPU & NeonFPU) && FPMath == FP_Neon) {
diff --git a/clang/lib/Basic/Targets/ARM.h b/clang/lib/Basic/Targets/ARM.h
index 1719217..43c4718 100644
--- a/clang/lib/Basic/Targets/ARM.h
+++ b/clang/lib/Basic/Targets/ARM.h
@@ -98,13 +98,6 @@ class LLVM_LIBRARY_VISIBILITY ARMTargetInfo : public TargetInfo {
   LLVM_PREFERRED_TYPE(bool)
   unsigned HasBTI : 1;
 
-  enum {
-    LDREX_B = (1 << 0), /// byte (8-bit)
-    LDREX_H = (1 << 1), /// half (16-bit)
-    LDREX_W = (1 << 2), /// word (32-bit)
-    LDREX_D = (1 << 3), /// double (64-bit)
-  };
-
   uint32_t LDREX;
 
   // ACLE 6.5.1 Hardware floating point
@@ -225,6 +218,8 @@ public:
 
   bool hasBitIntType() const override { return true; }
 
+  unsigned getARMLDREXMask() const override { return LDREX; }
+
   const char *getBFloat16Mangling() const override { return "u6__bf16"; };
 
   std::pair<unsigned, unsigned> hardwareInterferenceSizes() const override {
diff --git a/clang/lib/Basic/Targets/NVPTX.cpp b/clang/lib/Basic/Targets/NVPTX.cpp
index 54b39fd..79995cc 100644
--- a/clang/lib/Basic/Targets/NVPTX.cpp
+++ b/clang/lib/Basic/Targets/NVPTX.cpp
@@ -295,10 +295,16 @@ void NVPTXTargetInfo::getTargetDefines(const LangOptions &Opts,
         return "1000";
       case OffloadArch::SM_101:
       case OffloadArch::SM_101a:
-         return "1010";
+        return "1010";
+      case OffloadArch::SM_103:
+      case OffloadArch::SM_103a:
+        return "1030";
       case OffloadArch::SM_120:
       case OffloadArch::SM_120a:
-         return "1200";
+        return "1200";
+      case OffloadArch::SM_121:
+      case OffloadArch::SM_121a:
+        return "1210";
       }
       llvm_unreachable("unhandled OffloadArch");
     }();
@@ -307,7 +313,9 @@ void NVPTXTargetInfo::getTargetDefines(const LangOptions &Opts,
       case OffloadArch::SM_90a:
       case OffloadArch::SM_100a:
       case OffloadArch::SM_101a:
+      case OffloadArch::SM_103a:
       case OffloadArch::SM_120a:
+      case OffloadArch::SM_121a:
         Builder.defineMacro("__CUDA_ARCH_FEAT_SM" + CUDAArchCode.drop_back() + "_ALL", "1");
         break;
       default:
diff --git a/clang/lib/Basic/Targets/WebAssembly.cpp b/clang/lib/Basic/Targets/WebAssembly.cpp
index e362350e..55ffe1d 100644
--- a/clang/lib/Basic/Targets/WebAssembly.cpp
+++ b/clang/lib/Basic/Targets/WebAssembly.cpp
@@ -59,12 +59,12 @@ bool WebAssemblyTargetInfo::hasFeature(StringRef Feature) const {
       .Case("exception-handling", HasExceptionHandling)
       .Case("extended-const", HasExtendedConst)
       .Case("fp16", HasFP16)
+      .Case("gc", HasGC)
       .Case("multimemory", HasMultiMemory)
       .Case("multivalue", HasMultivalue)
       .Case("mutable-globals", HasMutableGlobals)
       .Case("nontrapping-fptoint", HasNontrappingFPToInt)
       .Case("reference-types", HasReferenceTypes)
-      .Case("gc", HasGC)
       .Case("relaxed-simd", SIMDLevel >= RelaxedSIMD)
       .Case("sign-ext", HasSignExt)
       .Case("simd128", SIMDLevel >= SIMD128)
@@ -99,6 +99,8 @@ void WebAssemblyTargetInfo::getTargetDefines(const LangOptions &Opts,
     Builder.defineMacro("__wasm_multimemory__");
   if (HasFP16)
     Builder.defineMacro("__wasm_fp16__");
+  if (HasGC)
+    Builder.defineMacro("__wasm_gc__");
   if (HasMultivalue)
     Builder.defineMacro("__wasm_multivalue__");
   if (HasMutableGlobals)
@@ -107,8 +109,6 @@ void WebAssemblyTargetInfo::getTargetDefines(const LangOptions &Opts,
     Builder.defineMacro("__wasm_nontrapping_fptoint__");
   if (HasReferenceTypes)
     Builder.defineMacro("__wasm_reference_types__");
-  if (HasGC)
-    Builder.defineMacro("__wasm_gc__");
   if (SIMDLevel >= RelaxedSIMD)
     Builder.defineMacro("__wasm_relaxed_simd__");
   if (HasSignExt)
@@ -194,6 +194,7 @@ bool WebAssemblyTargetInfo::initFeatureMap(
     Features["exception-handling"] = true;
     Features["extended-const"] = true;
     Features["fp16"] = true;
+    Features["gc"] = true;
     Features["multimemory"] = true;
     Features["tail-call"] = true;
     Features["wide-arithmetic"] = true;
@@ -270,6 +271,14 @@ bool WebAssemblyTargetInfo::handleTargetFeatures(
       HasFP16 = false;
       continue;
     }
+    if (Feature == "+gc") {
+      HasGC = true;
+      continue;
+    }
+    if (Feature == "-gc") {
+      HasGC = false;
+      continue;
+    }
     if (Feature == "+multimemory") {
       HasMultiMemory = true;
       continue;
@@ -310,14 +319,6 @@ bool WebAssemblyTargetInfo::handleTargetFeatures(
       HasReferenceTypes = false;
       continue;
     }
-    if (Feature == "+gc") {
-      HasGC = true;
-      continue;
-    }
-    if (Feature == "-gc") {
-      HasGC = false;
-      continue;
-    }
     if (Feature == "+relaxed-simd") {
       SIMDLevel = std::max(SIMDLevel, RelaxedSIMD);
       continue;
diff --git a/clang/lib/Basic/Targets/WebAssembly.h b/clang/lib/Basic/Targets/WebAssembly.h
index c47c8cc..eba7422 100644
--- a/clang/lib/Basic/Targets/WebAssembly.h
+++ b/clang/lib/Basic/Targets/WebAssembly.h
@@ -64,12 +64,12 @@ class LLVM_LIBRARY_VISIBILITY WebAssemblyTargetInfo : public TargetInfo {
   bool HasExceptionHandling = false;
   bool HasExtendedConst = false;
   bool HasFP16 = false;
+  bool HasGC = false;
   bool HasMultiMemory = false;
   bool HasMultivalue = false;
   bool HasMutableGlobals = false;
   bool HasNontrappingFPToInt = false;
   bool HasReferenceTypes = false;
-  bool HasGC = false;
   bool HasSignExt = false;
   bool HasTailCall = false;
   bool HasWideArithmetic = false;
diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
index ef136f8..9049a01 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
@@ -190,6 +190,11 @@ RValue CIRGenFunction::emitBuiltinExpr(const GlobalDecl &gd, unsigned builtinID,
     assert(!cir::MissingFeatures::builtinCheckKind());
     return emitBuiltinBitOp<cir::BitClzOp>(*this, e, /*poisonZero=*/true);
 
+  case Builtin::BI__builtin_ffs:
+  case Builtin::BI__builtin_ffsl:
+  case Builtin::BI__builtin_ffsll:
+    return emitBuiltinBitOp<cir::BitFfsOp>(*this, e);
+
   case Builtin::BI__builtin_parity:
   case Builtin::BI__builtin_parityl:
   case Builtin::BI__builtin_parityll:
diff --git a/clang/lib/CIR/CodeGen/CIRGenCall.cpp b/clang/lib/CIR/CodeGen/CIRGenCall.cpp
index 938d143..fc208ff 100644
--- a/clang/lib/CIR/CodeGen/CIRGenCall.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenCall.cpp
@@ -582,6 +582,14 @@ RValue CIRGenFunction::emitCall(const CIRGenFunctionInfo &funcInfo,
   cir::FuncOp directFuncOp;
   if (auto fnOp = dyn_cast<cir::FuncOp>(calleePtr)) {
     directFuncOp = fnOp;
+  } else if (auto getGlobalOp = mlir::dyn_cast<cir::GetGlobalOp>(calleePtr)) {
+    // FIXME(cir): This peephole optimization avoids indirect calls for
+    // builtins. This should be fixed in the builtin declaration instead by
+    // not emitting an unecessary get_global in the first place.
+    // However, this is also used for no-prototype functions.
+    mlir::Operation *globalOp = cgm.getGlobalValue(getGlobalOp.getName());
+    assert(globalOp && "undefined global function");
+    directFuncOp = mlir::cast<cir::FuncOp>(globalOp);
   } else {
     [[maybe_unused]] mlir::ValueTypeRange<mlir::ResultRange> resultTypes =
         calleePtr->getResultTypes();
diff --git a/clang/lib/CIR/CodeGen/CIRGenCall.h b/clang/lib/CIR/CodeGen/CIRGenCall.h
index bd11329..28576a1 100644
--- a/clang/lib/CIR/CodeGen/CIRGenCall.h
+++ b/clang/lib/CIR/CodeGen/CIRGenCall.h
@@ -116,6 +116,11 @@ public:
     assert(isOrdinary());
     return reinterpret_cast<mlir::Operation *>(kindOrFunctionPtr);
   }
+
+  void setFunctionPointer(mlir::Operation *functionPtr) {
+    assert(isOrdinary());
+    kindOrFunctionPtr = SpecialKind(reinterpret_cast<uintptr_t>(functionPtr));
+  }
 };
 
 /// Type for representing both the decl and type of parameters to a function.
@@ -132,7 +137,7 @@ private:
 
   /// A data-flow flag to make sure getRValue and/or copyInto are not
   /// called twice for duplicated IR emission.
-  mutable bool isUsed;
+  [[maybe_unused]] mutable bool isUsed;
 
 public:
   clang::QualType ty;
diff --git a/clang/lib/CIR/CodeGen/CIRGenDecl.cpp b/clang/lib/CIR/CodeGen/CIRGenDecl.cpp
index a28ac3c..9e8eaa5 100644
--- a/clang/lib/CIR/CodeGen/CIRGenDecl.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenDecl.cpp
@@ -520,7 +520,7 @@ void CIRGenFunction::emitExprAsInit(const Expr *init, const ValueDecl *d,
   llvm_unreachable("bad evaluation kind");
 }
 
-void CIRGenFunction::emitDecl(const Decl &d) {
+void CIRGenFunction::emitDecl(const Decl &d, bool evaluateConditionDecl) {
   switch (d.getKind()) {
   case Decl::BuiltinTemplate:
   case Decl::TranslationUnit:
@@ -608,11 +608,14 @@ void CIRGenFunction::emitDecl(const Decl &d) {
   case Decl::UsingDirective: // using namespace X; [C++]
     assert(!cir::MissingFeatures::generateDebugInfo());
     return;
-  case Decl::Var: {
+  case Decl::Var:
+  case Decl::Decomposition: {
     const VarDecl &vd = cast<VarDecl>(d);
     assert(vd.isLocalVarDecl() &&
            "Should not see file-scope variables inside a function!");
     emitVarDecl(vd);
+    if (evaluateConditionDecl)
+      maybeEmitDeferredVarDeclInit(&vd);
     return;
   }
   case Decl::OpenACCDeclare:
@@ -632,7 +635,6 @@ void CIRGenFunction::emitDecl(const Decl &d) {
   case Decl::ImplicitConceptSpecialization:
   case Decl::TopLevelStmt:
   case Decl::UsingPack:
-  case Decl::Decomposition: // This could be moved to join Decl::Var
   case Decl::OMPDeclareReduction:
   case Decl::OMPDeclareMapper:
     cgm.errorNYI(d.getSourceRange(),
@@ -649,6 +651,38 @@ void CIRGenFunction::emitNullabilityCheck(LValue lhs, mlir::Value rhs,
   assert(!cir::MissingFeatures::sanitizers());
 }
 
+/// Destroys all the elements of the given array, beginning from last to first.
+/// The array cannot be zero-length.
+///
+/// \param begin - a type* denoting the first element of the array
+/// \param end - a type* denoting one past the end of the array
+/// \param elementType - the element type of the array
+/// \param destroyer - the function to call to destroy elements
+void CIRGenFunction::emitArrayDestroy(mlir::Value begin, mlir::Value end,
+                                      QualType elementType,
+                                      CharUnits elementAlign,
+                                      Destroyer *destroyer) {
+  assert(!elementType->isArrayType());
+
+  // Differently from LLVM traditional codegen, use a higher level
+  // representation instead of lowering directly to a loop.
+  mlir::Type cirElementType = convertTypeForMem(elementType);
+  cir::PointerType ptrToElmType = builder.getPointerTo(cirElementType);
+
+  // Emit the dtor call that will execute for every array element.
+  cir::ArrayDtor::create(
+      builder, *currSrcLoc, begin, [&](mlir::OpBuilder &b, mlir::Location loc) {
+        auto arg = b.getInsertionBlock()->addArgument(ptrToElmType, loc);
+        Address curAddr = Address(arg, cirElementType, elementAlign);
+        assert(!cir::MissingFeatures::dtorCleanups());
+
+        // Perform the actual destruction there.
+        destroyer(*this, curAddr, elementType);
+
+        cir::YieldOp::create(builder, loc);
+      });
+}
+
 /// Immediately perform the destruction of the given object.
 ///
 /// \param addr - the address of the object; a type*
@@ -658,10 +692,34 @@ void CIRGenFunction::emitNullabilityCheck(LValue lhs, mlir::Value rhs,
 ///   elements
 void CIRGenFunction::emitDestroy(Address addr, QualType type,
                                  Destroyer *destroyer) {
-  if (getContext().getAsArrayType(type))
-    cgm.errorNYI("emitDestroy: array type");
+  const ArrayType *arrayType = getContext().getAsArrayType(type);
+  if (!arrayType)
+    return destroyer(*this, addr, type);
+
+  mlir::Value length = emitArrayLength(arrayType, type, addr);
+
+  CharUnits elementAlign = addr.getAlignment().alignmentOfArrayElement(
+      getContext().getTypeSizeInChars(type));
+
+  auto constantCount = length.getDefiningOp<cir::ConstantOp>();
+  if (!constantCount) {
+    assert(!cir::MissingFeatures::vlas());
+    cgm.errorNYI("emitDestroy: variable length array");
+    return;
+  }
+
+  auto constIntAttr = mlir::dyn_cast<cir::IntAttr>(constantCount.getValue());
+  // If it's constant zero, we can just skip the entire thing.
+  if (constIntAttr && constIntAttr.getUInt() == 0)
+    return;
+
+  mlir::Value begin = addr.getPointer();
+  mlir::Value end; // This will be used for future non-constant counts.
+  emitArrayDestroy(begin, end, type, elementAlign, destroyer);
 
-  return destroyer(*this, addr, type);
+  // If the array destroy didn't use the length op, we can erase it.
+  if (constantCount.use_empty())
+    constantCount.erase();
 }
 
 CIRGenFunction::Destroyer *
@@ -741,3 +799,11 @@ void CIRGenFunction::emitAutoVarTypeCleanup(
   assert(!cir::MissingFeatures::ehCleanupFlags());
   ehStack.pushCleanup<DestroyObject>(cleanupKind, addr, type, destroyer);
 }
+
+void CIRGenFunction::maybeEmitDeferredVarDeclInit(const VarDecl *vd) {
+  if (auto *dd = dyn_cast_if_present<DecompositionDecl>(vd)) {
+    for (auto *b : dd->flat_bindings())
+      if (auto *hd = b->getHoldingVar())
+        emitVarDecl(*hd);
+  }
+}
diff --git a/clang/lib/CIR/CodeGen/CIRGenExpr.cpp b/clang/lib/CIR/CodeGen/CIRGenExpr.cpp
index 64dc1ce..d267504 100644
--- a/clang/lib/CIR/CodeGen/CIRGenExpr.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenExpr.cpp
@@ -584,6 +584,15 @@ LValue CIRGenFunction::emitDeclRefLValue(const DeclRefExpr *e) {
     return lv;
   }
 
+  if (const auto *bd = dyn_cast<BindingDecl>(nd)) {
+    if (e->refersToEnclosingVariableOrCapture()) {
+      assert(!cir::MissingFeatures::lambdaCaptures());
+      cgm.errorNYI(e->getSourceRange(), "emitDeclRefLValue: lambda captures");
+      return LValue();
+    }
+    return emitLValue(bd->getBinding());
+  }
+
   cgm.errorNYI(e->getSourceRange(), "emitDeclRefLValue: unhandled decl type");
   return LValue();
 }
@@ -1280,7 +1289,7 @@ RValue CIRGenFunction::getUndefRValue(QualType ty) {
 }
 
 RValue CIRGenFunction::emitCall(clang::QualType calleeTy,
-                                const CIRGenCallee &callee,
+                                const CIRGenCallee &origCallee,
                                 const clang::CallExpr *e,
                                 ReturnValueSlot returnValue) {
   // Get the actual function type. The callee type will always be a pointer to
@@ -1291,6 +1300,8 @@ RValue CIRGenFunction::emitCall(clang::QualType calleeTy,
   calleeTy = getContext().getCanonicalType(calleeTy);
   auto pointeeTy = cast<PointerType>(calleeTy)->getPointeeType();
 
+  CIRGenCallee callee = origCallee;
+
   if (getLangOpts().CPlusPlus)
     assert(!cir::MissingFeatures::sanitizers());
 
@@ -1307,7 +1318,44 @@ RValue CIRGenFunction::emitCall(clang::QualType calleeTy,
   const CIRGenFunctionInfo &funcInfo =
       cgm.getTypes().arrangeFreeFunctionCall(args, fnType);
 
-  assert(!cir::MissingFeatures::opCallNoPrototypeFunc());
+  // C99 6.5.2.2p6:
+  //   If the expression that denotes the called function has a type that does
+  //   not include a prototype, [the default argument promotions are performed].
+  //   If the number of arguments does not equal the number of parameters, the
+  //   behavior is undefined. If the function is defined with a type that
+  //   includes a prototype, and either the prototype ends with an ellipsis (,
+  //   ...) or the types of the arguments after promotion are not compatible
+  //   with the types of the parameters, the behavior is undefined. If the
+  //   function is defined with a type that does not include a prototype, and
+  //   the types of the arguments after promotion are not compatible with those
+  //   of the parameters after promotion, the behavior is undefined [except in
+  //   some trivial cases].
+  // That is, in the general case, we should assume that a call through an
+  // unprototyped function type works like a *non-variadic* call. The way we
+  // make this work is to cast to the exxact type fo the promoted arguments.
+  if (isa<FunctionNoProtoType>(fnType)) {
+    assert(!cir::MissingFeatures::opCallChain());
+    assert(!cir::MissingFeatures::addressSpace());
+    cir::FuncType calleeTy = getTypes().getFunctionType(funcInfo);
+    // get non-variadic function type
+    calleeTy = cir::FuncType::get(calleeTy.getInputs(),
+                                  calleeTy.getReturnType(), false);
+    auto calleePtrTy = cir::PointerType::get(calleeTy);
+
+    mlir::Operation *fn = callee.getFunctionPointer();
+    mlir::Value addr;
+    if (auto funcOp = mlir::dyn_cast<cir::FuncOp>(fn)) {
+      addr = builder.create<cir::GetGlobalOp>(
+          getLoc(e->getSourceRange()),
+          cir::PointerType::get(funcOp.getFunctionType()), funcOp.getSymName());
+    } else {
+      addr = fn->getResult(0);
+    }
+
+    fn = builder.createBitcast(addr, calleePtrTy).getDefiningOp();
+    callee.setFunctionPointer(fn);
+  }
+
   assert(!cir::MissingFeatures::opCallFnInfoOpts());
   assert(!cir::MissingFeatures::hip());
   assert(!cir::MissingFeatures::opCallMustTail());
@@ -1433,9 +1481,10 @@ Address CIRGenFunction::emitArrayToPointerDecay(const Expr *e) {
   if (e->getType()->isVariableArrayType())
     return addr;
 
-  auto pointeeTy = mlir::cast<cir::ArrayType>(lvalueAddrTy.getPointee());
+  [[maybe_unused]] auto pointeeTy =
+      mlir::cast<cir::ArrayType>(lvalueAddrTy.getPointee());
 
-  mlir::Type arrayTy = convertType(e->getType());
+  [[maybe_unused]] mlir::Type arrayTy = convertType(e->getType());
   assert(mlir::isa<cir::ArrayType>(arrayTy) && "expected array");
   assert(pointeeTy == arrayTy);
 
diff --git a/clang/lib/CIR/CodeGen/CIRGenExprComplex.cpp b/clang/lib/CIR/CodeGen/CIRGenExprComplex.cpp
index a09d739..3aa170e 100644
--- a/clang/lib/CIR/CodeGen/CIRGenExprComplex.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenExprComplex.cpp
@@ -91,6 +91,14 @@ public:
   }
 
   mlir::Value VisitUnaryDeref(const Expr *e);
+
+  mlir::Value VisitUnaryPlus(const UnaryOperator *e);
+
+  mlir::Value VisitPlusMinus(const UnaryOperator *e, cir::UnaryOpKind kind,
+                             QualType promotionType);
+
+  mlir::Value VisitUnaryMinus(const UnaryOperator *e);
+
   mlir::Value VisitUnaryNot(const UnaryOperator *e);
 
   struct BinOpInfo {
@@ -110,6 +118,7 @@ public:
 
   mlir::Value emitBinAdd(const BinOpInfo &op);
   mlir::Value emitBinSub(const BinOpInfo &op);
+  mlir::Value emitBinMul(const BinOpInfo &op);
 
   QualType getPromotionType(QualType ty, bool isDivOpCode = false) {
     if (auto *complexTy = ty->getAs<ComplexType>()) {
@@ -142,16 +151,20 @@ public:
 
   HANDLEBINOP(Add)
   HANDLEBINOP(Sub)
+  HANDLEBINOP(Mul)
 #undef HANDLEBINOP
 };
 } // namespace
 
+#ifndef NDEBUG
+// Only used in asserts
 static const ComplexType *getComplexType(QualType type) {
   type = type.getCanonicalType();
   if (const ComplexType *comp = dyn_cast<ComplexType>(type))
     return comp;
   return cast<ComplexType>(cast<AtomicType>(type)->getValueType());
 }
+#endif // NDEBUG
 
 LValue ComplexExprEmitter::emitBinAssignLValue(const BinaryOperator *e,
                                                mlir::Value &value) {
@@ -282,6 +295,41 @@ mlir::Value ComplexExprEmitter::emitCast(CastKind ck, Expr *op,
   llvm_unreachable("unknown cast resulting in complex value");
 }
 
+mlir::Value ComplexExprEmitter::VisitUnaryPlus(const UnaryOperator *e) {
+  QualType promotionTy = getPromotionType(e->getSubExpr()->getType());
+  mlir::Value result = VisitPlusMinus(e, cir::UnaryOpKind::Plus, promotionTy);
+  if (!promotionTy.isNull()) {
+    cgf.cgm.errorNYI("ComplexExprEmitter::VisitUnaryPlus emitUnPromotedValue");
+    return {};
+  }
+  return result;
+}
+
+mlir::Value ComplexExprEmitter::VisitPlusMinus(const UnaryOperator *e,
+                                               cir::UnaryOpKind kind,
+                                               QualType promotionType) {
+  assert(kind == cir::UnaryOpKind::Plus ||
+         kind == cir::UnaryOpKind::Minus &&
+             "Invalid UnaryOp kind for ComplexType Plus or Minus");
+
+  mlir::Value op;
+  if (!promotionType.isNull())
+    op = cgf.emitPromotedComplexExpr(e->getSubExpr(), promotionType);
+  else
+    op = Visit(e->getSubExpr());
+  return builder.createUnaryOp(cgf.getLoc(e->getExprLoc()), kind, op);
+}
+
+mlir::Value ComplexExprEmitter::VisitUnaryMinus(const UnaryOperator *e) {
+  QualType promotionTy = getPromotionType(e->getSubExpr()->getType());
+  mlir::Value result = VisitPlusMinus(e, cir::UnaryOpKind::Minus, promotionTy);
+  if (!promotionTy.isNull()) {
+    cgf.cgm.errorNYI("ComplexExprEmitter::VisitUnaryMinus emitUnPromotedValue");
+    return {};
+  }
+  return result;
+}
+
 mlir::Value ComplexExprEmitter::emitConstant(
     const CIRGenFunction::ConstantEmission &constant, Expr *e) {
   assert(constant && "not a constant");
@@ -534,13 +582,22 @@ mlir::Value ComplexExprEmitter::emitPromoted(const Expr *e,
     return emitBin##OP(emitBinOps(bo, promotionTy));
       HANDLE_BINOP(Add)
       HANDLE_BINOP(Sub)
+      HANDLE_BINOP(Mul)
 #undef HANDLE_BINOP
     default:
       break;
     }
-  } else if (isa<UnaryOperator>(e)) {
-    cgf.cgm.errorNYI("emitPromoted UnaryOperator");
-    return {};
+  } else if (const auto *unaryOp = dyn_cast<UnaryOperator>(e)) {
+    switch (unaryOp->getOpcode()) {
+    case UO_Minus:
+    case UO_Plus: {
+      auto kind = unaryOp->getOpcode() == UO_Plus ? cir::UnaryOpKind::Plus
+                                                  : cir::UnaryOpKind::Minus;
+      return VisitPlusMinus(unaryOp, kind, promotionTy);
+    }
+    default:
+      break;
+    }
   }
 
   mlir::Value result = Visit(const_cast<Expr *>(e));
@@ -585,6 +642,31 @@ mlir::Value ComplexExprEmitter::emitBinSub(const BinOpInfo &op) {
   return builder.create<cir::ComplexSubOp>(op.loc, op.lhs, op.rhs);
 }
 
+static cir::ComplexRangeKind
+getComplexRangeAttr(LangOptions::ComplexRangeKind range) {
+  switch (range) {
+  case LangOptions::CX_Full:
+    return cir::ComplexRangeKind::Full;
+  case LangOptions::CX_Improved:
+    return cir::ComplexRangeKind::Improved;
+  case LangOptions::CX_Promoted:
+    return cir::ComplexRangeKind::Promoted;
+  case LangOptions::CX_Basic:
+    return cir::ComplexRangeKind::Basic;
+  case LangOptions::CX_None:
+    // The default value for ComplexRangeKind is Full is no option is selected
+    return cir::ComplexRangeKind::Full;
+  }
+}
+
+mlir::Value ComplexExprEmitter::emitBinMul(const BinOpInfo &op) {
+  assert(!cir::MissingFeatures::fastMathFlags());
+  assert(!cir::MissingFeatures::cgFPOptionsRAII());
+  cir::ComplexRangeKind rangeKind =
+      getComplexRangeAttr(op.fpFeatures.getComplexRange());
+  return builder.create<cir::ComplexMulOp>(op.loc, op.lhs, op.rhs, rangeKind);
+}
+
 LValue CIRGenFunction::emitComplexAssignmentLValue(const BinaryOperator *e) {
   assert(e->getOpcode() == BO_Assign && "Expected assign op");
 
diff --git a/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp b/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp
index 2523b0f..f62be49 100644
--- a/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp
@@ -439,7 +439,7 @@ public:
       value = builder.getTrue(cgf.getLoc(e->getExprLoc()));
     } else if (type->isIntegerType()) {
       QualType promotedType;
-      bool canPerformLossyDemotionCheck = false;
+      [[maybe_unused]] bool canPerformLossyDemotionCheck = false;
       if (cgf.getContext().isPromotableIntegerType(type)) {
         promotedType = cgf.getContext().getPromotedIntegerType(type);
         assert(promotedType != type && "Shouldn't promote to the same type.");
diff --git a/clang/lib/CIR/CodeGen/CIRGenFunction.cpp b/clang/lib/CIR/CodeGen/CIRGenFunction.cpp
index c65d025..0c9bc38 100644
--- a/clang/lib/CIR/CodeGen/CIRGenFunction.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenFunction.cpp
@@ -216,8 +216,7 @@ void CIRGenFunction::emitAndUpdateRetAlloca(QualType type, mlir::Location loc,
 void CIRGenFunction::declare(mlir::Value addrVal, const Decl *var, QualType ty,
                              mlir::Location loc, CharUnits alignment,
                              bool isParam) {
-  const auto *namedVar = dyn_cast_or_null<NamedDecl>(var);
-  assert(namedVar && "Needs a named decl");
+  assert(isa<NamedDecl>(var) && "Needs a named decl");
   assert(!cir::MissingFeatures::cgfSymbolTable());
 
   auto allocaOp = cast<cir::AllocaOp>(addrVal.getDefiningOp());
@@ -943,6 +942,7 @@ void CIRGenFunction::emitVariablyModifiedType(QualType type) {
     case Type::HLSLInlineSpirv:
     case Type::PredefinedSugar:
       cgm.errorNYI("CIRGenFunction::emitVariablyModifiedType");
+      break;
 
 #define TYPE(Class, Base)
 #define ABSTRACT_TYPE(Class, Base)
diff --git a/clang/lib/CIR/CodeGen/CIRGenFunction.h b/clang/lib/CIR/CodeGen/CIRGenFunction.h
index 77539d7..f9c8636 100644
--- a/clang/lib/CIR/CodeGen/CIRGenFunction.h
+++ b/clang/lib/CIR/CodeGen/CIRGenFunction.h
@@ -848,6 +848,10 @@ public:
   /// even if no aggregate location is provided.
   RValue emitAnyExprToTemp(const clang::Expr *e);
 
+  void emitArrayDestroy(mlir::Value begin, mlir::Value end,
+                        QualType elementType, CharUnits elementAlign,
+                        Destroyer *destroyer);
+
   mlir::Value emitArrayLength(const clang::ArrayType *arrayType,
                               QualType &baseType, Address &addr);
   LValue emitArraySubscriptExpr(const clang::ArraySubscriptExpr *e);
@@ -866,6 +870,8 @@ public:
   void emitAutoVarTypeCleanup(const AutoVarEmission &emission,
                               clang::QualType::DestructionKind dtorKind);
 
+  void maybeEmitDeferredVarDeclInit(const VarDecl *vd);
+
   void emitBaseInitializer(mlir::Location loc, const CXXRecordDecl *classDecl,
                            CXXCtorInitializer *baseInit);
 
@@ -1055,7 +1061,7 @@ public:
 
   void emitCompoundStmtWithoutScope(const clang::CompoundStmt &s);
 
-  void emitDecl(const clang::Decl &d);
+  void emitDecl(const clang::Decl &d, bool evaluateConditionDecl = false);
   mlir::LogicalResult emitDeclStmt(const clang::DeclStmt &s);
   LValue emitDeclRefLValue(const clang::DeclRefExpr *e);
 
diff --git a/clang/lib/CIR/CodeGen/CIRGenModule.cpp b/clang/lib/CIR/CodeGen/CIRGenModule.cpp
index 0724cb1..b143682 100644
--- a/clang/lib/CIR/CodeGen/CIRGenModule.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenModule.cpp
@@ -656,8 +656,6 @@ mlir::Value CIRGenModule::getAddrOfGlobalVar(const VarDecl *d, mlir::Type ty,
 
 void CIRGenModule::emitGlobalVarDefinition(const clang::VarDecl *vd,
                                            bool isTentative) {
-  const QualType astTy = vd->getType();
-
   if (getLangOpts().OpenCL || getLangOpts().OpenMPIsTargetDevice) {
     errorNYI(vd->getSourceRange(), "emit OpenCL/OpenMP global variable");
     return;
@@ -701,7 +699,7 @@ void CIRGenModule::emitGlobalVarDefinition(const clang::VarDecl *vd,
     // never attempt to emit a tentative definition if a real one
     // exists. A use may still exists, however, so we still may need
     // to do a RAUW.
-    assert(!astTy->isIncompleteType() && "Unexpected incomplete type");
+    assert(!vd->getType()->isIncompleteType() && "Unexpected incomplete type");
     init = builder.getZeroInitAttr(convertType(vd->getType()));
   } else {
     emitter.emplace(*this);
@@ -1103,6 +1101,60 @@ cir::GlobalLinkageKind CIRGenModule::getCIRLinkageForDeclarator(
   return cir::GlobalLinkageKind::ExternalLinkage;
 }
 
+/// This function is called when we implement a function with no prototype, e.g.
+/// "int foo() {}". If there are existing call uses of the old function in the
+/// module, this adjusts them to call the new function directly.
+///
+/// This is not just a cleanup: the always_inline pass requires direct calls to
+/// functions to be able to inline them.  If there is a bitcast in the way, it
+/// won't inline them. Instcombine normally deletes these calls, but it isn't
+/// run at -O0.
+void CIRGenModule::replaceUsesOfNonProtoTypeWithRealFunction(
+    mlir::Operation *old, cir::FuncOp newFn) {
+  // If we're redefining a global as a function, don't transform it.
+  auto oldFn = mlir::dyn_cast<cir::FuncOp>(old);
+  if (!oldFn)
+    return;
+
+  // TODO(cir): this RAUW ignores the features below.
+  assert(!cir::MissingFeatures::opFuncExceptions());
+  assert(!cir::MissingFeatures::opFuncParameterAttributes());
+  assert(!cir::MissingFeatures::opFuncOperandBundles());
+  if (oldFn->getAttrs().size() <= 1)
+    errorNYI(old->getLoc(),
+             "replaceUsesOfNonProtoTypeWithRealFunction: Attribute forwarding");
+
+  // Mark new function as originated from a no-proto declaration.
+  newFn.setNoProto(oldFn.getNoProto());
+
+  // Iterate through all calls of the no-proto function.
+  std::optional<mlir::SymbolTable::UseRange> symUses =
+      oldFn.getSymbolUses(oldFn->getParentOp());
+  for (const mlir::SymbolTable::SymbolUse &use : symUses.value()) {
+    mlir::OpBuilder::InsertionGuard guard(builder);
+
+    if (auto noProtoCallOp = mlir::dyn_cast<cir::CallOp>(use.getUser())) {
+      builder.setInsertionPoint(noProtoCallOp);
+
+      // Patch call type with the real function type.
+      cir::CallOp realCallOp = builder.createCallOp(
+          noProtoCallOp.getLoc(), newFn, noProtoCallOp.getOperands());
+
+      // Replace old no proto call with fixed call.
+      noProtoCallOp.replaceAllUsesWith(realCallOp);
+      noProtoCallOp.erase();
+    } else if (auto getGlobalOp =
+                   mlir::dyn_cast<cir::GetGlobalOp>(use.getUser())) {
+      // Replace type
+      getGlobalOp.getAddr().setType(
+          cir::PointerType::get(newFn.getFunctionType()));
+    } else {
+      errorNYI(use.getUser()->getLoc(),
+               "replaceUsesOfNonProtoTypeWithRealFunction: unexpected use");
+    }
+  }
+}
+
 cir::GlobalLinkageKind
 CIRGenModule::getCIRLinkageVarDefinition(const VarDecl *vd, bool isConstant) {
   assert(!isConstant && "constant variables NYI");
@@ -1244,6 +1296,7 @@ void CIRGenModule::emitTopLevelDecl(Decl *decl) {
              decl->getDeclKindName());
     break;
 
+  case Decl::CXXConversion:
   case Decl::CXXMethod:
   case Decl::Function: {
     auto *fd = cast<FunctionDecl>(decl);
@@ -1253,8 +1306,13 @@ void CIRGenModule::emitTopLevelDecl(Decl *decl) {
     break;
   }
 
-  case Decl::Var: {
+  case Decl::Var:
+  case Decl::Decomposition: {
     auto *vd = cast<VarDecl>(decl);
+    if (isa<DecompositionDecl>(decl)) {
+      errorNYI(decl->getSourceRange(), "global variable decompositions");
+      break;
+    }
     emitGlobal(vd);
     break;
   }
@@ -1276,8 +1334,14 @@ void CIRGenModule::emitTopLevelDecl(Decl *decl) {
     break;
 
   // No code generation needed.
-  case Decl::UsingShadow:
+  case Decl::ClassTemplate:
+  case Decl::Concept:
+  case Decl::CXXDeductionGuide:
   case Decl::Empty:
+  case Decl::FunctionTemplate:
+  case Decl::StaticAssert:
+  case Decl::TypeAliasTemplate:
+  case Decl::UsingShadow:
     break;
 
   case Decl::CXXConstructor:
@@ -1539,10 +1603,10 @@ static bool shouldAssumeDSOLocal(const CIRGenModule &cgm,
 
   const llvm::Triple &tt = cgm.getTriple();
   const CodeGenOptions &cgOpts = cgm.getCodeGenOpts();
-  if (tt.isWindowsGNUEnvironment()) {
-    // In MinGW, variables without DLLImport can still be automatically
-    // imported from a DLL by the linker; don't mark variables that
-    // potentially could come from another DLL as DSO local.
+  if (tt.isOSCygMing()) {
+    // In MinGW and Cygwin, variables without DLLImport can still be
+    // automatically imported from a DLL by the linker; don't mark variables
+    // that potentially could come from another DLL as DSO local.
 
     // With EmulatedTLS, TLS variables can be autoimported from other DLLs
     // (and this actually happens in the public interface of libstdc++), so
@@ -1701,8 +1765,7 @@ cir::FuncOp CIRGenModule::getOrCreateCIRFunction(
   // Lookup the entry, lazily creating it if necessary.
   mlir::Operation *entry = getGlobalValue(mangledName);
   if (entry) {
-    if (!isa<cir::FuncOp>(entry))
-      errorNYI(d->getSourceRange(), "getOrCreateCIRFunction: non-FuncOp");
+    assert(mlir::isa<cir::FuncOp>(entry));
 
     assert(!cir::MissingFeatures::weakRefReference());
 
@@ -1738,6 +1801,30 @@ cir::FuncOp CIRGenModule::getOrCreateCIRFunction(
       invalidLoc ? theModule->getLoc() : getLoc(funcDecl->getSourceRange()),
       mangledName, mlir::cast<cir::FuncType>(funcType), funcDecl);
 
+  // If we already created a function with the same mangled name (but different
+  // type) before, take its name and add it to the list of functions to be
+  // replaced with F at the end of CodeGen.
+  //
+  // This happens if there is a prototype for a function (e.g. "int f()") and
+  // then a definition of a different type (e.g. "int f(int x)").
+  if (entry) {
+
+    // Fetch a generic symbol-defining operation and its uses.
+    auto symbolOp = mlir::cast<mlir::SymbolOpInterface>(entry);
+
+    // This might be an implementation of a function without a prototype, in
+    // which case, try to do special replacement of calls which match the new
+    // prototype. The really key thing here is that we also potentially drop
+    // arguments from the call site so as to make a direct call, which makes the
+    // inliner happier and suppresses a number of optimizer warnings (!) about
+    // dropping arguments.
+    if (symbolOp.getSymbolUses(symbolOp->getParentOp()))
+      replaceUsesOfNonProtoTypeWithRealFunction(entry, funcOp);
+
+    // Obliterate no-proto declaration.
+    entry->erase();
+  }
+
   if (d)
     setFunctionAttributes(gd, funcOp, /*isIncompleteFunction=*/false, isThunk);
 
@@ -1814,7 +1901,9 @@ CIRGenModule::createCIRFunction(mlir::Location loc, StringRef name,
     func = builder.create<cir::FuncOp>(loc, name, funcType);
 
     assert(!cir::MissingFeatures::opFuncAstDeclAttr());
-    assert(!cir::MissingFeatures::opFuncNoProto());
+
+    if (funcDecl && !funcDecl->hasPrototype())
+      func.setNoProto(true);
 
     assert(func.isDeclaration() && "expected empty body");
 
diff --git a/clang/lib/CIR/CodeGen/CIRGenModule.h b/clang/lib/CIR/CodeGen/CIRGenModule.h
index 22519ff..5d07d38 100644
--- a/clang/lib/CIR/CodeGen/CIRGenModule.h
+++ b/clang/lib/CIR/CodeGen/CIRGenModule.h
@@ -313,6 +313,9 @@ public:
 
   static void setInitializer(cir::GlobalOp &op, mlir::Attribute value);
 
+  void replaceUsesOfNonProtoTypeWithRealFunction(mlir::Operation *old,
+                                                 cir::FuncOp newFn);
+
   cir::FuncOp
   getOrCreateCIRFunction(llvm::StringRef mangledName, mlir::Type funcType,
                          clang::GlobalDecl gd, bool forVTable,
diff --git a/clang/lib/CIR/CodeGen/CIRGenRecordLayoutBuilder.cpp b/clang/lib/CIR/CodeGen/CIRGenRecordLayoutBuilder.cpp
index 05e8848..0c8ff4bd 100644
--- a/clang/lib/CIR/CodeGen/CIRGenRecordLayoutBuilder.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenRecordLayoutBuilder.cpp
@@ -91,6 +91,9 @@ struct CIRRecordLowering final {
     return astContext.getTargetInfo().getABI().starts_with("aapcs");
   }
 
+  /// Helper function to check if the target machine is BigEndian.
+  bool isBigEndian() const { return astContext.getTargetInfo().isBigEndian(); }
+
   CharUnits bitsToCharUnits(uint64_t bitOffset) {
     return astContext.toCharUnitsFromBits(bitOffset);
   }
@@ -438,9 +441,7 @@ CIRRecordLowering::accumulateBitFields(RecordDecl::field_iterator field,
           } else if (cirGenTypes.getCGModule()
                          .getCodeGenOpts()
                          .FineGrainedBitfieldAccesses) {
-            assert(!cir::MissingFeatures::nonFineGrainedBitfields());
-            cirGenTypes.getCGModule().errorNYI(field->getSourceRange(),
-                                               "NYI FineGrainedBitfield");
+            installBest = true;
           } else {
             // Otherwise, we're not installing. Update the bit size
             // of the current span to go all the way to limitOffset, which is
@@ -773,7 +774,104 @@ void CIRRecordLowering::computeVolatileBitfields() {
       !cirGenTypes.getCGModule().getCodeGenOpts().AAPCSBitfieldWidth)
     return;
 
-  assert(!cir::MissingFeatures::armComputeVolatileBitfields());
+  for (auto &[field, info] : bitFields) {
+    mlir::Type resLTy = cirGenTypes.convertTypeForMem(field->getType());
+
+    if (astContext.toBits(astRecordLayout.getAlignment()) <
+        getSizeInBits(resLTy).getQuantity())
+      continue;
+
+    // CIRRecordLowering::setBitFieldInfo() pre-adjusts the bit-field offsets
+    // for big-endian targets, but it assumes a container of width
+    // info.storageSize. Since AAPCS uses a different container size (width
+    // of the type), we first undo that calculation here and redo it once
+    // the bit-field offset within the new container is calculated.
+    const unsigned oldOffset =
+        isBigEndian() ? info.storageSize - (info.offset + info.size)
+                      : info.offset;
+    // Offset to the bit-field from the beginning of the struct.
+    const unsigned absoluteOffset =
+        astContext.toBits(info.storageOffset) + oldOffset;
+
+    // Container size is the width of the bit-field type.
+    const unsigned storageSize = getSizeInBits(resLTy).getQuantity();
+    // Nothing to do if the access uses the desired
+    // container width and is naturally aligned.
+    if (info.storageSize == storageSize && (oldOffset % storageSize == 0))
+      continue;
+
+    // Offset within the container.
+    unsigned offset = absoluteOffset & (storageSize - 1);
+    // Bail out if an aligned load of the container cannot cover the entire
+    // bit-field. This can happen for example, if the bit-field is part of a
+    // packed struct. AAPCS does not define access rules for such cases, we let
+    // clang to follow its own rules.
+    if (offset + info.size > storageSize)
+      continue;
+
+    // Re-adjust offsets for big-endian targets.
+    if (isBigEndian())
+      offset = storageSize - (offset + info.size);
+
+    const CharUnits storageOffset =
+        astContext.toCharUnitsFromBits(absoluteOffset & ~(storageSize - 1));
+    const CharUnits end = storageOffset +
+                          astContext.toCharUnitsFromBits(storageSize) -
+                          CharUnits::One();
+
+    const ASTRecordLayout &layout =
+        astContext.getASTRecordLayout(field->getParent());
+    // If we access outside memory outside the record, than bail out.
+    const CharUnits recordSize = layout.getSize();
+    if (end >= recordSize)
+      continue;
+
+    // Bail out if performing this load would access non-bit-fields members.
+    bool conflict = false;
+    for (const auto *f : recordDecl->fields()) {
+      // Allow sized bit-fields overlaps.
+      if (f->isBitField() && !f->isZeroLengthBitField())
+        continue;
+
+      const CharUnits fOffset = astContext.toCharUnitsFromBits(
+          layout.getFieldOffset(f->getFieldIndex()));
+
+      // As C11 defines, a zero sized bit-field defines a barrier, so
+      // fields after and before it should be race condition free.
+      // The AAPCS acknowledges it and imposes no restritions when the
+      // natural container overlaps a zero-length bit-field.
+      if (f->isZeroLengthBitField()) {
+        if (end > fOffset && storageOffset < fOffset) {
+          conflict = true;
+          break;
+        }
+      }
+
+      const CharUnits fEnd =
+          fOffset +
+          astContext.toCharUnitsFromBits(astContext.toBits(
+              getSizeInBits(cirGenTypes.convertTypeForMem(f->getType())))) -
+          CharUnits::One();
+      // If no overlap, continue.
+      if (end < fOffset || fEnd < storageOffset)
+        continue;
+
+      // The desired load overlaps a non-bit-field member, bail out.
+      conflict = true;
+      break;
+    }
+
+    if (conflict)
+      continue;
+    // Write the new bit-field access parameters.
+    // As the storage offset now is defined as the number of elements from the
+    // start of the structure, we should divide the Offset by the element size.
+    info.volatileStorageOffset =
+        storageOffset /
+        astContext.toCharUnitsFromBits(storageSize).getQuantity();
+    info.volatileStorageSize = storageSize;
+    info.volatileOffset = offset;
+  }
 }
 
 void CIRRecordLowering::accumulateBases(const CXXRecordDecl *cxxRecordDecl) {
diff --git a/clang/lib/CIR/CodeGen/CIRGenStmt.cpp b/clang/lib/CIR/CodeGen/CIRGenStmt.cpp
index 21bee33..50642e7 100644
--- a/clang/lib/CIR/CodeGen/CIRGenStmt.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenStmt.cpp
@@ -79,14 +79,15 @@ mlir::LogicalResult CIRGenFunction::emitStmt(const Stmt *s,
 #define EXPR(Type, Base) case Stmt::Type##Class:
 #include "clang/AST/StmtNodes.inc"
     {
-      // Remember the block we came in on.
-      mlir::Block *incoming = builder.getInsertionBlock();
-      assert(incoming && "expression emission must have an insertion point");
+      assert(builder.getInsertionBlock() &&
+             "expression emission must have an insertion point");
 
       emitIgnoredExpr(cast<Expr>(s));
 
-      mlir::Block *outgoing = builder.getInsertionBlock();
-      assert(outgoing && "expression emission cleared block!");
+      // Classic codegen has a check here to see if the emitter created a new
+      // block that isn't used (comparing the incoming and outgoing insertion
+      // points) and deletes the outgoing block if it's not used. In CIR, we
+      // will handle that during the cir.canonicalize pass.
       return mlir::success();
     }
   case Stmt::IfStmtClass:
@@ -363,8 +364,8 @@ mlir::LogicalResult CIRGenFunction::emitIfStmt(const IfStmt &s) {
 mlir::LogicalResult CIRGenFunction::emitDeclStmt(const DeclStmt &s) {
   assert(builder.getInsertionBlock() && "expected valid insertion point");
 
-  for (const Decl *I : s.decls())
-    emitDecl(*I);
+  for (const Decl *i : s.decls())
+    emitDecl(*i, /*evaluateConditionDecl=*/true);
 
   return mlir::success();
 }
@@ -875,7 +876,7 @@ mlir::LogicalResult CIRGenFunction::emitSwitchStmt(const clang::SwitchStmt &s) {
         return mlir::failure();
 
     if (s.getConditionVariable())
-      emitDecl(*s.getConditionVariable());
+      emitDecl(*s.getConditionVariable(), /*evaluateConditionDecl=*/true);
 
     mlir::Value condV = emitScalarExpr(s.getCond());
 
diff --git a/clang/lib/CIR/Dialect/IR/CIRDialect.cpp b/clang/lib/CIR/Dialect/IR/CIRDialect.cpp
index 2213c75..1c3a310 100644
--- a/clang/lib/CIR/Dialect/IR/CIRDialect.cpp
+++ b/clang/lib/CIR/Dialect/IR/CIRDialect.cpp
@@ -1470,10 +1470,14 @@ ParseResult cir::FuncOp::parse(OpAsmParser &parser, OperationState &state) {
   llvm::SMLoc loc = parser.getCurrentLocation();
   mlir::Builder &builder = parser.getBuilder();
 
+  mlir::StringAttr noProtoNameAttr = getNoProtoAttrName(state.name);
   mlir::StringAttr visNameAttr = getSymVisibilityAttrName(state.name);
   mlir::StringAttr visibilityNameAttr = getGlobalVisibilityAttrName(state.name);
   mlir::StringAttr dsoLocalNameAttr = getDsoLocalAttrName(state.name);
 
+  if (parser.parseOptionalKeyword(noProtoNameAttr).succeeded())
+    state.addAttribute(noProtoNameAttr, parser.getBuilder().getUnitAttr());
+
   // Default to external linkage if no keyword is provided.
   state.addAttribute(getLinkageAttrNameString(),
                      GlobalLinkageKindAttr::get(
@@ -1578,6 +1582,9 @@ mlir::Region *cir::FuncOp::getCallableRegion() {
 }
 
 void cir::FuncOp::print(OpAsmPrinter &p) {
+  if (getNoProto())
+    p << " no_proto";
+
   if (getComdat())
     p << " comdat";
 
@@ -2295,6 +2302,15 @@ OpFoldResult BitCtzOp::fold(FoldAdaptor adaptor) {
       getPoisonZero());
 }
 
+OpFoldResult BitFfsOp::fold(FoldAdaptor adaptor) {
+  return foldUnaryBitOp(adaptor.getInput(), [](const llvm::APInt &inputValue) {
+    unsigned trailingZeros = inputValue.countTrailingZeros();
+    unsigned result =
+        trailingZeros == inputValue.getBitWidth() ? 0 : trailingZeros + 1;
+    return llvm::APInt(inputValue.getBitWidth(), result);
+  });
+}
+
 OpFoldResult BitParityOp::fold(FoldAdaptor adaptor) {
   return foldUnaryBitOp(adaptor.getInput(), [](const llvm::APInt &inputValue) {
     return llvm::APInt(inputValue.getBitWidth(), inputValue.popcount() % 2);
diff --git a/clang/lib/CIR/Dialect/Transforms/CIRCanonicalize.cpp b/clang/lib/CIR/Dialect/Transforms/CIRCanonicalize.cpp
index 2143f16..2eaa60c 100644
--- a/clang/lib/CIR/Dialect/Transforms/CIRCanonicalize.cpp
+++ b/clang/lib/CIR/Dialect/Transforms/CIRCanonicalize.cpp
@@ -143,7 +143,7 @@ void CIRCanonicalizePass::runOnOperation() {
     if (isa<BrOp, BrCondOp, CastOp, ScopeOp, SwitchOp, SelectOp, UnaryOp,
             ComplexCreateOp, ComplexImagOp, ComplexRealOp, VecCmpOp,
             VecCreateOp, VecExtractOp, VecShuffleOp, VecShuffleDynamicOp,
-            VecTernaryOp, BitClrsbOp, BitClzOp, BitCtzOp, BitParityOp,
+            VecTernaryOp, BitClrsbOp, BitClzOp, BitCtzOp, BitFfsOp, BitParityOp,
             BitPopcountOp, BitReverseOp, ByteSwapOp, RotateOp>(op))
       ops.push_back(op);
   });
diff --git a/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp b/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp
index cef83ea..66260eb 100644
--- a/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp
+++ b/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp
@@ -15,7 +15,6 @@
 #include "clang/CIR/Dialect/Passes.h"
 #include "clang/CIR/MissingFeatures.h"
 
-#include <iostream>
 #include <memory>
 
 using namespace mlir;
@@ -28,8 +27,15 @@ struct LoweringPreparePass : public LoweringPrepareBase<LoweringPreparePass> {
 
   void runOnOp(mlir::Operation *op);
   void lowerCastOp(cir::CastOp op);
+  void lowerComplexMulOp(cir::ComplexMulOp op);
   void lowerUnaryOp(cir::UnaryOp op);
-  void lowerArrayCtor(ArrayCtor op);
+  void lowerArrayDtor(cir::ArrayDtor op);
+  void lowerArrayCtor(cir::ArrayCtor op);
+
+  cir::FuncOp buildRuntimeFunction(
+      mlir::OpBuilder &builder, llvm::StringRef name, mlir::Location loc,
+      cir::FuncType type,
+      cir::GlobalLinkageKind linkage = cir::GlobalLinkageKind::ExternalLinkage);
 
   ///
   /// AST related
@@ -37,11 +43,31 @@ struct LoweringPreparePass : public LoweringPrepareBase<LoweringPreparePass> {
 
   clang::ASTContext *astCtx;
 
+  /// Tracks current module.
+  mlir::ModuleOp mlirModule;
+
   void setASTContext(clang::ASTContext *c) { astCtx = c; }
 };
 
 } // namespace
 
+cir::FuncOp LoweringPreparePass::buildRuntimeFunction(
+    mlir::OpBuilder &builder, llvm::StringRef name, mlir::Location loc,
+    cir::FuncType type, cir::GlobalLinkageKind linkage) {
+  cir::FuncOp f = dyn_cast_or_null<FuncOp>(SymbolTable::lookupNearestSymbolFrom(
+      mlirModule, StringAttr::get(mlirModule->getContext(), name)));
+  if (!f) {
+    f = builder.create<cir::FuncOp>(loc, name, type);
+    f.setLinkageAttr(
+        cir::GlobalLinkageKindAttr::get(builder.getContext(), linkage));
+    mlir::SymbolTable::setSymbolVisibility(
+        f, mlir::SymbolTable::Visibility::Private);
+
+    assert(!cir::MissingFeatures::opFuncExtraAttrs());
+  }
+  return f;
+}
+
 static mlir::Value lowerScalarToComplexCast(mlir::MLIRContext &ctx,
                                             cir::CastOp op) {
   cir::CIRBaseBuilderTy builder(ctx);
@@ -127,6 +153,124 @@ void LoweringPreparePass::lowerCastOp(cir::CastOp op) {
   }
 }
 
+static mlir::Value buildComplexBinOpLibCall(
+    LoweringPreparePass &pass, CIRBaseBuilderTy &builder,
+    llvm::StringRef (*libFuncNameGetter)(llvm::APFloat::Semantics),
+    mlir::Location loc, cir::ComplexType ty, mlir::Value lhsReal,
+    mlir::Value lhsImag, mlir::Value rhsReal, mlir::Value rhsImag) {
+  cir::FPTypeInterface elementTy =
+      mlir::cast<cir::FPTypeInterface>(ty.getElementType());
+
+  llvm::StringRef libFuncName = libFuncNameGetter(
+      llvm::APFloat::SemanticsToEnum(elementTy.getFloatSemantics()));
+  llvm::SmallVector<mlir::Type, 4> libFuncInputTypes(4, elementTy);
+
+  cir::FuncType libFuncTy = cir::FuncType::get(libFuncInputTypes, ty);
+
+  // Insert a declaration for the runtime function to be used in Complex
+  // multiplication and division when needed
+  cir::FuncOp libFunc;
+  {
+    mlir::OpBuilder::InsertionGuard ipGuard{builder};
+    builder.setInsertionPointToStart(pass.mlirModule.getBody());
+    libFunc = pass.buildRuntimeFunction(builder, libFuncName, loc, libFuncTy);
+  }
+
+  cir::CallOp call =
+      builder.createCallOp(loc, libFunc, {lhsReal, lhsImag, rhsReal, rhsImag});
+  return call.getResult();
+}
+
+static llvm::StringRef
+getComplexMulLibCallName(llvm::APFloat::Semantics semantics) {
+  switch (semantics) {
+  case llvm::APFloat::S_IEEEhalf:
+    return "__mulhc3";
+  case llvm::APFloat::S_IEEEsingle:
+    return "__mulsc3";
+  case llvm::APFloat::S_IEEEdouble:
+    return "__muldc3";
+  case llvm::APFloat::S_PPCDoubleDouble:
+    return "__multc3";
+  case llvm::APFloat::S_x87DoubleExtended:
+    return "__mulxc3";
+  case llvm::APFloat::S_IEEEquad:
+    return "__multc3";
+  default:
+    llvm_unreachable("unsupported floating point type");
+  }
+}
+
+static mlir::Value lowerComplexMul(LoweringPreparePass &pass,
+                                   CIRBaseBuilderTy &builder,
+                                   mlir::Location loc, cir::ComplexMulOp op,
+                                   mlir::Value lhsReal, mlir::Value lhsImag,
+                                   mlir::Value rhsReal, mlir::Value rhsImag) {
+  // (a+bi) * (c+di) = (ac-bd) + (ad+bc)i
+  mlir::Value resultRealLhs =
+      builder.createBinop(loc, lhsReal, cir::BinOpKind::Mul, rhsReal);
+  mlir::Value resultRealRhs =
+      builder.createBinop(loc, lhsImag, cir::BinOpKind::Mul, rhsImag);
+  mlir::Value resultImagLhs =
+      builder.createBinop(loc, lhsReal, cir::BinOpKind::Mul, rhsImag);
+  mlir::Value resultImagRhs =
+      builder.createBinop(loc, lhsImag, cir::BinOpKind::Mul, rhsReal);
+  mlir::Value resultReal = builder.createBinop(
+      loc, resultRealLhs, cir::BinOpKind::Sub, resultRealRhs);
+  mlir::Value resultImag = builder.createBinop(
+      loc, resultImagLhs, cir::BinOpKind::Add, resultImagRhs);
+  mlir::Value algebraicResult =
+      builder.createComplexCreate(loc, resultReal, resultImag);
+
+  cir::ComplexType complexTy = op.getType();
+  cir::ComplexRangeKind rangeKind = op.getRange();
+  if (mlir::isa<cir::IntType>(complexTy.getElementType()) ||
+      rangeKind == cir::ComplexRangeKind::Basic ||
+      rangeKind == cir::ComplexRangeKind::Improved ||
+      rangeKind == cir::ComplexRangeKind::Promoted)
+    return algebraicResult;
+
+  assert(!cir::MissingFeatures::fastMathFlags());
+
+  // Check whether the real part and the imaginary part of the result are both
+  // NaN. If so, emit a library call to compute the multiplication instead.
+  // We check a value against NaN by comparing the value against itself.
+  mlir::Value resultRealIsNaN = builder.createIsNaN(loc, resultReal);
+  mlir::Value resultImagIsNaN = builder.createIsNaN(loc, resultImag);
+  mlir::Value resultRealAndImagAreNaN =
+      builder.createLogicalAnd(loc, resultRealIsNaN, resultImagIsNaN);
+
+  return builder
+      .create<cir::TernaryOp>(
+          loc, resultRealAndImagAreNaN,
+          [&](mlir::OpBuilder &, mlir::Location) {
+            mlir::Value libCallResult = buildComplexBinOpLibCall(
+                pass, builder, &getComplexMulLibCallName, loc, complexTy,
+                lhsReal, lhsImag, rhsReal, rhsImag);
+            builder.createYield(loc, libCallResult);
+          },
+          [&](mlir::OpBuilder &, mlir::Location) {
+            builder.createYield(loc, algebraicResult);
+          })
+      .getResult();
+}
+
+void LoweringPreparePass::lowerComplexMulOp(cir::ComplexMulOp op) {
+  cir::CIRBaseBuilderTy builder(getContext());
+  builder.setInsertionPointAfter(op);
+  mlir::Location loc = op.getLoc();
+  mlir::TypedValue<cir::ComplexType> lhs = op.getLhs();
+  mlir::TypedValue<cir::ComplexType> rhs = op.getRhs();
+  mlir::Value lhsReal = builder.createComplexReal(loc, lhs);
+  mlir::Value lhsImag = builder.createComplexImag(loc, lhs);
+  mlir::Value rhsReal = builder.createComplexReal(loc, rhs);
+  mlir::Value rhsImag = builder.createComplexImag(loc, rhs);
+  mlir::Value loweredResult = lowerComplexMul(*this, builder, loc, op, lhsReal,
+                                              lhsImag, rhsReal, rhsImag);
+  op.replaceAllUsesWith(loweredResult);
+  op.erase();
+}
+
 void LoweringPreparePass::lowerUnaryOp(cir::UnaryOp op) {
   mlir::Type ty = op.getType();
   if (!mlir::isa<cir::ComplexType>(ty))
@@ -154,7 +298,8 @@ void LoweringPreparePass::lowerUnaryOp(cir::UnaryOp op) {
 
   case cir::UnaryOpKind::Plus:
   case cir::UnaryOpKind::Minus:
-    llvm_unreachable("Complex unary Plus/Minus NYI");
+    resultReal = builder.createUnaryOp(loc, opKind, operandReal);
+    resultImag = builder.createUnaryOp(loc, opKind, operandImag);
     break;
 
   case cir::UnaryOpKind::Not:
@@ -172,28 +317,30 @@ void LoweringPreparePass::lowerUnaryOp(cir::UnaryOp op) {
 static void lowerArrayDtorCtorIntoLoop(cir::CIRBaseBuilderTy &builder,
                                        clang::ASTContext *astCtx,
                                        mlir::Operation *op, mlir::Type eltTy,
-                                       mlir::Value arrayAddr,
-                                       uint64_t arrayLen) {
+                                       mlir::Value arrayAddr, uint64_t arrayLen,
+                                       bool isCtor) {
   // Generate loop to call into ctor/dtor for every element.
   mlir::Location loc = op->getLoc();
 
-  // TODO: instead of fixed integer size, create alias for PtrDiffTy and unify
-  // with CIRGen stuff.
+  // TODO: instead of getting the size from the AST context, create alias for
+  // PtrDiffTy and unify with CIRGen stuff.
   const unsigned sizeTypeSize =
       astCtx->getTypeSize(astCtx->getSignedSizeType());
-  auto ptrDiffTy =
-      cir::IntType::get(builder.getContext(), sizeTypeSize, /*isSigned=*/false);
-  mlir::Value numArrayElementsConst = builder.getUnsignedInt(loc, arrayLen, 64);
+  uint64_t endOffset = isCtor ? arrayLen : arrayLen - 1;
+  mlir::Value endOffsetVal =
+      builder.getUnsignedInt(loc, endOffset, sizeTypeSize);
 
-  auto begin = builder.create<cir::CastOp>(
-      loc, eltTy, cir::CastKind::array_to_ptrdecay, arrayAddr);
-  mlir::Value end = builder.create<cir::PtrStrideOp>(loc, eltTy, begin,
-                                                     numArrayElementsConst);
+  auto begin = cir::CastOp::create(builder, loc, eltTy,
+                                   cir::CastKind::array_to_ptrdecay, arrayAddr);
+  mlir::Value end =
+      cir::PtrStrideOp::create(builder, loc, eltTy, begin, endOffsetVal);
+  mlir::Value start = isCtor ? begin : end;
+  mlir::Value stop = isCtor ? end : begin;
 
   mlir::Value tmpAddr = builder.createAlloca(
       loc, /*addr type*/ builder.getPointerTo(eltTy),
       /*var type*/ eltTy, "__array_idx", builder.getAlignmentAttr(1));
-  builder.createStore(loc, begin, tmpAddr);
+  builder.createStore(loc, start, tmpAddr);
 
   cir::DoWhileOp loop = builder.createDoWhile(
       loc,
@@ -202,7 +349,7 @@ static void lowerArrayDtorCtorIntoLoop(cir::CIRBaseBuilderTy &builder,
         auto currentElement = b.create<cir::LoadOp>(loc, eltTy, tmpAddr);
         mlir::Type boolTy = cir::BoolType::get(b.getContext());
         auto cmp = builder.create<cir::CmpOp>(loc, boolTy, cir::CmpOpKind::ne,
-                                              currentElement, end);
+                                              currentElement, stop);
         builder.createCondition(cmp);
       },
       /*bodyBuilder=*/
@@ -213,15 +360,19 @@ static void lowerArrayDtorCtorIntoLoop(cir::CIRBaseBuilderTy &builder,
         op->walk([&](cir::CallOp c) { ctorCall = c; });
         assert(ctorCall && "expected ctor call");
 
-        auto one = builder.create<cir::ConstantOp>(
-            loc, ptrDiffTy, cir::IntAttr::get(ptrDiffTy, 1));
+        // Array elements get constructed in order but destructed in reverse.
+        mlir::Value stride;
+        if (isCtor)
+          stride = builder.getUnsignedInt(loc, 1, sizeTypeSize);
+        else
+          stride = builder.getSignedInt(loc, -1, sizeTypeSize);
 
-        ctorCall->moveAfter(one);
+        ctorCall->moveBefore(stride.getDefiningOp());
         ctorCall->setOperand(0, currentElement);
+        auto nextElement = cir::PtrStrideOp::create(builder, loc, eltTy,
+                                                    currentElement, stride);
 
-        // Advance pointer and store them to temporary variable
-        auto nextElement =
-            builder.create<cir::PtrStrideOp>(loc, eltTy, currentElement, one);
+        // Store the element pointer to the temporary variable
         builder.createStore(loc, nextElement, tmpAddr);
         builder.createYield(loc);
       });
@@ -230,6 +381,18 @@ static void lowerArrayDtorCtorIntoLoop(cir::CIRBaseBuilderTy &builder,
   op->erase();
 }
 
+void LoweringPreparePass::lowerArrayDtor(cir::ArrayDtor op) {
+  CIRBaseBuilderTy builder(getContext());
+  builder.setInsertionPointAfter(op.getOperation());
+
+  mlir::Type eltTy = op->getRegion(0).getArgument(0).getType();
+  assert(!cir::MissingFeatures::vlas());
+  auto arrayLen =
+      mlir::cast<cir::ArrayType>(op.getAddr().getType().getPointee()).getSize();
+  lowerArrayDtorCtorIntoLoop(builder, astCtx, op, eltTy, op.getAddr(), arrayLen,
+                             false);
+}
+
 void LoweringPreparePass::lowerArrayCtor(cir::ArrayCtor op) {
   cir::CIRBaseBuilderTy builder(getContext());
   builder.setInsertionPointAfter(op.getOperation());
@@ -238,26 +401,33 @@ void LoweringPreparePass::lowerArrayCtor(cir::ArrayCtor op) {
   assert(!cir::MissingFeatures::vlas());
   auto arrayLen =
       mlir::cast<cir::ArrayType>(op.getAddr().getType().getPointee()).getSize();
-  lowerArrayDtorCtorIntoLoop(builder, astCtx, op, eltTy, op.getAddr(),
-                             arrayLen);
+  lowerArrayDtorCtorIntoLoop(builder, astCtx, op, eltTy, op.getAddr(), arrayLen,
+                             true);
 }
 
 void LoweringPreparePass::runOnOp(mlir::Operation *op) {
   if (auto arrayCtor = dyn_cast<ArrayCtor>(op))
     lowerArrayCtor(arrayCtor);
+  else if (auto arrayDtor = dyn_cast<cir::ArrayDtor>(op))
+    lowerArrayDtor(arrayDtor);
   else if (auto cast = mlir::dyn_cast<cir::CastOp>(op))
     lowerCastOp(cast);
+  else if (auto complexMul = mlir::dyn_cast<cir::ComplexMulOp>(op))
+    lowerComplexMulOp(complexMul);
   else if (auto unary = mlir::dyn_cast<cir::UnaryOp>(op))
     lowerUnaryOp(unary);
 }
 
 void LoweringPreparePass::runOnOperation() {
   mlir::Operation *op = getOperation();
+  if (isa<::mlir::ModuleOp>(op))
+    mlirModule = cast<::mlir::ModuleOp>(op);
 
   llvm::SmallVector<mlir::Operation *> opsToTransform;
 
   op->walk([&](mlir::Operation *op) {
-    if (mlir::isa<cir::ArrayCtor, cir::CastOp, cir::UnaryOp>(op))
+    if (mlir::isa<cir::ArrayCtor, cir::ArrayDtor, cir::CastOp,
+                  cir::ComplexMulOp, cir::UnaryOp>(op))
       opsToTransform.push_back(op);
   });
 
diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
index c27b889..957a51a 100644
--- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
+++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
@@ -521,6 +521,32 @@ mlir::LogicalResult CIRToLLVMBitCtzOpLowering::matchAndRewrite(
   return mlir::LogicalResult::success();
 }
 
+mlir::LogicalResult CIRToLLVMBitFfsOpLowering::matchAndRewrite(
+    cir::BitFfsOp op, OpAdaptor adaptor,
+    mlir::ConversionPatternRewriter &rewriter) const {
+  auto resTy = getTypeConverter()->convertType(op.getType());
+  auto ctz = rewriter.create<mlir::LLVM::CountTrailingZerosOp>(
+      op.getLoc(), resTy, adaptor.getInput(), /*is_zero_poison=*/true);
+
+  auto one = rewriter.create<mlir::LLVM::ConstantOp>(op.getLoc(), resTy, 1);
+  auto ctzAddOne = rewriter.create<mlir::LLVM::AddOp>(op.getLoc(), ctz, one);
+
+  auto zeroInputTy = rewriter.create<mlir::LLVM::ConstantOp>(
+      op.getLoc(), adaptor.getInput().getType(), 0);
+  auto isZero = rewriter.create<mlir::LLVM::ICmpOp>(
+      op.getLoc(),
+      mlir::LLVM::ICmpPredicateAttr::get(rewriter.getContext(),
+                                         mlir::LLVM::ICmpPredicate::eq),
+      adaptor.getInput(), zeroInputTy);
+
+  auto zero = rewriter.create<mlir::LLVM::ConstantOp>(op.getLoc(), resTy, 0);
+  auto res = rewriter.create<mlir::LLVM::SelectOp>(op.getLoc(), isZero, zero,
+                                                   ctzAddOne);
+  rewriter.replaceOp(op, res);
+
+  return mlir::LogicalResult::success();
+}
+
 mlir::LogicalResult CIRToLLVMBitParityOpLowering::matchAndRewrite(
     cir::BitParityOp op, OpAdaptor adaptor,
     mlir::ConversionPatternRewriter &rewriter) const {
@@ -919,13 +945,45 @@ rewriteCallOrInvoke(mlir::Operation *op, mlir::ValueRange callOperands,
                            memoryEffects, noUnwind, willReturn);
 
   mlir::LLVM::LLVMFunctionType llvmFnTy;
+
+  // Temporary to handle the case where we need to prepend an operand if the
+  // callee is an alias.
+  SmallVector<mlir::Value> adjustedCallOperands;
+
   if (calleeAttr) { // direct call
-    mlir::FunctionOpInterface fn =
-        mlir::SymbolTable::lookupNearestSymbolFrom<mlir::FunctionOpInterface>(
-            op, calleeAttr);
-    assert(fn && "Did not find function for call");
-    llvmFnTy = cast<mlir::LLVM::LLVMFunctionType>(
-        converter->convertType(fn.getFunctionType()));
+    mlir::Operation *callee =
+        mlir::SymbolTable::lookupNearestSymbolFrom(op, calleeAttr);
+    if (auto fn = mlir::dyn_cast<mlir::FunctionOpInterface>(callee)) {
+      llvmFnTy = converter->convertType<mlir::LLVM::LLVMFunctionType>(
+          fn.getFunctionType());
+      assert(llvmFnTy && "Failed to convert function type");
+    } else if (auto alias = mlir::cast<mlir::LLVM::AliasOp>(callee)) {
+      // If the callee was an alias. In that case,
+      // we need to prepend the address of the alias to the operands. The
+      // way aliases work in the LLVM dialect is a little counter-intuitive.
+      // The AliasOp itself is a pseudo-function that returns the address of
+      // the global value being aliased, but when we generate the call we
+      // need to insert an operation that gets the address of the AliasOp.
+      // This all gets sorted out when the LLVM dialect is lowered to LLVM IR.
+      auto symAttr = mlir::cast<mlir::FlatSymbolRefAttr>(calleeAttr);
+      auto addrOfAlias =
+          mlir::LLVM::AddressOfOp::create(
+              rewriter, op->getLoc(),
+              mlir::LLVM::LLVMPointerType::get(rewriter.getContext()), symAttr)
+              .getResult();
+      adjustedCallOperands.push_back(addrOfAlias);
+
+      // Now add the regular operands and assign this to the range value.
+      llvm::append_range(adjustedCallOperands, callOperands);
+      callOperands = adjustedCallOperands;
+
+      // Clear the callee attribute because we're calling an alias.
+      calleeAttr = {};
+      llvmFnTy = mlir::cast<mlir::LLVM::LLVMFunctionType>(alias.getType());
+    } else {
+      // Was this an ifunc?
+      return op->emitError("Unexpected callee type!");
+    }
   } else { // indirect call
     assert(!op->getOperands().empty() &&
            "operands list must no be empty for the indirect call");
@@ -1172,6 +1230,30 @@ void CIRToLLVMFuncOpLowering::lowerFuncAttributes(
   }
 }
 
+mlir::LogicalResult CIRToLLVMFuncOpLowering::matchAndRewriteAlias(
+    cir::FuncOp op, llvm::StringRef aliasee, mlir::Type ty, OpAdaptor adaptor,
+    mlir::ConversionPatternRewriter &rewriter) const {
+  SmallVector<mlir::NamedAttribute, 4> attributes;
+  lowerFuncAttributes(op, /*filterArgAndResAttrs=*/false, attributes);
+
+  mlir::Location loc = op.getLoc();
+  auto aliasOp = rewriter.replaceOpWithNewOp<mlir::LLVM::AliasOp>(
+      op, ty, convertLinkage(op.getLinkage()), op.getName(), op.getDsoLocal(),
+      /*threadLocal=*/false, attributes);
+
+  // Create the alias body
+  mlir::OpBuilder builder(op.getContext());
+  mlir::Block *block = builder.createBlock(&aliasOp.getInitializerRegion());
+  builder.setInsertionPointToStart(block);
+  // The type of AddressOfOp is always a pointer.
+  assert(!cir::MissingFeatures::addressSpace());
+  mlir::Type ptrTy = mlir::LLVM::LLVMPointerType::get(ty.getContext());
+  auto addrOp = mlir::LLVM::AddressOfOp::create(builder, loc, ptrTy, aliasee);
+  mlir::LLVM::ReturnOp::create(builder, loc, addrOp);
+
+  return mlir::success();
+}
+
 mlir::LogicalResult CIRToLLVMFuncOpLowering::matchAndRewrite(
     cir::FuncOp op, OpAdaptor adaptor,
     mlir::ConversionPatternRewriter &rewriter) const {
@@ -1196,6 +1278,11 @@ mlir::LogicalResult CIRToLLVMFuncOpLowering::matchAndRewrite(
       resultType ? resultType : mlir::LLVM::LLVMVoidType::get(getContext()),
       signatureConversion.getConvertedTypes(),
       /*isVarArg=*/fnType.isVarArg());
+
+  // If this is an alias, it needs to be lowered to llvm::AliasOp.
+  if (std::optional<llvm::StringRef> aliasee = op.getAliasee())
+    return matchAndRewriteAlias(op, *aliasee, llvmFnTy, adaptor, rewriter);
+
   // LLVMFuncOp expects a single FileLine Location instead of a fused
   // location.
   mlir::Location loc = op.getLoc();
@@ -2089,6 +2176,7 @@ void ConvertCIRToLLVMPass::runOnOperation() {
                CIRToLLVMBitClrsbOpLowering,
                CIRToLLVMBitClzOpLowering,
                CIRToLLVMBitCtzOpLowering,
+               CIRToLLVMBitFfsOpLowering,
                CIRToLLVMBitParityOpLowering,
                CIRToLLVMBitPopcountOpLowering,
                CIRToLLVMBitReverseOpLowering,
diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.h b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.h
index 2911ced..f339d43 100644
--- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.h
+++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.h
@@ -84,6 +84,16 @@ public:
                   mlir::ConversionPatternRewriter &) const override;
 };
 
+class CIRToLLVMBitFfsOpLowering
+    : public mlir::OpConversionPattern<cir::BitFfsOp> {
+public:
+  using mlir::OpConversionPattern<cir::BitFfsOp>::OpConversionPattern;
+
+  mlir::LogicalResult
+  matchAndRewrite(cir::BitFfsOp op, OpAdaptor,
+                  mlir::ConversionPatternRewriter &) const override;
+};
+
 class CIRToLLVMBitParityOpLowering
     : public mlir::OpConversionPattern<cir::BitParityOp> {
 public:
@@ -257,6 +267,11 @@ class CIRToLLVMFuncOpLowering : public mlir::OpConversionPattern<cir::FuncOp> {
       cir::FuncOp func, bool filterArgAndResAttrs,
       mlir::SmallVectorImpl<mlir::NamedAttribute> &result) const;
 
+  mlir::LogicalResult
+  matchAndRewriteAlias(cir::FuncOp op, llvm::StringRef aliasee, mlir::Type ty,
+                       OpAdaptor adaptor,
+                       mlir::ConversionPatternRewriter &rewriter) const;
+
 public:
   using mlir::OpConversionPattern<cir::FuncOp>::OpConversionPattern;
 
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 3f784fc..e1f7ea0 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -1148,7 +1148,8 @@ llvm::Value *CodeGenFunction::emitCountedByPointerSize(
   assert(E->getCastKind() == CK_LValueToRValue &&
          "must be an LValue to RValue cast");
 
-  const MemberExpr *ME = dyn_cast<MemberExpr>(E->getSubExpr());
+  const MemberExpr *ME =
+      dyn_cast<MemberExpr>(E->getSubExpr()->IgnoreParenNoopCasts(getContext()));
   if (!ME)
     return nullptr;
 
diff --git a/clang/lib/CodeGen/CGCoroutine.cpp b/clang/lib/CodeGen/CGCoroutine.cpp
index 5ee9089..827385f 100644
--- a/clang/lib/CodeGen/CGCoroutine.cpp
+++ b/clang/lib/CodeGen/CGCoroutine.cpp
@@ -435,7 +435,7 @@ CodeGenFunction::generateAwaitSuspendWrapper(Twine const &CoroName,
   llvm::FunctionType *LTy = CGM.getTypes().GetFunctionType(FI);
 
   llvm::Function *Fn = llvm::Function::Create(
-      LTy, llvm::GlobalValue::PrivateLinkage, FuncName, &CGM.getModule());
+      LTy, llvm::GlobalValue::InternalLinkage, FuncName, &CGM.getModule());
 
   Fn->addParamAttr(0, llvm::Attribute::AttrKind::NonNull);
   Fn->addParamAttr(0, llvm::Attribute::AttrKind::NoUndef);
diff --git a/clang/lib/CodeGen/CGDebugInfo.cpp b/clang/lib/CodeGen/CGDebugInfo.cpp
index 77fc3a2..b6007f5 100644
--- a/clang/lib/CodeGen/CGDebugInfo.cpp
+++ b/clang/lib/CodeGen/CGDebugInfo.cpp
@@ -643,16 +643,7 @@ unsigned CGDebugInfo::getColumnNumber(SourceLocation Loc, bool Force) {
 }
 
 StringRef CGDebugInfo::getCurrentDirname() {
-  if (!CGM.getCodeGenOpts().DebugCompilationDir.empty())
-    return CGM.getCodeGenOpts().DebugCompilationDir;
-
-  if (!CWDName.empty())
-    return CWDName;
-  llvm::ErrorOr<std::string> CWD =
-      CGM.getFileSystem()->getCurrentWorkingDirectory();
-  if (!CWD)
-    return StringRef();
-  return CWDName = internString(*CWD);
+  return CGM.getCodeGenOpts().DebugCompilationDir;
 }
 
 void CGDebugInfo::CreateCompileUnit() {
@@ -2641,6 +2632,8 @@ void CGDebugInfo::emitVTableSymbol(llvm::GlobalVariable *VTable,
                                    const CXXRecordDecl *RD) {
   if (!CGM.getTarget().getCXXABI().isItaniumFamily())
     return;
+  if (DebugKind <= llvm::codegenoptions::DebugLineTablesOnly)
+    return;
 
   ASTContext &Context = CGM.getContext();
   StringRef SymbolName = "_vtable$";
@@ -3246,6 +3239,9 @@ llvm::DIModule *CGDebugInfo::getOrCreateModuleRef(ASTSourceDescriptor Mod,
     std::string Remapped = remapDIPath(Path);
     StringRef Relative(Remapped);
     StringRef CompDir = TheCU->getDirectory();
+    if (CompDir.empty())
+      return Remapped;
+
     if (Relative.consume_front(CompDir))
       Relative.consume_front(llvm::sys::path::get_separator());
 
diff --git a/clang/lib/CodeGen/CGDebugInfo.h b/clang/lib/CodeGen/CGDebugInfo.h
index 411b2e7..497d3a6 100644
--- a/clang/lib/CodeGen/CGDebugInfo.h
+++ b/clang/lib/CodeGen/CGDebugInfo.h
@@ -158,7 +158,6 @@ class CGDebugInfo {
   /// This is a storage for names that are constructed on demand. For
   /// example, C++ destructors, C++ operators etc..
   llvm::BumpPtrAllocator DebugInfoNames;
-  StringRef CWDName;
 
   llvm::DenseMap<const char *, llvm::TrackingMDRef> DIFileCache;
   llvm::DenseMap<const FunctionDecl *, llvm::TrackingMDRef> SPCache;
diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index 90aed79..5a3d4e4 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -93,6 +93,7 @@ static llvm::StringRef GetUBSanTrapForHandler(SanitizerHandler ID) {
     LIST_SANITIZER_CHECKS
 #undef SANITIZER_CHECK
   }
+  llvm_unreachable("unhandled switch case");
 }
 
 /// CreateTempAlloca - This creates a alloca and inserts it into the entry
diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
index e25b694..04c9192 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
@@ -2278,8 +2278,12 @@ void CGOpenMPRuntimeGPU::processRequiresDirective(const OMPRequiresDecl *D) {
       case OffloadArch::SM_100a:
       case OffloadArch::SM_101:
       case OffloadArch::SM_101a:
+      case OffloadArch::SM_103:
+      case OffloadArch::SM_103a:
       case OffloadArch::SM_120:
       case OffloadArch::SM_120a:
+      case OffloadArch::SM_121:
+      case OffloadArch::SM_121a:
       case OffloadArch::GFX600:
       case OffloadArch::GFX601:
       case OffloadArch::GFX602:
diff --git a/clang/lib/CodeGen/CodeGenAction.cpp b/clang/lib/CodeGen/CodeGenAction.cpp
index eb5b604..dc54c97 100644
--- a/clang/lib/CodeGen/CodeGenAction.cpp
+++ b/clang/lib/CodeGen/CodeGenAction.cpp
@@ -908,6 +908,8 @@ bool CodeGenAction::loadLinkModules(CompilerInstance &CI) {
 bool CodeGenAction::hasIRSupport() const { return true; }
 
 void CodeGenAction::EndSourceFileAction() {
+  ASTFrontendAction::EndSourceFileAction();
+
   // If the consumer creation failed, do nothing.
   if (!getCompilerInstance().hasASTConsumer())
     return;
@@ -932,7 +934,7 @@ CodeGenerator *CodeGenAction::getCodeGenerator() const {
 bool CodeGenAction::BeginSourceFileAction(CompilerInstance &CI) {
   if (CI.getFrontendOpts().GenReducedBMI)
     CI.getLangOpts().setCompilingModule(LangOptions::CMK_ModuleInterface);
-  return true;
+  return ASTFrontendAction::BeginSourceFileAction(CI);
 }
 
 static std::unique_ptr<raw_pwrite_stream>
@@ -976,7 +978,7 @@ CodeGenAction::CreateASTConsumer(CompilerInstance &CI, StringRef InFile) {
         CI.getPreprocessor());
 
   std::unique_ptr<BackendConsumer> Result(new BackendConsumer(
-      CI, BA, &CI.getVirtualFileSystem(), *VMContext, std::move(LinkModules),
+      CI, BA, CI.getVirtualFileSystemPtr(), *VMContext, std::move(LinkModules),
       InFile, std::move(OS), CoverageInfo));
   BEConsumer = Result.get();
 
@@ -1154,7 +1156,7 @@ void CodeGenAction::ExecuteAction() {
 
   // Set clang diagnostic handler. To do this we need to create a fake
   // BackendConsumer.
-  BackendConsumer Result(CI, BA, &CI.getVirtualFileSystem(), *VMContext,
+  BackendConsumer Result(CI, BA, CI.getVirtualFileSystemPtr(), *VMContext,
                          std::move(LinkModules), "", nullptr, nullptr,
                          TheModule.get());
 
diff --git a/clang/lib/CodeGen/CoverageMappingGen.cpp b/clang/lib/CodeGen/CoverageMappingGen.cpp
index 4aafac3..38aaceb 100644
--- a/clang/lib/CodeGen/CoverageMappingGen.cpp
+++ b/clang/lib/CodeGen/CoverageMappingGen.cpp
@@ -2449,12 +2449,7 @@ CoverageMappingModuleGen::CoverageMappingModuleGen(
     : CGM(CGM), SourceInfo(SourceInfo) {}
 
 std::string CoverageMappingModuleGen::getCurrentDirname() {
-  if (!CGM.getCodeGenOpts().CoverageCompilationDir.empty())
-    return CGM.getCodeGenOpts().CoverageCompilationDir;
-
-  SmallString<256> CWD;
-  llvm::sys::fs::current_path(CWD);
-  return CWD.str().str();
+  return CGM.getCodeGenOpts().CoverageCompilationDir;
 }
 
 std::string CoverageMappingModuleGen::normalizeFilename(StringRef Filename) {
diff --git a/clang/lib/CodeGen/ObjectFilePCHContainerWriter.cpp b/clang/lib/CodeGen/ObjectFilePCHContainerWriter.cpp
index 95971e5..074f2a5 100644
--- a/clang/lib/CodeGen/ObjectFilePCHContainerWriter.cpp
+++ b/clang/lib/CodeGen/ObjectFilePCHContainerWriter.cpp
@@ -146,7 +146,7 @@ public:
       : CI(CI), Diags(CI.getDiagnostics()), MainFileName(MainFileName),
         OutputFileName(OutputFileName), Ctx(nullptr),
         MMap(CI.getPreprocessor().getHeaderSearchInfo().getModuleMap()),
-        FS(&CI.getVirtualFileSystem()),
+        FS(CI.getVirtualFileSystemPtr()),
         HeaderSearchOpts(CI.getHeaderSearchOpts()),
         PreprocessorOpts(CI.getPreprocessorOpts()),
         TargetOpts(CI.getTargetOpts()), LangOpts(CI.getLangOpts()),
diff --git a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
index 2e6b4b3..980f7eb 100644
--- a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp
@@ -4922,19 +4922,6 @@ Value *CodeGenFunction::EmitAArch64SMEBuiltinExpr(unsigned BuiltinID,
   if (Builtin->LLVMIntrinsic == 0)
     return nullptr;
 
-  if (BuiltinID == SME::BI__builtin_sme___arm_in_streaming_mode) {
-    // If we already know the streaming mode, don't bother with the intrinsic
-    // and emit a constant instead
-    const auto *FD = cast<FunctionDecl>(CurFuncDecl);
-    if (const auto *FPT = FD->getType()->getAs<FunctionProtoType>()) {
-      unsigned SMEAttrs = FPT->getAArch64SMEAttributes();
-      if (!(SMEAttrs & FunctionType::SME_PStateSMCompatibleMask)) {
-        bool IsStreaming = SMEAttrs & FunctionType::SME_PStateSMEnabledMask;
-        return ConstantInt::getBool(Builder.getContext(), IsStreaming);
-      }
-    }
-  }
-
   // Predicates must match the main datatype.
   for (Value *&Op : Ops)
     if (auto PredTy = dyn_cast<llvm::VectorType>(Op->getType()))
diff --git a/clang/lib/CrossTU/CrossTranslationUnit.cpp b/clang/lib/CrossTU/CrossTranslationUnit.cpp
index 6d0f042..fb2a79a 100644
--- a/clang/lib/CrossTU/CrossTranslationUnit.cpp
+++ b/clang/lib/CrossTU/CrossTranslationUnit.cpp
@@ -563,9 +563,8 @@ CrossTranslationUnitContext::ASTLoader::loadFromDump(StringRef ASTDumpPath) {
   auto DiagOpts = std::make_shared<DiagnosticOptions>();
   TextDiagnosticPrinter *DiagClient =
       new TextDiagnosticPrinter(llvm::errs(), *DiagOpts);
-  IntrusiveRefCntPtr<DiagnosticIDs> DiagID(new DiagnosticIDs());
-  IntrusiveRefCntPtr<DiagnosticsEngine> Diags(
-      new DiagnosticsEngine(DiagID, *DiagOpts, DiagClient));
+  auto Diags = llvm::makeIntrusiveRefCnt<DiagnosticsEngine>(
+      DiagnosticIDs::create(), *DiagOpts, DiagClient);
   return ASTUnit::LoadFromASTFile(
       ASTDumpPath, CI.getPCHContainerOperations()->getRawReader(),
       ASTUnit::LoadEverything, DiagOpts, Diags, CI.getFileSystemOpts(),
@@ -607,8 +606,8 @@ CrossTranslationUnitContext::ASTLoader::loadFromSource(
   auto *DiagClient = new ForwardingDiagnosticConsumer{CI.getDiagnosticClient()};
   IntrusiveRefCntPtr<DiagnosticIDs> DiagID{
       CI.getDiagnostics().getDiagnosticIDs()};
-  IntrusiveRefCntPtr<DiagnosticsEngine> Diags(
-      new DiagnosticsEngine{DiagID, *DiagOpts, DiagClient});
+  auto Diags = llvm::makeIntrusiveRefCnt<DiagnosticsEngine>(DiagID, *DiagOpts,
+                                                            DiagClient);
 
   return ASTUnit::LoadFromCommandLine(
       CommandLineArgs.begin(), (CommandLineArgs.end()),
diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp
index 99de951..586f287 100644
--- a/clang/lib/Driver/Driver.cpp
+++ b/clang/lib/Driver/Driver.cpp
@@ -3606,7 +3606,7 @@ class OffloadingActionBuilder final {
           if (!CompileDeviceOnly) {
             C.getDriver().Diag(diag::err_opt_not_valid_without_opt)
                 << "-fhip-emit-relocatable"
-                << "--cuda-device-only";
+                << "--offload-device-only";
           }
         }
       }
@@ -4774,6 +4774,21 @@ Action *Driver::BuildOffloadingActions(Compilation &C,
       C.isOffloadingHostKind(Action::OFK_HIP) &&
       !Args.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc, false);
 
+  bool HIPRelocatableObj =
+      C.isOffloadingHostKind(Action::OFK_HIP) &&
+      Args.hasFlag(options::OPT_fhip_emit_relocatable,
+                   options::OPT_fno_hip_emit_relocatable, false);
+
+  if (!HIPNoRDC && HIPRelocatableObj)
+    C.getDriver().Diag(diag::err_opt_not_valid_with_opt)
+        << "-fhip-emit-relocatable"
+        << "-fgpu-rdc";
+
+  if (!offloadDeviceOnly() && HIPRelocatableObj)
+    C.getDriver().Diag(diag::err_opt_not_valid_without_opt)
+        << "-fhip-emit-relocatable"
+        << "--offload-device-only";
+
   // For HIP non-rdc non-device-only compilation, create a linker wrapper
   // action for each host object to link, bundle and wrap device files in
   // it.
@@ -4894,7 +4909,7 @@ Action *Driver::BuildOffloadingActions(Compilation &C,
                            A->getOffloadingToolChain()->getTriple().isSPIRV();
       if ((A->getType() != types::TY_Object && !IsAMDGCNSPIRV &&
            A->getType() != types::TY_LTO_BC) ||
-          !HIPNoRDC || !offloadDeviceOnly())
+          HIPRelocatableObj || !HIPNoRDC || !offloadDeviceOnly())
         continue;
       ActionList LinkerInput = {A};
       A = C.MakeAction<LinkJobAction>(LinkerInput, types::TY_Image);
diff --git a/clang/lib/Driver/ToolChain.cpp b/clang/lib/Driver/ToolChain.cpp
index 1d7dad0..25c6b5a 100644
--- a/clang/lib/Driver/ToolChain.cpp
+++ b/clang/lib/Driver/ToolChain.cpp
@@ -191,9 +191,10 @@ static void getAArch64MultilibFlags(const Driver &D,
   for (const auto &ArchInfo : AArch64::ArchInfos)
     if (FeatureSet.contains(ArchInfo->ArchFeature))
       ArchName = ArchInfo->Name;
-  assert(!ArchName.empty() && "at least one architecture should be found");
-  MArch.insert(MArch.begin(), ("-march=" + ArchName).str());
-  Result.push_back(llvm::join(MArch, "+"));
+  if (!ArchName.empty()) {
+    MArch.insert(MArch.begin(), ("-march=" + ArchName).str());
+    Result.push_back(llvm::join(MArch, "+"));
+  }
 
   const Arg *BranchProtectionArg =
       Args.getLastArgNoClaim(options::OPT_mbranch_protection_EQ);
@@ -760,7 +761,7 @@ std::string ToolChain::buildCompilerRTBasename(const llvm::opt::ArgList &Args,
     break;
   case ToolChain::FT_Shared:
     if (TT.isOSWindows())
-      Suffix = TT.isWindowsGNUEnvironment() ? ".dll.a" : ".lib";
+      Suffix = TT.isOSCygMing() ? ".dll.a" : ".lib";
     else if (TT.isOSAIX())
       Suffix = ".a";
     else
diff --git a/clang/lib/Driver/ToolChains/AMDGPU.h b/clang/lib/Driver/ToolChains/AMDGPU.h
index 513c77d..e5d41e2 100644
--- a/clang/lib/Driver/ToolChains/AMDGPU.h
+++ b/clang/lib/Driver/ToolChains/AMDGPU.h
@@ -10,7 +10,6 @@
 #define LLVM_CLANG_LIB_DRIVER_TOOLCHAINS_AMDGPU_H
 
 #include "Gnu.h"
-#include "ROCm.h"
 #include "clang/Basic/TargetID.h"
 #include "clang/Driver/Options.h"
 #include "clang/Driver/Tool.h"
diff --git a/clang/lib/Driver/ToolChains/Arch/AArch64.cpp b/clang/lib/Driver/ToolChains/Arch/AArch64.cpp
index 6bd710e..418f9fd 100644
--- a/clang/lib/Driver/ToolChains/Arch/AArch64.cpp
+++ b/clang/lib/Driver/ToolChains/Arch/AArch64.cpp
@@ -467,3 +467,18 @@ void aarch64::setPAuthABIInTriple(const Driver &D, const ArgList &Args,
     break;
   }
 }
+
+/// Is the triple {aarch64.aarch64_be}-none-elf?
+bool aarch64::isAArch64BareMetal(const llvm::Triple &Triple) {
+  if (Triple.getArch() != llvm::Triple::aarch64 &&
+      Triple.getArch() != llvm::Triple::aarch64_be)
+    return false;
+
+  if (Triple.getVendor() != llvm::Triple::UnknownVendor)
+    return false;
+
+  if (Triple.getOS() != llvm::Triple::UnknownOS)
+    return false;
+
+  return Triple.getEnvironmentName() == "elf";
+}
diff --git a/clang/lib/Driver/ToolChains/Arch/AArch64.h b/clang/lib/Driver/ToolChains/Arch/AArch64.h
index 2057272..2765ee8 100644
--- a/clang/lib/Driver/ToolChains/Arch/AArch64.h
+++ b/clang/lib/Driver/ToolChains/Arch/AArch64.h
@@ -30,6 +30,7 @@ std::string getAArch64TargetCPU(const llvm::opt::ArgList &Args,
 
 void setPAuthABIInTriple(const Driver &D, const llvm::opt::ArgList &Args,
                          llvm::Triple &triple);
+bool isAArch64BareMetal(const llvm::Triple &Triple);
 
 } // end namespace aarch64
 } // end namespace target
diff --git a/clang/lib/Driver/ToolChains/BareMetal.cpp b/clang/lib/Driver/ToolChains/BareMetal.cpp
index 497f333..25a16fe 100644
--- a/clang/lib/Driver/ToolChains/BareMetal.cpp
+++ b/clang/lib/Driver/ToolChains/BareMetal.cpp
@@ -12,6 +12,7 @@
 #include "clang/Driver/CommonArgs.h"
 #include "clang/Driver/InputInfo.h"
 
+#include "Arch/AArch64.h"
 #include "Arch/ARM.h"
 #include "Arch/RISCV.h"
 #include "clang/Driver/Compilation.h"
@@ -31,21 +32,6 @@ using namespace clang::driver;
 using namespace clang::driver::tools;
 using namespace clang::driver::toolchains;
 
-/// Is the triple {aarch64.aarch64_be}-none-elf?
-static bool isAArch64BareMetal(const llvm::Triple &Triple) {
-  if (Triple.getArch() != llvm::Triple::aarch64 &&
-      Triple.getArch() != llvm::Triple::aarch64_be)
-    return false;
-
-  if (Triple.getVendor() != llvm::Triple::UnknownVendor)
-    return false;
-
-  if (Triple.getOS() != llvm::Triple::UnknownOS)
-    return false;
-
-  return Triple.getEnvironmentName() == "elf";
-}
-
 static bool isRISCVBareMetal(const llvm::Triple &Triple) {
   if (!Triple.isRISCV())
     return false;
@@ -363,8 +349,9 @@ void BareMetal::findMultilibs(const Driver &D, const llvm::Triple &Triple,
 }
 
 bool BareMetal::handlesTarget(const llvm::Triple &Triple) {
-  return arm::isARMEABIBareMetal(Triple) || isAArch64BareMetal(Triple) ||
-         isRISCVBareMetal(Triple) || isPPCBareMetal(Triple);
+  return arm::isARMEABIBareMetal(Triple) ||
+         aarch64::isAArch64BareMetal(Triple) || isRISCVBareMetal(Triple) ||
+         isPPCBareMetal(Triple);
 }
 
 Tool *BareMetal::buildLinker() const {
@@ -684,7 +671,8 @@ void baremetal::Linker::ConstructJob(Compilation &C, const JobAction &JA,
   if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nodefaultlibs)) {
     CmdArgs.push_back("--start-group");
     AddRunTimeLibs(TC, D, CmdArgs, Args);
-    CmdArgs.push_back("-lc");
+    if (!Args.hasArg(options::OPT_nolibc))
+      CmdArgs.push_back("-lc");
     if (TC.hasValidGCCInstallation() || detectGCCToolchainAdjacent(D))
       CmdArgs.push_back("-lgloss");
     CmdArgs.push_back("--end-group");
diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
index 9d882db..4e1b1d9 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -16,6 +16,7 @@
 #include "Arch/SystemZ.h"
 #include "Hexagon.h"
 #include "PS4CPU.h"
+#include "ToolChains/Cuda.h"
 #include "clang/Basic/CLWarnings.h"
 #include "clang/Basic/CodeGenOptions.h"
 #include "clang/Basic/HeaderInclude.h"
@@ -225,17 +226,19 @@ static bool ShouldEnableAutolink(const ArgList &Args, const ToolChain &TC,
 static const char *addDebugCompDirArg(const ArgList &Args,
                                       ArgStringList &CmdArgs,
                                       const llvm::vfs::FileSystem &VFS) {
+  std::string DebugCompDir;
   if (Arg *A = Args.getLastArg(options::OPT_ffile_compilation_dir_EQ,
-                               options::OPT_fdebug_compilation_dir_EQ)) {
-    if (A->getOption().matches(options::OPT_ffile_compilation_dir_EQ))
-      CmdArgs.push_back(Args.MakeArgString(Twine("-fdebug-compilation-dir=") +
-                                           A->getValue()));
+                               options::OPT_fdebug_compilation_dir_EQ))
+    DebugCompDir = A->getValue();
+
+  if (DebugCompDir.empty()) {
+    if (llvm::ErrorOr<std::string> CWD = VFS.getCurrentWorkingDirectory())
+      DebugCompDir = std::move(*CWD);
     else
-      A->render(Args, CmdArgs);
-  } else if (llvm::ErrorOr<std::string> CWD =
-                 VFS.getCurrentWorkingDirectory()) {
-    CmdArgs.push_back(Args.MakeArgString("-fdebug-compilation-dir=" + *CWD));
+      return nullptr;
   }
+  CmdArgs.push_back(
+      Args.MakeArgString("-fdebug-compilation-dir=" + DebugCompDir));
   StringRef Path(CmdArgs.back());
   return Path.substr(Path.find('=') + 1).data();
 }
@@ -524,17 +527,17 @@ static void addPGOAndCoverageFlags(const ToolChain &TC, Compilation &C,
     CmdArgs.push_back("-fcoverage-mcdc");
   }
 
+  StringRef CoverageCompDir;
   if (Arg *A = Args.getLastArg(options::OPT_ffile_compilation_dir_EQ,
-                               options::OPT_fcoverage_compilation_dir_EQ)) {
-    if (A->getOption().matches(options::OPT_ffile_compilation_dir_EQ))
-      CmdArgs.push_back(Args.MakeArgString(
-          Twine("-fcoverage-compilation-dir=") + A->getValue()));
-    else
-      A->render(Args, CmdArgs);
-  } else if (llvm::ErrorOr<std::string> CWD =
-                 D.getVFS().getCurrentWorkingDirectory()) {
-    CmdArgs.push_back(Args.MakeArgString("-fcoverage-compilation-dir=" + *CWD));
-  }
+                               options::OPT_fcoverage_compilation_dir_EQ))
+    CoverageCompDir = A->getValue();
+  if (CoverageCompDir.empty()) {
+    if (auto CWD = D.getVFS().getCurrentWorkingDirectory())
+      CmdArgs.push_back(
+          Args.MakeArgString(Twine("-fcoverage-compilation-dir=") + *CWD));
+  } else
+    CmdArgs.push_back(Args.MakeArgString(Twine("-fcoverage-compilation-dir=") +
+                                         CoverageCompDir));
 
   if (Args.hasArg(options::OPT_fprofile_exclude_files_EQ)) {
     auto *Arg = Args.getLastArg(options::OPT_fprofile_exclude_files_EQ);
@@ -5944,7 +5947,7 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
     CmdArgs.push_back("-mms-bitfields");
   }
 
-  if (Triple.isWindowsGNUEnvironment()) {
+  if (Triple.isOSCygMing()) {
     Args.addOptOutFlag(CmdArgs, options::OPT_fauto_import,
                        options::OPT_fno_auto_import);
   }
diff --git a/clang/lib/Driver/ToolChains/CommonArgs.cpp b/clang/lib/Driver/ToolChains/CommonArgs.cpp
index 826e2ea..0771c7c 100644
--- a/clang/lib/Driver/ToolChains/CommonArgs.cpp
+++ b/clang/lib/Driver/ToolChains/CommonArgs.cpp
@@ -23,6 +23,7 @@
 #include "Hexagon.h"
 #include "MSP430.h"
 #include "Solaris.h"
+#include "ToolChains/Cuda.h"
 #include "clang/Basic/CodeGenOptions.h"
 #include "clang/Config/config.h"
 #include "clang/Driver/Action.h"
@@ -294,17 +295,22 @@ static void renderRemarksOptions(const ArgList &Args, ArgStringList &CmdArgs,
     Format = A->getValue();
 
   SmallString<128> F;
-  const Arg *A = Args.getLastArg(options::OPT_foptimization_record_file_EQ);
-  if (A)
+  if (const Arg *A =
+          Args.getLastArg(options::OPT_foptimization_record_file_EQ)) {
+    F = A->getValue();
+    F += ".";
+  } else if (const Arg *A = Args.getLastArg(options::OPT_dumpdir)) {
     F = A->getValue();
-  else if (Output.isFilename())
+  } else if (Output.isFilename()) {
     F = Output.getFilename();
+    F += ".";
+  }
 
   assert(!F.empty() && "Cannot determine remarks output name.");
   // Append "opt.ld.<format>" to the end of the file name.
   CmdArgs.push_back(Args.MakeArgString(Twine(PluginOptPrefix) +
-                                       "opt-remarks-filename=" + F +
-                                       ".opt.ld." + Format));
+                                       "opt-remarks-filename=" + F + "opt.ld." +
+                                       Format));
 
   if (const Arg *A =
           Args.getLastArg(options::OPT_foptimization_record_passes_EQ))
@@ -547,15 +553,22 @@ const char *tools::getLDMOption(const llvm::Triple &T, const ArgList &Args) {
   case llvm::Triple::aarch64:
     if (T.isOSManagarm())
       return "aarch64managarm";
+    else if (aarch64::isAArch64BareMetal(T))
+      return "aarch64elf";
     return "aarch64linux";
   case llvm::Triple::aarch64_be:
+    if (aarch64::isAArch64BareMetal(T))
+      return "aarch64elfb";
     return "aarch64linuxb";
   case llvm::Triple::arm:
   case llvm::Triple::thumb:
   case llvm::Triple::armeb:
-  case llvm::Triple::thumbeb:
-    return tools::arm::isARMBigEndian(T, Args) ? "armelfb_linux_eabi"
-                                               : "armelf_linux_eabi";
+  case llvm::Triple::thumbeb: {
+    bool IsBigEndian = tools::arm::isARMBigEndian(T, Args);
+    if (arm::isARMEABIBareMetal(T))
+      return IsBigEndian ? "armelfb" : "armelf";
+    return IsBigEndian ? "armelfb_linux_eabi" : "armelf_linux_eabi";
+  }
   case llvm::Triple::m68k:
     return "m68kelf";
   case llvm::Triple::ppc:
@@ -1067,9 +1080,17 @@ void tools::addLTOOptions(const ToolChain &ToolChain, const ArgList &Args,
     }
   }
 
-  if (Args.hasArg(options::OPT_gsplit_dwarf))
-    CmdArgs.push_back(Args.MakeArgString(
-        Twine(PluginOptPrefix) + "dwo_dir=" + Output.getFilename() + "_dwo"));
+  if (Args.hasArg(options::OPT_gsplit_dwarf)) {
+    SmallString<128> F;
+    if (const Arg *A = Args.getLastArg(options::OPT_dumpdir)) {
+      F = A->getValue();
+    } else {
+      F = Output.getFilename();
+      F += "_";
+    }
+    CmdArgs.push_back(
+        Args.MakeArgString(Twine(PluginOptPrefix) + "dwo_dir=" + F + "dwo"));
+  }
 
   if (IsThinLTO && !IsOSAIX)
     CmdArgs.push_back(Args.MakeArgString(Twine(PluginOptPrefix) + "thinlto"));
diff --git a/clang/lib/Driver/ToolChains/Cuda.cpp b/clang/lib/Driver/ToolChains/Cuda.cpp
index 7d803be..1f0b478 100644
--- a/clang/lib/Driver/ToolChains/Cuda.cpp
+++ b/clang/lib/Driver/ToolChains/Cuda.cpp
@@ -88,6 +88,8 @@ CudaVersion getCudaVersion(uint32_t raw_version) {
     return CudaVersion::CUDA_126;
   if (raw_version < 12090)
     return CudaVersion::CUDA_128;
+  if (raw_version < 13000)
+    return CudaVersion::CUDA_129;
   return CudaVersion::NEW;
 }
 
@@ -683,6 +685,7 @@ void NVPTX::getNVPTXTargetFeatures(const Driver &D, const llvm::Triple &Triple,
   case CudaVersion::CUDA_##CUDA_VER:                                           \
     PtxFeature = "+ptx" #PTX_VER;                                              \
     break;
+    CASE_CUDA_VERSION(129, 88);
     CASE_CUDA_VERSION(128, 87);
     CASE_CUDA_VERSION(126, 85);
     CASE_CUDA_VERSION(125, 85);
diff --git a/clang/lib/Driver/ToolChains/Cuda.h b/clang/lib/Driver/ToolChains/Cuda.h
index 259eda6..8aeba53 100644
--- a/clang/lib/Driver/ToolChains/Cuda.h
+++ b/clang/lib/Driver/ToolChains/Cuda.h
@@ -11,6 +11,7 @@
 
 #include "clang/Basic/Cuda.h"
 #include "clang/Driver/Action.h"
+#include "clang/Driver/CudaInstallationDetector.h"
 #include "clang/Driver/Multilib.h"
 #include "clang/Driver/Tool.h"
 #include "clang/Driver/ToolChain.h"
@@ -22,61 +23,6 @@
 
 namespace clang {
 namespace driver {
-
-/// A class to find a viable CUDA installation
-class CudaInstallationDetector {
-private:
-  const Driver &D;
-  bool IsValid = false;
-  CudaVersion Version = CudaVersion::UNKNOWN;
-  std::string InstallPath;
-  std::string BinPath;
-  std::string LibDevicePath;
-  std::string IncludePath;
-  llvm::StringMap<std::string> LibDeviceMap;
-
-  // CUDA architectures for which we have raised an error in
-  // CheckCudaVersionSupportsArch.
-  mutable std::bitset<(int)OffloadArch::LAST> ArchsWithBadVersion;
-
-public:
-  CudaInstallationDetector(const Driver &D, const llvm::Triple &HostTriple,
-                           const llvm::opt::ArgList &Args);
-
-  void AddCudaIncludeArgs(const llvm::opt::ArgList &DriverArgs,
-                          llvm::opt::ArgStringList &CC1Args) const;
-
-  /// Emit an error if Version does not support the given Arch.
-  ///
-  /// If either Version or Arch is unknown, does not emit an error.  Emits at
-  /// most one error per Arch.
-  void CheckCudaVersionSupportsArch(OffloadArch Arch) const;
-
-  /// Check whether we detected a valid Cuda install.
-  bool isValid() const { return IsValid; }
-  /// Print information about the detected CUDA installation.
-  void print(raw_ostream &OS) const;
-
-  /// Get the detected Cuda install's version.
-  CudaVersion version() const {
-    return Version == CudaVersion::NEW ? CudaVersion::PARTIALLY_SUPPORTED
-                                       : Version;
-  }
-  /// Get the detected Cuda installation path.
-  StringRef getInstallPath() const { return InstallPath; }
-  /// Get the detected path to Cuda's bin directory.
-  StringRef getBinPath() const { return BinPath; }
-  /// Get the detected Cuda Include path.
-  StringRef getIncludePath() const { return IncludePath; }
-  /// Get the detected Cuda device library path.
-  StringRef getLibDevicePath() const { return LibDevicePath; }
-  /// Get libdevice file for given architecture
-  std::string getLibDeviceFile(StringRef Gpu) const {
-    return LibDeviceMap.lookup(Gpu);
-  }
-  void WarnIfUnsupportedVersion() const;
-};
-
 namespace tools {
 namespace NVPTX {
 
diff --git a/clang/lib/Driver/ToolChains/Darwin.h b/clang/lib/Driver/ToolChains/Darwin.h
index b38bfe6..d1cfb6f 100644
--- a/clang/lib/Driver/ToolChains/Darwin.h
+++ b/clang/lib/Driver/ToolChains/Darwin.h
@@ -9,12 +9,12 @@
 #ifndef LLVM_CLANG_LIB_DRIVER_TOOLCHAINS_DARWIN_H
 #define LLVM_CLANG_LIB_DRIVER_TOOLCHAINS_DARWIN_H
 
-#include "Cuda.h"
-#include "LazyDetector.h"
-#include "ROCm.h"
-#include "SYCL.h"
 #include "clang/Basic/DarwinSDKInfo.h"
 #include "clang/Basic/LangOptions.h"
+#include "clang/Driver/CudaInstallationDetector.h"
+#include "clang/Driver/LazyDetector.h"
+#include "clang/Driver/RocmInstallationDetector.h"
+#include "clang/Driver/SyclInstallationDetector.h"
 #include "clang/Driver/Tool.h"
 #include "clang/Driver/ToolChain.h"
 #include "clang/Driver/XRayArgs.h"
diff --git a/clang/lib/Driver/ToolChains/Gnu.h b/clang/lib/Driver/ToolChains/Gnu.h
index 3b8df71..4c42a5e5 100644
--- a/clang/lib/Driver/ToolChains/Gnu.h
+++ b/clang/lib/Driver/ToolChains/Gnu.h
@@ -9,10 +9,10 @@
 #ifndef LLVM_CLANG_LIB_DRIVER_TOOLCHAINS_GNU_H
 #define LLVM_CLANG_LIB_DRIVER_TOOLCHAINS_GNU_H
 
-#include "Cuda.h"
-#include "LazyDetector.h"
-#include "ROCm.h"
-#include "SYCL.h"
+#include "clang/Driver/CudaInstallationDetector.h"
+#include "clang/Driver/LazyDetector.h"
+#include "clang/Driver/RocmInstallationDetector.h"
+#include "clang/Driver/SyclInstallationDetector.h"
 #include "clang/Driver/Tool.h"
 #include "clang/Driver/ToolChain.h"
 #include <set>
diff --git a/clang/lib/Driver/ToolChains/HIPAMD.h b/clang/lib/Driver/ToolChains/HIPAMD.h
index bcc3ebb..30fc01a 100644
--- a/clang/lib/Driver/ToolChains/HIPAMD.h
+++ b/clang/lib/Driver/ToolChains/HIPAMD.h
@@ -10,6 +10,7 @@
 #define LLVM_CLANG_LIB_DRIVER_TOOLCHAINS_HIPAMD_H
 
 #include "AMDGPU.h"
+#include "clang/Driver/SyclInstallationDetector.h"
 #include "clang/Driver/Tool.h"
 #include "clang/Driver/ToolChain.h"
 
diff --git a/clang/lib/Driver/ToolChains/MSVC.h b/clang/lib/Driver/ToolChains/MSVC.h
index b35390c..5c17edc 100644
--- a/clang/lib/Driver/ToolChains/MSVC.h
+++ b/clang/lib/Driver/ToolChains/MSVC.h
@@ -9,11 +9,11 @@
 #ifndef LLVM_CLANG_LIB_DRIVER_TOOLCHAINS_MSVC_H
 #define LLVM_CLANG_LIB_DRIVER_TOOLCHAINS_MSVC_H
 
-#include "AMDGPU.h"
-#include "Cuda.h"
-#include "LazyDetector.h"
-#include "SYCL.h"
 #include "clang/Driver/Compilation.h"
+#include "clang/Driver/CudaInstallationDetector.h"
+#include "clang/Driver/LazyDetector.h"
+#include "clang/Driver/RocmInstallationDetector.h"
+#include "clang/Driver/SyclInstallationDetector.h"
 #include "clang/Driver/Tool.h"
 #include "clang/Driver/ToolChain.h"
 #include "llvm/Frontend/Debug/Options.h"
diff --git a/clang/lib/Driver/ToolChains/MinGW.cpp b/clang/lib/Driver/ToolChains/MinGW.cpp
index 6abd0c0..1bb9bcf 100644
--- a/clang/lib/Driver/ToolChains/MinGW.cpp
+++ b/clang/lib/Driver/ToolChains/MinGW.cpp
@@ -85,11 +85,18 @@ void tools::MinGW::Linker::AddLibGCC(const ArgList &Args,
 
   CmdArgs.push_back("-lmoldname");
   CmdArgs.push_back("-lmingwex");
-  for (auto Lib : Args.getAllArgValues(options::OPT_l))
+  for (auto Lib : Args.getAllArgValues(options::OPT_l)) {
     if (StringRef(Lib).starts_with("msvcr") ||
         StringRef(Lib).starts_with("ucrt") ||
-        StringRef(Lib).starts_with("crtdll"))
+        StringRef(Lib).starts_with("crtdll")) {
+      std::string CRTLib = (llvm::Twine("-l") + Lib).str();
+      // Respect the user's chosen crt variant, but still provide it
+      // again as the last linker argument, because some of the libraries
+      // we added above may depend on it.
+      CmdArgs.push_back(Args.MakeArgStringRef(CRTLib));
       return;
+    }
+  }
   CmdArgs.push_back("-lmsvcrt");
 }
 
diff --git a/clang/lib/Driver/ToolChains/MinGW.h b/clang/lib/Driver/ToolChains/MinGW.h
index a9963d8..1730da4 100644
--- a/clang/lib/Driver/ToolChains/MinGW.h
+++ b/clang/lib/Driver/ToolChains/MinGW.h
@@ -11,8 +11,9 @@
 
 #include "Cuda.h"
 #include "Gnu.h"
-#include "LazyDetector.h"
-#include "ROCm.h"
+#include "clang/Driver/CudaInstallationDetector.h"
+#include "clang/Driver/LazyDetector.h"
+#include "clang/Driver/RocmInstallationDetector.h"
 #include "clang/Driver/Tool.h"
 #include "clang/Driver/ToolChain.h"
 #include "llvm/Support/ErrorOr.h"
diff --git a/clang/lib/Driver/ToolChains/SYCL.h b/clang/lib/Driver/ToolChains/SYCL.h
index 2a8b4ec..be4ba47 100644
--- a/clang/lib/Driver/ToolChains/SYCL.h
+++ b/clang/lib/Driver/ToolChains/SYCL.h
@@ -9,21 +9,12 @@
 #ifndef LLVM_CLANG_LIB_DRIVER_TOOLCHAINS_SYCL_H
 #define LLVM_CLANG_LIB_DRIVER_TOOLCHAINS_SYCL_H
 
+#include "clang/Driver/SyclInstallationDetector.h"
 #include "clang/Driver/Tool.h"
 #include "clang/Driver/ToolChain.h"
 
 namespace clang {
 namespace driver {
-
-class SYCLInstallationDetector {
-public:
-  SYCLInstallationDetector(const Driver &D, const llvm::Triple &HostTriple,
-                           const llvm::opt::ArgList &Args);
-
-  void addSYCLIncludeArgs(const llvm::opt::ArgList &DriverArgs,
-                          llvm::opt::ArgStringList &CC1Args) const;
-};
-
 namespace toolchains {
 
 class LLVM_LIBRARY_VISIBILITY SYCLToolChain : public ToolChain {
diff --git a/clang/lib/Format/IntegerLiteralSeparatorFixer.cpp b/clang/lib/Format/IntegerLiteralSeparatorFixer.cpp
index 80487fa..7772a56 100644
--- a/clang/lib/Format/IntegerLiteralSeparatorFixer.cpp
+++ b/clang/lib/Format/IntegerLiteralSeparatorFixer.cpp
@@ -45,15 +45,18 @@ std::pair<tooling::Replacements, unsigned>
 IntegerLiteralSeparatorFixer::process(const Environment &Env,
                                       const FormatStyle &Style) {
   switch (Style.Language) {
-  case FormatStyle::LK_Cpp:
-  case FormatStyle::LK_ObjC:
-    Separator = '\'';
-    break;
   case FormatStyle::LK_CSharp:
   case FormatStyle::LK_Java:
   case FormatStyle::LK_JavaScript:
     Separator = '_';
     break;
+  case FormatStyle::LK_Cpp:
+  case FormatStyle::LK_ObjC:
+    if (Style.Standard >= FormatStyle::LS_Cpp14) {
+      Separator = '\'';
+      break;
+    }
+    [[fallthrough]];
   default:
     return {};
   }
diff --git a/clang/lib/Frontend/ASTMerge.cpp b/clang/lib/Frontend/ASTMerge.cpp
index a4ce883..10c1045 100644
--- a/clang/lib/Frontend/ASTMerge.cpp
+++ b/clang/lib/Frontend/ASTMerge.cpp
@@ -41,10 +41,10 @@ void ASTMergeAction::ExecuteAction() {
   auto SharedState = std::make_shared<ASTImporterSharedState>(
       *CI.getASTContext().getTranslationUnitDecl());
   for (unsigned I = 0, N = ASTFiles.size(); I != N; ++I) {
-    IntrusiveRefCntPtr<DiagnosticsEngine> Diags(new DiagnosticsEngine(
+    auto Diags = llvm::makeIntrusiveRefCnt<DiagnosticsEngine>(
         DiagIDs, CI.getDiagnosticOpts(),
         new ForwardingDiagnosticConsumer(*CI.getDiagnostics().getClient()),
-        /*ShouldOwnClient=*/true));
+        /*ShouldOwnClient=*/true);
     std::unique_ptr<ASTUnit> Unit = ASTUnit::LoadFromASTFile(
         ASTFiles[I], CI.getPCHContainerReader(), ASTUnit::LoadEverything,
         nullptr, Diags, CI.getFileSystemOpts(), CI.getHeaderSearchOpts());
diff --git a/clang/lib/Frontend/ASTUnit.cpp b/clang/lib/Frontend/ASTUnit.cpp
index 67ed17b..5711f45 100644
--- a/clang/lib/Frontend/ASTUnit.cpp
+++ b/clang/lib/Frontend/ASTUnit.cpp
@@ -1204,7 +1204,7 @@ bool ASTUnit::Parse(std::shared_ptr<PCHContainerOperations> PCHContainerOps,
 
   // Set up diagnostics, capturing any diagnostics that would
   // otherwise be dropped.
-  Clang->setDiagnostics(&getDiagnostics());
+  Clang->setDiagnostics(getDiagnosticsPtr());
 
   // Create the target instance.
   if (!Clang->createTarget())
@@ -1424,7 +1424,7 @@ ASTUnit::getMainBufferWithPrecompiledPreamble(
       PreambleInvocationIn.getFrontendOpts().SkipFunctionBodies = true;
 
     llvm::ErrorOr<PrecompiledPreamble> NewPreamble = PrecompiledPreamble::Build(
-        PreambleInvocationIn, MainFileBuffer.get(), Bounds, *Diagnostics, VFS,
+        PreambleInvocationIn, MainFileBuffer.get(), Bounds, Diagnostics, VFS,
         PCHContainerOps, StorePreamblesInMemory, PreambleStoragePath,
         Callbacks);
 
@@ -1624,7 +1624,7 @@ ASTUnit *ASTUnit::LoadFromCompilerInvocationAction(
 
   // Set up diagnostics, capturing any diagnostics that would
   // otherwise be dropped.
-  Clang->setDiagnostics(&AST->getDiagnostics());
+  Clang->setDiagnostics(AST->getDiagnosticsPtr());
 
   // Create the target instance.
   if (!Clang->createTarget())
@@ -1773,7 +1773,7 @@ std::unique_ptr<ASTUnit> ASTUnit::LoadFromCompilerInvocation(
 
   if (AST->LoadFromCompilerInvocation(std::move(PCHContainerOps),
                                       PrecompilePreambleAfterNParses,
-                                      &AST->FileMgr->getVirtualFileSystem()))
+                                      AST->FileMgr->getVirtualFileSystemPtr()))
     return nullptr;
   return AST;
 }
@@ -1895,7 +1895,7 @@ bool ASTUnit::Reparse(std::shared_ptr<PCHContainerOperations> PCHContainerOps,
 
   if (!VFS) {
     assert(FileMgr && "FileMgr is null on Reparse call");
-    VFS = &FileMgr->getVirtualFileSystem();
+    VFS = FileMgr->getVirtualFileSystemPtr();
   }
 
   clearFileLevelDecls();
@@ -2209,8 +2209,9 @@ void ASTUnit::CodeComplete(
     bool IncludeCodePatterns, bool IncludeBriefComments,
     CodeCompleteConsumer &Consumer,
     std::shared_ptr<PCHContainerOperations> PCHContainerOps,
-    DiagnosticsEngine &Diag, LangOptions &LangOpts, SourceManager &SourceMgr,
-    FileManager &FileMgr, SmallVectorImpl<StoredDiagnostic> &StoredDiagnostics,
+    llvm::IntrusiveRefCntPtr<DiagnosticsEngine> Diag, LangOptions &LangOpts,
+    SourceManager &SourceMgr, FileManager &FileMgr,
+    SmallVectorImpl<StoredDiagnostic> &StoredDiagnostics,
     SmallVectorImpl<const llvm::MemoryBuffer *> &OwnedBuffers,
     std::unique_ptr<SyntaxOnlyAction> Act) {
   if (!Invocation)
@@ -2259,11 +2260,11 @@ void ASTUnit::CodeComplete(
       std::string(Clang->getFrontendOpts().Inputs[0].getFile());
 
   // Set up diagnostics, capturing any diagnostics produced.
-  Clang->setDiagnostics(&Diag);
+  Clang->setDiagnostics(Diag);
   CaptureDroppedDiagnostics Capture(CaptureDiagsKind::All,
                                     Clang->getDiagnostics(),
                                     &StoredDiagnostics, nullptr);
-  ProcessWarningOptions(Diag, Inv.getDiagnosticOpts(),
+  ProcessWarningOptions(*Diag, Inv.getDiagnosticOpts(),
                         FileMgr.getVirtualFileSystem());
 
   // Create the target instance.
@@ -2321,7 +2322,8 @@ void ASTUnit::CodeComplete(
   std::unique_ptr<llvm::MemoryBuffer> OverrideMainBuffer;
   if (Preamble && Line > 1 && hasSameUniqueID(File, OriginalSourceFile)) {
     OverrideMainBuffer = getMainBufferWithPrecompiledPreamble(
-        PCHContainerOps, Inv, &FileMgr.getVirtualFileSystem(), false, Line - 1);
+        PCHContainerOps, Inv, FileMgr.getVirtualFileSystemPtr(), false,
+        Line - 1);
   }
 
   // If the main file has been overridden due to the use of a preamble,
@@ -2331,7 +2333,7 @@ void ASTUnit::CodeComplete(
            "No preamble was built, but OverrideMainBuffer is not null");
 
     IntrusiveRefCntPtr<llvm::vfs::FileSystem> VFS =
-        &FileMgr.getVirtualFileSystem();
+        FileMgr.getVirtualFileSystemPtr();
     Preamble->AddImplicitPreamble(Clang->getInvocation(), VFS,
                                   OverrideMainBuffer.get());
     // FIXME: there is no way to update VFS if it was changed by
diff --git a/clang/lib/Frontend/ChainedIncludesSource.cpp b/clang/lib/Frontend/ChainedIncludesSource.cpp
index ba7c767..88b1076 100644
--- a/clang/lib/Frontend/ChainedIncludesSource.cpp
+++ b/clang/lib/Frontend/ChainedIncludesSource.cpp
@@ -118,13 +118,12 @@ IntrusiveRefCntPtr<ExternalSemaSource> clang::createChainedIncludesSource(
 
     TextDiagnosticPrinter *DiagClient =
         new TextDiagnosticPrinter(llvm::errs(), CI.getDiagnosticOpts());
-    IntrusiveRefCntPtr<DiagnosticIDs> DiagID(new DiagnosticIDs());
-    IntrusiveRefCntPtr<DiagnosticsEngine> Diags(
-        new DiagnosticsEngine(DiagID, CI.getDiagnosticOpts(), DiagClient));
+    auto Diags = llvm::makeIntrusiveRefCnt<DiagnosticsEngine>(
+        DiagnosticIDs::create(), CI.getDiagnosticOpts(), DiagClient);
 
     auto Clang = std::make_unique<CompilerInstance>(
         std::move(CInvok), CI.getPCHContainerOperations());
-    Clang->setDiagnostics(Diags.get());
+    Clang->setDiagnostics(Diags);
     Clang->setTarget(TargetInfo::CreateTargetInfo(
         Clang->getDiagnostics(), Clang->getInvocation().getTargetOpts()));
     Clang->createFileManager();
diff --git a/clang/lib/Frontend/CompilerInstance.cpp b/clang/lib/Frontend/CompilerInstance.cpp
index c7b82db..ed6a651 100644
--- a/clang/lib/Frontend/CompilerInstance.cpp
+++ b/clang/lib/Frontend/CompilerInstance.cpp
@@ -87,8 +87,9 @@ bool CompilerInstance::shouldBuildGlobalModuleIndex() const {
          !DisableGeneratingGlobalModuleIndex;
 }
 
-void CompilerInstance::setDiagnostics(DiagnosticsEngine *Value) {
-  Diagnostics = Value;
+void CompilerInstance::setDiagnostics(
+    llvm::IntrusiveRefCntPtr<DiagnosticsEngine> Value) {
+  Diagnostics = std::move(Value);
 }
 
 void CompilerInstance::setVerboseOutputStream(raw_ostream &Value) {
@@ -160,6 +161,11 @@ llvm::vfs::FileSystem &CompilerInstance::getVirtualFileSystem() const {
   return getFileManager().getVirtualFileSystem();
 }
 
+llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem>
+CompilerInstance::getVirtualFileSystemPtr() const {
+  return getFileManager().getVirtualFileSystemPtr();
+}
+
 void CompilerInstance::setFileManager(FileManager *Value) {
   FileMgr = Value;
 }
@@ -340,9 +346,8 @@ IntrusiveRefCntPtr<DiagnosticsEngine> CompilerInstance::createDiagnostics(
     llvm::vfs::FileSystem &VFS, DiagnosticOptions &Opts,
     DiagnosticConsumer *Client, bool ShouldOwnClient,
     const CodeGenOptions *CodeGenOpts) {
-  IntrusiveRefCntPtr<DiagnosticIDs> DiagID(new DiagnosticIDs());
-  IntrusiveRefCntPtr<DiagnosticsEngine> Diags(
-      new DiagnosticsEngine(DiagID, Opts));
+  auto Diags = llvm::makeIntrusiveRefCnt<DiagnosticsEngine>(
+      DiagnosticIDs::create(), Opts);
 
   // Create the diagnostic client for reporting errors or for
   // implementing -verify.
@@ -375,7 +380,7 @@ IntrusiveRefCntPtr<DiagnosticsEngine> CompilerInstance::createDiagnostics(
 FileManager *CompilerInstance::createFileManager(
     IntrusiveRefCntPtr<llvm::vfs::FileSystem> VFS) {
   if (!VFS)
-    VFS = FileMgr ? &FileMgr->getVirtualFileSystem()
+    VFS = FileMgr ? FileMgr->getVirtualFileSystemPtr()
                   : createVFSFromCompilerInvocation(getInvocation(),
                                                     getDiagnostics());
   assert(VFS && "FileManager has no VFS?");
@@ -1218,7 +1223,7 @@ std::unique_ptr<CompilerInstance> CompilerInstance::cloneForModuleCompileImpl(
   } else if (FrontendOpts.ModulesShareFileManager) {
     Instance.setFileManager(&getFileManager());
   } else {
-    Instance.createFileManager(&getVirtualFileSystem());
+    Instance.createFileManager(getVirtualFileSystemPtr());
   }
 
   if (ThreadSafeConfig) {
diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp
index ab4384a..9f77e62 100644
--- a/clang/lib/Frontend/CompilerInvocation.cpp
+++ b/clang/lib/Frontend/CompilerInvocation.cpp
@@ -826,7 +826,7 @@ static bool RoundTrip(ParseFn Parse, GenerateFn Generate,
 
   // Setup a dummy DiagnosticsEngine.
   DiagnosticOptions DummyDiagOpts;
-  DiagnosticsEngine DummyDiags(new DiagnosticIDs(), DummyDiagOpts);
+  DiagnosticsEngine DummyDiags(DiagnosticIDs::create(), DummyDiagOpts);
   DummyDiags.setClient(new TextDiagnosticBuffer());
 
   // Run the first parse on the original arguments with the dummy invocation and
@@ -2685,7 +2685,7 @@ bool clang::ParseDiagnosticArgs(DiagnosticOptions &Opts, ArgList &Args,
   std::optional<DiagnosticsEngine> IgnoringDiags;
   if (!Diags) {
     IgnoringDiagOpts.emplace();
-    IgnoringDiags.emplace(new DiagnosticIDs(), *IgnoringDiagOpts,
+    IgnoringDiags.emplace(DiagnosticIDs::create(), *IgnoringDiagOpts,
                           new IgnoringDiagConsumer());
     Diags = &*IgnoringDiags;
   }
diff --git a/clang/lib/Frontend/FrontendAction.cpp b/clang/lib/Frontend/FrontendAction.cpp
index 1d82fc7..137d225 100644
--- a/clang/lib/Frontend/FrontendAction.cpp
+++ b/clang/lib/Frontend/FrontendAction.cpp
@@ -763,11 +763,11 @@ bool FrontendAction::BeginSourceFile(CompilerInstance &CI,
   // If we're replaying the build of an AST file, import it and set up
   // the initial state from its build.
   if (ReplayASTFile) {
-    IntrusiveRefCntPtr<DiagnosticsEngine> Diags(&CI.getDiagnostics());
+    IntrusiveRefCntPtr<DiagnosticsEngine> Diags = CI.getDiagnosticsPtr();
 
     // The AST unit populates its own diagnostics engine rather than ours.
-    IntrusiveRefCntPtr<DiagnosticsEngine> ASTDiags(new DiagnosticsEngine(
-        Diags->getDiagnosticIDs(), Diags->getDiagnosticOptions()));
+    auto ASTDiags = llvm::makeIntrusiveRefCnt<DiagnosticsEngine>(
+        Diags->getDiagnosticIDs(), Diags->getDiagnosticOptions());
     ASTDiags->setClient(Diags->getClient(), /*OwnsClient*/false);
 
     // FIXME: What if the input is a memory buffer?
@@ -835,7 +835,7 @@ bool FrontendAction::BeginSourceFile(CompilerInstance &CI,
     assert(hasASTFileSupport() &&
            "This action does not have AST file support!");
 
-    IntrusiveRefCntPtr<DiagnosticsEngine> Diags(&CI.getDiagnostics());
+    IntrusiveRefCntPtr<DiagnosticsEngine> Diags = CI.getDiagnosticsPtr();
 
     // FIXME: What if the input is a memory buffer?
     StringRef InputFile = Input.getFile();
diff --git a/clang/lib/Frontend/FrontendActions.cpp b/clang/lib/Frontend/FrontendActions.cpp
index dcfbd53..685a9bb 100644
--- a/clang/lib/Frontend/FrontendActions.cpp
+++ b/clang/lib/Frontend/FrontendActions.cpp
@@ -181,7 +181,7 @@ bool GeneratePCHAction::shouldEraseOutputFiles() {
 
 bool GeneratePCHAction::BeginSourceFileAction(CompilerInstance &CI) {
   CI.getLangOpts().CompilingPCH = true;
-  return true;
+  return ASTFrontendAction::BeginSourceFileAction(CI);
 }
 
 std::vector<std::unique_ptr<ASTConsumer>>
diff --git a/clang/lib/Frontend/InitPreprocessor.cpp b/clang/lib/Frontend/InitPreprocessor.cpp
index 382ccd6..008a35d 100644
--- a/clang/lib/Frontend/InitPreprocessor.cpp
+++ b/clang/lib/Frontend/InitPreprocessor.cpp
@@ -945,8 +945,8 @@ static void InitializePredefinedMacros(const TargetInfo &TI,
   if (LangOpts.GNUCVersion && LangOpts.CPlusPlus11)
     Builder.defineMacro("__GXX_EXPERIMENTAL_CXX0X__");
 
-  if (TI.getTriple().isWindowsGNUEnvironment()) {
-    // Set ABI defining macros for libstdc++ for MinGW, where the
+  if (TI.getTriple().isOSCygMing()) {
+    // Set ABI defining macros for libstdc++ for MinGW and Cygwin, where the
     // default in libstdc++ differs from the defaults for this target.
     Builder.defineMacro("__GXX_TYPEINFO_EQUALITY_INLINE", "0");
   }
diff --git a/clang/lib/Frontend/PrecompiledPreamble.cpp b/clang/lib/Frontend/PrecompiledPreamble.cpp
index 146cf90..7fc1d87 100644
--- a/clang/lib/Frontend/PrecompiledPreamble.cpp
+++ b/clang/lib/Frontend/PrecompiledPreamble.cpp
@@ -57,11 +57,9 @@ createVFSOverlayForPreamblePCH(StringRef PCHFilename,
                                IntrusiveRefCntPtr<llvm::vfs::FileSystem> VFS) {
   // We want only the PCH file from the real filesystem to be available,
   // so we create an in-memory VFS with just that and overlay it on top.
-  IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> PCHFS(
-      new llvm::vfs::InMemoryFileSystem());
+  auto PCHFS = llvm::makeIntrusiveRefCnt<llvm::vfs::InMemoryFileSystem>();
   PCHFS->addFile(PCHFilename, 0, std::move(PCHBuffer));
-  IntrusiveRefCntPtr<llvm::vfs::OverlayFileSystem> Overlay(
-      new llvm::vfs::OverlayFileSystem(VFS));
+  auto Overlay = llvm::makeIntrusiveRefCnt<llvm::vfs::OverlayFileSystem>(VFS);
   Overlay->pushOverlay(PCHFS);
   return Overlay;
 }
@@ -414,7 +412,7 @@ PrecompiledPreamble::operator=(PrecompiledPreamble &&) = default;
 llvm::ErrorOr<PrecompiledPreamble> PrecompiledPreamble::Build(
     const CompilerInvocation &Invocation,
     const llvm::MemoryBuffer *MainFileBuffer, PreambleBounds Bounds,
-    DiagnosticsEngine &Diagnostics,
+    IntrusiveRefCntPtr<DiagnosticsEngine> Diagnostics,
     IntrusiveRefCntPtr<llvm::vfs::FileSystem> VFS,
     std::shared_ptr<PCHContainerOperations> PCHContainerOps, bool StoreInMemory,
     StringRef StoragePath, PreambleCallbacks &Callbacks) {
@@ -463,7 +461,7 @@ llvm::ErrorOr<PrecompiledPreamble> PrecompiledPreamble::Build(
   llvm::CrashRecoveryContextCleanupRegistrar<CompilerInstance> CICleanup(
       Clang.get());
 
-  Clang->setDiagnostics(&Diagnostics);
+  Clang->setDiagnostics(Diagnostics);
 
   // Create the target instance.
   if (!Clang->createTarget())
@@ -478,18 +476,18 @@ llvm::ErrorOr<PrecompiledPreamble> PrecompiledPreamble::Build(
   }
 
   // Clear out old caches and data.
-  Diagnostics.Reset();
-  ProcessWarningOptions(Diagnostics, Clang->getDiagnosticOpts(), *VFS);
+  Diagnostics->Reset();
+  ProcessWarningOptions(*Diagnostics, Clang->getDiagnosticOpts(), *VFS);
 
-  VFS =
-      createVFSFromCompilerInvocation(Clang->getInvocation(), Diagnostics, VFS);
+  VFS = createVFSFromCompilerInvocation(Clang->getInvocation(), *Diagnostics,
+                                        VFS);
 
   // Create a file manager object to provide access to and cache the filesystem.
   Clang->setFileManager(new FileManager(Clang->getFileSystemOpts(), VFS));
 
   // Create the source manager.
   Clang->setSourceManager(
-      new SourceManager(Diagnostics, Clang->getFileManager()));
+      new SourceManager(*Diagnostics, Clang->getFileManager()));
 
   auto PreambleDepCollector = std::make_shared<PreambleDependencyCollector>();
   Clang->addDependencyCollector(PreambleDepCollector);
diff --git a/clang/lib/Frontend/Rewrite/FrontendActions.cpp b/clang/lib/Frontend/Rewrite/FrontendActions.cpp
index 84e7a4f..6c9c9d5 100644
--- a/clang/lib/Frontend/Rewrite/FrontendActions.cpp
+++ b/clang/lib/Frontend/Rewrite/FrontendActions.cpp
@@ -103,12 +103,13 @@ bool FixItAction::BeginSourceFileAction(CompilerInstance &CI) {
   }
   Rewriter.reset(new FixItRewriter(CI.getDiagnostics(), CI.getSourceManager(),
                                    CI.getLangOpts(), FixItOpts.get()));
-  return true;
+  return ASTFrontendAction::BeginSourceFileAction(CI);
 }
 
 void FixItAction::EndSourceFileAction() {
   // Otherwise rewrite all files.
   Rewriter->WriteFixedFiles();
+  ASTFrontendAction::EndSourceFileAction();
 }
 
 bool FixItRecompile::BeginInvocation(CompilerInstance &CI) {
@@ -298,7 +299,7 @@ bool RewriteIncludesAction::BeginSourceFileAction(CompilerInstance &CI) {
         std::make_unique<RewriteImportsListener>(CI, OutputStream));
   }
 
-  return true;
+  return PreprocessorFrontendAction::BeginSourceFileAction(CI);
 }
 
 void RewriteIncludesAction::ExecuteAction() {
diff --git a/clang/lib/Frontend/SerializedDiagnosticPrinter.cpp b/clang/lib/Frontend/SerializedDiagnosticPrinter.cpp
index 23a1f90..1f5bb47 100644
--- a/clang/lib/Frontend/SerializedDiagnosticPrinter.cpp
+++ b/clang/lib/Frontend/SerializedDiagnosticPrinter.cpp
@@ -753,10 +753,9 @@ DiagnosticsEngine *SDiagsWriter::getMetaDiags() {
   //    to be distinct from the engine the writer was being added to and would
   //    normally not be used.
   if (!State->MetaDiagnostics) {
-    IntrusiveRefCntPtr<DiagnosticIDs> IDs(new DiagnosticIDs());
     auto Client = new TextDiagnosticPrinter(llvm::errs(), State->DiagOpts);
-    State->MetaDiagnostics =
-        std::make_unique<DiagnosticsEngine>(IDs, State->DiagOpts, Client);
+    State->MetaDiagnostics = std::make_unique<DiagnosticsEngine>(
+        DiagnosticIDs::create(), State->DiagOpts, Client);
   }
   return State->MetaDiagnostics.get();
 }
diff --git a/clang/lib/Headers/avx10_2_512niintrin.h b/clang/lib/Headers/avx10_2_512niintrin.h
index 7e614f7..9d96e36c7 100644
--- a/clang/lib/Headers/avx10_2_512niintrin.h
+++ b/clang/lib/Headers/avx10_2_512niintrin.h
@@ -197,7 +197,7 @@ _mm512_mask_dpwsud_epi32(__m512i __A, __mmask16 __U, __m512i __B, __m512i __C) {
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_dpwsud_epi32(
-    __m512i __A, __mmask16 __U, __m512i __B, __m512i __C) {
+    __mmask16 __U, __m512i __A, __m512i __B, __m512i __C) {
   return (__m512i)__builtin_ia32_selectd_512(
       (__mmask16)__U, (__v16si)_mm512_dpwsud_epi32(__A, __B, __C),
       (__v16si)_mm512_setzero_si512());
@@ -218,7 +218,7 @@ static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_dpwsuds_epi32(
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_dpwsuds_epi32(
-    __m512i __A, __mmask16 __U, __m512i __B, __m512i __C) {
+    __mmask16 __U, __m512i __A, __m512i __B, __m512i __C) {
   return (__m512i)__builtin_ia32_selectd_512(
       (__mmask16)__U, (__v16si)_mm512_dpwsuds_epi32(__A, __B, __C),
       (__v16si)_mm512_setzero_si512());
@@ -239,7 +239,7 @@ _mm512_mask_dpwusd_epi32(__m512i __A, __mmask16 __U, __m512i __B, __m512i __C) {
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_dpwusd_epi32(
-    __m512i __A, __mmask16 __U, __m512i __B, __m512i __C) {
+    __mmask16 __U, __m512i __A, __m512i __B, __m512i __C) {
   return (__m512i)__builtin_ia32_selectd_512(
       (__mmask16)__U, (__v16si)_mm512_dpwusd_epi32(__A, __B, __C),
       (__v16si)_mm512_setzero_si512());
@@ -260,7 +260,7 @@ static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_dpwusds_epi32(
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_dpwusds_epi32(
-    __m512i __A, __mmask16 __U, __m512i __B, __m512i __C) {
+    __mmask16 __U, __m512i __A, __m512i __B, __m512i __C) {
   return (__m512i)__builtin_ia32_selectd_512(
       (__mmask16)__U, (__v16si)_mm512_dpwusds_epi32(__A, __B, __C),
       (__v16si)_mm512_setzero_si512());
@@ -281,7 +281,7 @@ _mm512_mask_dpwuud_epi32(__m512i __A, __mmask16 __U, __m512i __B, __m512i __C) {
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_dpwuud_epi32(
-    __m512i __A, __mmask16 __U, __m512i __B, __m512i __C) {
+    __mmask16 __U, __m512i __A, __m512i __B, __m512i __C) {
   return (__m512i)__builtin_ia32_selectd_512(
       (__mmask16)__U, (__v16si)_mm512_dpwuud_epi32(__A, __B, __C),
       (__v16si)_mm512_setzero_si512());
@@ -302,7 +302,7 @@ static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_dpwuuds_epi32(
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_dpwuuds_epi32(
-    __m512i __A, __mmask16 __U, __m512i __B, __m512i __C) {
+    __mmask16 __U, __m512i __A, __m512i __B, __m512i __C) {
   return (__m512i)__builtin_ia32_selectd_512(
       (__mmask16)__U, (__v16si)_mm512_dpwuuds_epi32(__A, __B, __C),
       (__v16si)_mm512_setzero_si512());
diff --git a/clang/lib/Headers/avx10_2niintrin.h b/clang/lib/Headers/avx10_2niintrin.h
index 992be18..d5a66cf 100644
--- a/clang/lib/Headers/avx10_2niintrin.h
+++ b/clang/lib/Headers/avx10_2niintrin.h
@@ -253,7 +253,7 @@ _mm_mask_dpwsud_epi32(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C) {
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_maskz_dpwsud_epi32(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C) {
+_mm_maskz_dpwsud_epi32(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C) {
   return (__m128i)__builtin_ia32_selectd_128(
       (__mmask8)__U, (__v4si)_mm_dpwsud_epi32(__A, __B, __C),
       (__v4si)_mm_setzero_si128());
@@ -266,7 +266,7 @@ _mm256_mask_dpwsud_epi32(__m256i __A, __mmask8 __U, __m256i __B, __m256i __C) {
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_maskz_dpwsud_epi32(__m256i __A, __mmask8 __U, __m256i __B, __m256i __C) {
+_mm256_maskz_dpwsud_epi32(__mmask8 __U, __m256i __A, __m256i __B, __m256i __C) {
   return (__m256i)__builtin_ia32_selectd_256(
       (__mmask8)__U, (__v8si)_mm256_dpwsud_epi32(__A, __B, __C),
       (__v8si)_mm256_setzero_si256());
@@ -279,7 +279,7 @@ _mm_mask_dpwsuds_epi32(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C) {
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_maskz_dpwsuds_epi32(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C) {
+_mm_maskz_dpwsuds_epi32(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C) {
   return (__m128i)__builtin_ia32_selectd_128(
       (__mmask8)__U, (__v4si)_mm_dpwsuds_epi32(__A, __B, __C),
       (__v4si)_mm_setzero_si128());
@@ -292,7 +292,7 @@ _mm256_mask_dpwsuds_epi32(__m256i __A, __mmask8 __U, __m256i __B, __m256i __C) {
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_dpwsuds_epi32(
-    __m256i __A, __mmask8 __U, __m256i __B, __m256i __C) {
+    __mmask8 __U, __m256i __A, __m256i __B, __m256i __C) {
   return (__m256i)__builtin_ia32_selectd_256(
       (__mmask8)__U, (__v8si)_mm256_dpwsuds_epi32(__A, __B, __C),
       (__v8si)_mm256_setzero_si256());
@@ -305,7 +305,7 @@ _mm_mask_dpwusd_epi32(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C) {
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_maskz_dpwusd_epi32(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C) {
+_mm_maskz_dpwusd_epi32(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C) {
   return (__m128i)__builtin_ia32_selectd_128(
       (__mmask8)__U, (__v4si)_mm_dpwusd_epi32(__A, __B, __C),
       (__v4si)_mm_setzero_si128());
@@ -318,7 +318,7 @@ _mm256_mask_dpwusd_epi32(__m256i __A, __mmask8 __U, __m256i __B, __m256i __C) {
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_maskz_dpwusd_epi32(__m256i __A, __mmask8 __U, __m256i __B, __m256i __C) {
+_mm256_maskz_dpwusd_epi32(__mmask8 __U, __m256i __A, __m256i __B, __m256i __C) {
   return (__m256i)__builtin_ia32_selectd_256(
       (__mmask8)__U, (__v8si)_mm256_dpwusd_epi32(__A, __B, __C),
       (__v8si)_mm256_setzero_si256());
@@ -331,7 +331,7 @@ _mm_mask_dpwusds_epi32(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C) {
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_maskz_dpwusds_epi32(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C) {
+_mm_maskz_dpwusds_epi32(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C) {
   return (__m128i)__builtin_ia32_selectd_128(
       (__mmask8)__U, (__v4si)_mm_dpwusds_epi32(__A, __B, __C),
       (__v4si)_mm_setzero_si128());
@@ -344,7 +344,7 @@ _mm256_mask_dpwusds_epi32(__m256i __A, __mmask8 __U, __m256i __B, __m256i __C) {
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_dpwusds_epi32(
-    __m256i __A, __mmask8 __U, __m256i __B, __m256i __C) {
+    __mmask8 __U, __m256i __A, __m256i __B, __m256i __C) {
   return (__m256i)__builtin_ia32_selectd_256(
       (__mmask8)__U, (__v8si)_mm256_dpwusds_epi32(__A, __B, __C),
       (__v8si)_mm256_setzero_si256());
@@ -357,7 +357,7 @@ _mm_mask_dpwuud_epi32(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C) {
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_maskz_dpwuud_epi32(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C) {
+_mm_maskz_dpwuud_epi32(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C) {
   return (__m128i)__builtin_ia32_selectd_128(
       (__mmask8)__U, (__v4si)_mm_dpwuud_epi32(__A, __B, __C),
       (__v4si)_mm_setzero_si128());
@@ -370,7 +370,7 @@ _mm256_mask_dpwuud_epi32(__m256i __A, __mmask8 __U, __m256i __B, __m256i __C) {
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_maskz_dpwuud_epi32(__m256i __A, __mmask8 __U, __m256i __B, __m256i __C) {
+_mm256_maskz_dpwuud_epi32(__mmask8 __U, __m256i __A, __m256i __B, __m256i __C) {
   return (__m256i)__builtin_ia32_selectd_256(
       (__mmask8)__U, (__v8si)_mm256_dpwuud_epi32(__A, __B, __C),
       (__v8si)_mm256_setzero_si256());
@@ -383,7 +383,7 @@ _mm_mask_dpwuuds_epi32(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C) {
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_maskz_dpwuuds_epi32(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C) {
+_mm_maskz_dpwuuds_epi32(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C) {
   return (__m128i)__builtin_ia32_selectd_128(
       (__mmask8)__U, (__v4si)_mm_dpwuuds_epi32(__A, __B, __C),
       (__v4si)_mm_setzero_si128());
@@ -396,7 +396,7 @@ _mm256_mask_dpwuuds_epi32(__m256i __A, __mmask8 __U, __m256i __B, __m256i __C) {
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_dpwuuds_epi32(
-    __m256i __A, __mmask8 __U, __m256i __B, __m256i __C) {
+    __mmask8 __U, __m256i __A, __m256i __B, __m256i __C) {
   return (__m256i)__builtin_ia32_selectd_256(
       (__mmask8)__U, (__v8si)_mm256_dpwuuds_epi32(__A, __B, __C),
       (__v8si)_mm256_setzero_si256());
diff --git a/clang/lib/Headers/hlsl/hlsl_intrinsic_helpers.h b/clang/lib/Headers/hlsl/hlsl_intrinsic_helpers.h
index e8ccccb..c877234 100644
--- a/clang/lib/Headers/hlsl/hlsl_intrinsic_helpers.h
+++ b/clang/lib/Headers/hlsl/hlsl_intrinsic_helpers.h
@@ -12,7 +12,7 @@
 namespace hlsl {
 namespace __detail {
 
-constexpr vector<uint, 4> d3d_color_to_ubyte4_impl(vector<float, 4> V) {
+constexpr int4 d3d_color_to_ubyte4_impl(float4 V) {
   // Use the same scaling factor used by FXC, and DXC for DXIL
   // (i.e., 255.001953)
   // https://github.com/microsoft/DirectXShaderCompiler/blob/070d0d5a2beacef9eeb51037a9b04665716fd6f3/lib/HLSL/HLOperationLower.cpp#L666C1-L697C2
diff --git a/clang/lib/Headers/hlsl/hlsl_intrinsics.h b/clang/lib/Headers/hlsl/hlsl_intrinsics.h
index 499a053..d9d87c8 100644
--- a/clang/lib/Headers/hlsl/hlsl_intrinsics.h
+++ b/clang/lib/Headers/hlsl/hlsl_intrinsics.h
@@ -418,7 +418,7 @@ const inline float4 lit(float NDotL, float NDotH, float M) {
 /// This function swizzles and scales components of the \a x parameter. Use this
 /// function to compensate for the lack of UBYTE4 support in some hardware.
 
-constexpr vector<uint, 4> D3DCOLORtoUBYTE4(vector<float, 4> V) {
+constexpr int4 D3DCOLORtoUBYTE4(float4 V) {
   return __detail::d3d_color_to_ubyte4_impl(V);
 }
 
diff --git a/clang/lib/Headers/opencl-c.h b/clang/lib/Headers/opencl-c.h
index e1e0fde..f65b4b3 100644
--- a/clang/lib/Headers/opencl-c.h
+++ b/clang/lib/Headers/opencl-c.h
@@ -18410,6 +18410,22 @@ intel_sub_group_avc_mce_convert_to_sic_result(
 #pragma OPENCL EXTENSION cl_intel_device_side_avc_motion_estimation : end
 #endif // cl_intel_device_side_avc_motion_estimation
 
+#if defined(cl_intel_bfloat16_conversions)
+ushort __ovld intel_convert_bfloat16_as_ushort(float source);
+ushort2 __ovld intel_convert_bfloat162_as_ushort2(float2 source);
+ushort3 __ovld intel_convert_bfloat163_as_ushort3(float3 source);
+ushort4 __ovld intel_convert_bfloat164_as_ushort4(float4 source);
+ushort8 __ovld intel_convert_bfloat168_as_ushort8(float8 source);
+ushort16 __ovld intel_convert_bfloat1616_as_ushort16(float16 source);
+
+float __ovld intel_convert_as_bfloat16_float(ushort source);
+float2 __ovld intel_convert_as_bfloat162_float2(ushort2 source);
+float3 __ovld intel_convert_as_bfloat163_float3(ushort3 source);
+float4 __ovld intel_convert_as_bfloat164_float4(ushort4 source);
+float8 __ovld intel_convert_as_bfloat168_float8(ushort8 source);
+float16 __ovld intel_convert_as_bfloat1616_float16(ushort16 source);
+#endif // cl_intel_bfloat16_conversions
+
 #ifdef cl_amd_media_ops
 uint __ovld amd_bitalign(uint, uint, uint);
 uint2 __ovld amd_bitalign(uint2, uint2, uint2);
diff --git a/clang/lib/Interpreter/CodeCompletion.cpp b/clang/lib/Interpreter/CodeCompletion.cpp
index dac3888..ecdf489 100644
--- a/clang/lib/Interpreter/CodeCompletion.cpp
+++ b/clang/lib/Interpreter/CodeCompletion.cpp
@@ -380,7 +380,7 @@ void ReplCodeCompleter::codeComplete(CompilerInstance *InterpCI,
   AU->setOwnsRemappedFileBuffers(false);
   AU->CodeComplete(CodeCompletionFileName, 1, Col, RemappedFiles, false, false,
                    false, consumer,
-                   std::make_shared<clang::PCHContainerOperations>(), *diag,
+                   std::make_shared<clang::PCHContainerOperations>(), diag,
                    InterpCI->getLangOpts(), AU->getSourceManager(),
                    AU->getFileManager(), sd, tb, std::move(Act));
 }
diff --git a/clang/lib/Interpreter/Interpreter.cpp b/clang/lib/Interpreter/Interpreter.cpp
index 9b71486..5e5ae81 100644
--- a/clang/lib/Interpreter/Interpreter.cpp
+++ b/clang/lib/Interpreter/Interpreter.cpp
@@ -86,7 +86,6 @@ GetCC1Arguments(DiagnosticsEngine *Diagnostics,
 static llvm::Expected<std::unique_ptr<CompilerInstance>>
 CreateCI(const llvm::opt::ArgStringList &Argv) {
   std::unique_ptr<CompilerInstance> Clang(new CompilerInstance());
-  IntrusiveRefCntPtr<DiagnosticIDs> DiagID(new DiagnosticIDs());
 
   // Register the support for object-file-wrapped Clang modules.
   // FIXME: Clang should register these container operations automatically.
@@ -98,7 +97,7 @@ CreateCI(const llvm::opt::ArgStringList &Argv) {
   // a well formed diagnostic object.
   DiagnosticOptions DiagOpts;
   TextDiagnosticBuffer *DiagsBuffer = new TextDiagnosticBuffer;
-  DiagnosticsEngine Diags(DiagID, DiagOpts, DiagsBuffer);
+  DiagnosticsEngine Diags(DiagnosticIDs::create(), DiagOpts, DiagsBuffer);
   bool Success = CompilerInvocation::CreateFromArgs(
       Clang->getInvocation(), llvm::ArrayRef(Argv.begin(), Argv.size()), Diags);
 
@@ -174,11 +173,10 @@ IncrementalCompilerBuilder::create(std::string TT,
 
   // Buffer diagnostics from argument parsing so that we can output them using a
   // well formed diagnostic object.
-  IntrusiveRefCntPtr<DiagnosticIDs> DiagID(new DiagnosticIDs());
   std::unique_ptr<DiagnosticOptions> DiagOpts =
       CreateAndPopulateDiagOpts(ClangArgv);
   TextDiagnosticBuffer *DiagsBuffer = new TextDiagnosticBuffer;
-  DiagnosticsEngine Diags(DiagID, *DiagOpts, DiagsBuffer);
+  DiagnosticsEngine Diags(DiagnosticIDs::create(), *DiagOpts, DiagsBuffer);
 
   driver::Driver Driver(/*MainBinaryName=*/ClangArgv[0], TT, Diags);
   Driver.setCheckInputsExist(false); // the input comes from mem buffers
diff --git a/clang/lib/Sema/AnalysisBasedWarnings.cpp b/clang/lib/Sema/AnalysisBasedWarnings.cpp
index 829c81b..35ad0b5 100644
--- a/clang/lib/Sema/AnalysisBasedWarnings.cpp
+++ b/clang/lib/Sema/AnalysisBasedWarnings.cpp
@@ -503,8 +503,12 @@ static bool areAllValuesNoReturn(const VarDecl *VD, const CFGBlock &VarBlk,
 
   TransferFunctions TF(VD);
   BackwardDataflowWorklist Worklist(*AC.getCFG(), AC);
+  llvm::DenseSet<const CFGBlock *> Visited;
   Worklist.enqueueBlock(&VarBlk);
   while (const CFGBlock *B = Worklist.dequeue()) {
+    if (Visited.contains(B))
+      continue;
+    Visited.insert(B);
     // First check the current block.
     for (CFGBlock::const_reverse_iterator ri = B->rbegin(), re = B->rend();
          ri != re; ++ri) {
diff --git a/clang/lib/Sema/Sema.cpp b/clang/lib/Sema/Sema.cpp
index d50eeff..43a7f9e 100644
--- a/clang/lib/Sema/Sema.cpp
+++ b/clang/lib/Sema/Sema.cpp
@@ -2251,16 +2251,15 @@ void Sema::checkTypeSupport(QualType Ty, SourceLocation Loc, ValueDecl *D) {
     }
 
     // Don't allow SVE types in functions without a SVE target.
-    if (Ty->isSVESizelessBuiltinType() && FD) {
+    if (Ty->isSVESizelessBuiltinType() && FD && !FD->getType().isNull()) {
       llvm::StringMap<bool> CallerFeatureMap;
       Context.getFunctionFeatureMap(CallerFeatureMap, FD);
       if (!Builtin::evaluateRequiredTargetFeatures("sve", CallerFeatureMap)) {
         if (!Builtin::evaluateRequiredTargetFeatures("sme", CallerFeatureMap))
           Diag(Loc, diag::err_sve_vector_in_non_sve_target) << Ty;
         else if (!IsArmStreamingFunction(FD,
-                                         /*IncludeLocallyStreaming=*/true)) {
+                                         /*IncludeLocallyStreaming=*/true))
           Diag(Loc, diag::err_sve_vector_in_non_streaming_function) << Ty;
-        }
       }
     }
 
diff --git a/clang/lib/Sema/SemaAMDGPU.cpp b/clang/lib/Sema/SemaAMDGPU.cpp
index c23c98a..8580de2 100644
--- a/clang/lib/Sema/SemaAMDGPU.cpp
+++ b/clang/lib/Sema/SemaAMDGPU.cpp
@@ -84,6 +84,16 @@ bool SemaAMDGPU::CheckAMDGCNBuiltinFunctionCall(unsigned BuiltinID,
     return checkMovDPPFunctionCall(TheCall, 2, 1);
   case AMDGPU::BI__builtin_amdgcn_update_dpp: {
     return checkMovDPPFunctionCall(TheCall, 6, 2);
+  case AMDGPU::BI__builtin_amdgcn_cvt_scale_pk8_f16_fp8:
+  case AMDGPU::BI__builtin_amdgcn_cvt_scale_pk8_bf16_fp8:
+  case AMDGPU::BI__builtin_amdgcn_cvt_scale_pk8_f16_bf8:
+  case AMDGPU::BI__builtin_amdgcn_cvt_scale_pk8_bf16_bf8:
+  case AMDGPU::BI__builtin_amdgcn_cvt_scale_pk8_f16_fp4:
+  case AMDGPU::BI__builtin_amdgcn_cvt_scale_pk8_bf16_fp4:
+  case AMDGPU::BI__builtin_amdgcn_cvt_scale_pk8_f32_fp8:
+  case AMDGPU::BI__builtin_amdgcn_cvt_scale_pk8_f32_bf8:
+  case AMDGPU::BI__builtin_amdgcn_cvt_scale_pk8_f32_fp4:
+    return SemaRef.BuiltinConstantArgRange(TheCall, 2, 0, 7);
   }
   default:
     return false;
diff --git a/clang/lib/Sema/SemaARM.cpp b/clang/lib/Sema/SemaARM.cpp
index 8e27fab..e09c352 100644
--- a/clang/lib/Sema/SemaARM.cpp
+++ b/clang/lib/Sema/SemaARM.cpp
@@ -846,9 +846,9 @@ bool SemaARM::CheckARMCoprocessorImmediate(const TargetInfo &TI,
   return false;
 }
 
-bool SemaARM::CheckARMBuiltinExclusiveCall(unsigned BuiltinID,
-                                           CallExpr *TheCall,
-                                           unsigned MaxWidth) {
+bool SemaARM::CheckARMBuiltinExclusiveCall(const TargetInfo &TI,
+                                           unsigned BuiltinID,
+                                           CallExpr *TheCall) {
   assert((BuiltinID == ARM::BI__builtin_arm_ldrex ||
           BuiltinID == ARM::BI__builtin_arm_ldaex ||
           BuiltinID == ARM::BI__builtin_arm_strex ||
@@ -923,12 +923,56 @@ bool SemaARM::CheckARMBuiltinExclusiveCall(unsigned BuiltinID,
     return true;
   }
 
-  // But ARM doesn't have instructions to deal with 128-bit versions.
-  if (Context.getTypeSize(ValType) > MaxWidth) {
-    assert(MaxWidth == 64 && "Diagnostic unexpectedly inaccurate");
-    Diag(DRE->getBeginLoc(), diag::err_atomic_exclusive_builtin_pointer_size)
-        << PointerArg->getType() << PointerArg->getSourceRange();
-    return true;
+  // Check whether the size of the type can be handled atomically on this
+  // target.
+  if (!TI.getTriple().isAArch64()) {
+    unsigned Mask = TI.getARMLDREXMask();
+    unsigned Bits = Context.getTypeSize(ValType);
+    bool Supported =
+        (llvm::isPowerOf2_64(Bits)) && Bits >= 8 && (Mask & (Bits / 8));
+
+    if (!Supported) {
+      // Emit a diagnostic saying that this size isn't available. If _no_ size
+      // of exclusive access is supported on this target, we emit a diagnostic
+      // with special wording for that case, but otherwise, we emit
+      // err_atomic_exclusive_builtin_pointer_size and loop over `Mask` to
+      // control what subset of sizes it lists as legal.
+      if (Mask) {
+        auto D = Diag(DRE->getBeginLoc(),
+                      diag::err_atomic_exclusive_builtin_pointer_size)
+                 << PointerArg->getType();
+        bool Started = false;
+        for (unsigned Size = 1; Size <= 8; Size <<= 1) {
+          // For each of the sizes 1,2,4,8, pass two integers into the
+          // diagnostic. The first selects a separator from the previous
+          // number: 0 for no separator at all, 1 for a comma, 2 for " or "
+          // which appears before the final number in a list of more than one.
+          // The second integer just indicates whether we print this size in
+          // the message at all.
+          if (!(Mask & Size)) {
+            // This size isn't one of the supported ones, so emit no separator
+            // text and don't print the size itself.
+            D << 0 << 0;
+          } else {
+            // This size is supported, so print it, and an appropriate
+            // separator.
+            Mask &= ~Size;
+            if (!Started)
+              D << 0; // No separator if this is the first size we've printed
+            else if (Mask)
+              D << 1; // "," if there's still another size to come
+            else
+              D << 2; // " or " if the size we're about to print is the last
+            D << 1;   // print the size itself
+            Started = true;
+          }
+        }
+      } else {
+        Diag(DRE->getBeginLoc(),
+             diag::err_atomic_exclusive_builtin_pointer_size_none)
+            << PointerArg->getSourceRange();
+      }
+    }
   }
 
   switch (ValType.getObjCLifetime()) {
@@ -972,7 +1016,7 @@ bool SemaARM::CheckARMBuiltinFunctionCall(const TargetInfo &TI,
       BuiltinID == ARM::BI__builtin_arm_ldaex ||
       BuiltinID == ARM::BI__builtin_arm_strex ||
       BuiltinID == ARM::BI__builtin_arm_stlex) {
-    return CheckARMBuiltinExclusiveCall(BuiltinID, TheCall, 64);
+    return CheckARMBuiltinExclusiveCall(TI, BuiltinID, TheCall);
   }
 
   if (BuiltinID == ARM::BI__builtin_arm_prefetch) {
@@ -1053,7 +1097,7 @@ bool SemaARM::CheckAArch64BuiltinFunctionCall(const TargetInfo &TI,
       BuiltinID == AArch64::BI__builtin_arm_ldaex ||
       BuiltinID == AArch64::BI__builtin_arm_strex ||
       BuiltinID == AArch64::BI__builtin_arm_stlex) {
-    return CheckARMBuiltinExclusiveCall(BuiltinID, TheCall, 128);
+    return CheckARMBuiltinExclusiveCall(TI, BuiltinID, TheCall);
   }
 
   if (BuiltinID == AArch64::BI__builtin_arm_prefetch) {
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index c74b671..bc87611 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -15893,9 +15893,7 @@ ExprResult Sema::BuiltinMatrixTranspose(CallExpr *TheCall,
 // Get and verify the matrix dimensions.
 static std::optional<unsigned>
 getAndVerifyMatrixDimension(Expr *Expr, StringRef Name, Sema &S) {
-  SourceLocation ErrorPos;
-  std::optional<llvm::APSInt> Value =
-      Expr->getIntegerConstantExpr(S.Context, &ErrorPos);
+  std::optional<llvm::APSInt> Value = Expr->getIntegerConstantExpr(S.Context);
   if (!Value) {
     S.Diag(Expr->getBeginLoc(), diag::err_builtin_matrix_scalar_unsigned_arg)
         << Name;
diff --git a/clang/lib/Sema/SemaCodeComplete.cpp b/clang/lib/Sema/SemaCodeComplete.cpp
index a43ac9e..0de5580 100644
--- a/clang/lib/Sema/SemaCodeComplete.cpp
+++ b/clang/lib/Sema/SemaCodeComplete.cpp
@@ -4034,6 +4034,14 @@ static void AddOverloadParameterChunks(
       return;
     }
 
+    // C++23 introduces an explicit object parameter, a.k.a. "deducing this"
+    // Skip it for autocomplete and treat the next parameter as the first
+    // parameter
+    if (Function && FirstParameter &&
+        Function->getParamDecl(P)->isExplicitObjectParameter()) {
+      continue;
+    }
+
     if (FirstParameter)
       FirstParameter = false;
     else
diff --git a/clang/lib/Sema/SemaConcept.cpp b/clang/lib/Sema/SemaConcept.cpp
index 044cf5c..da85959 100644
--- a/clang/lib/Sema/SemaConcept.cpp
+++ b/clang/lib/Sema/SemaConcept.cpp
@@ -1105,10 +1105,6 @@ static bool CheckFunctionConstraintsWithoutInstantiation(
   }
 
   Sema::ContextRAII SavedContext(SemaRef, FD);
-  std::optional<Sema::CXXThisScopeRAII> ThisScope;
-  if (auto *Method = dyn_cast<CXXMethodDecl>(FD))
-    ThisScope.emplace(SemaRef, /*Record=*/Method->getParent(),
-                      /*ThisQuals=*/Method->getMethodQualifiers());
   return SemaRef.CheckConstraintSatisfaction(
       Template, TemplateAC, MLTAL, PointOfInstantiation, Satisfaction);
 }
diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp
index d7420bd..d255c11 100644
--- a/clang/lib/Sema/SemaDecl.cpp
+++ b/clang/lib/Sema/SemaDecl.cpp
@@ -3063,7 +3063,8 @@ static void checkNewAttributesAfterDef(Sema &S, Decl *New, const Decl *Old) {
       // error since the definition will have already been created without
       // the semantic effects of the attribute having been applied.
       S.Diag(NewAttribute->getLocation(),
-             diag::err_sycl_entry_point_after_definition);
+             diag::err_sycl_entry_point_after_definition)
+          << NewAttribute;
       S.Diag(Def->getLocation(), diag::note_previous_definition);
       cast<SYCLKernelEntryPointAttr>(NewAttribute)->setInvalidAttr();
       ++I;
@@ -12586,9 +12587,9 @@ static bool isDefaultStdCall(FunctionDecl *FD, Sema &S) {
   if (FD->getName() == "main" || FD->getName() == "wmain")
     return false;
 
-  // Default calling convention for MinGW is __cdecl
+  // Default calling convention for MinGW and Cygwin is __cdecl
   const llvm::Triple &T = S.Context.getTargetInfo().getTriple();
-  if (T.isWindowsGNUEnvironment())
+  if (T.isOSCygMing())
     return false;
 
   // Default calling convention for WinMain, wWinMain and DllMain
@@ -16258,19 +16259,19 @@ Decl *Sema::ActOnFinishFunctionBody(Decl *dcl, Stmt *Body,
         FD->getAttr<SYCLKernelEntryPointAttr>();
     if (FD->isDefaulted()) {
       Diag(SKEPAttr->getLocation(), diag::err_sycl_entry_point_invalid)
-          << /*defaulted function*/ 3;
+          << SKEPAttr << /*defaulted function*/ 3;
       SKEPAttr->setInvalidAttr();
     } else if (FD->isDeleted()) {
       Diag(SKEPAttr->getLocation(), diag::err_sycl_entry_point_invalid)
-          << /*deleted function*/ 2;
+          << SKEPAttr << /*deleted function*/ 2;
       SKEPAttr->setInvalidAttr();
     } else if (FSI->isCoroutine()) {
       Diag(SKEPAttr->getLocation(), diag::err_sycl_entry_point_invalid)
-          << /*coroutine*/ 7;
+          << SKEPAttr << /*coroutine*/ 7;
       SKEPAttr->setInvalidAttr();
     } else if (Body && isa<CXXTryStmt>(Body)) {
       Diag(SKEPAttr->getLocation(), diag::err_sycl_entry_point_invalid)
-          << /*function defined with a function try block*/ 8;
+          << SKEPAttr << /*function defined with a function try block*/ 8;
       SKEPAttr->setInvalidAttr();
     }
 
diff --git a/clang/lib/Sema/SemaModule.cpp b/clang/lib/Sema/SemaModule.cpp
index 98ebd70..b137549 100644
--- a/clang/lib/Sema/SemaModule.cpp
+++ b/clang/lib/Sema/SemaModule.cpp
@@ -13,7 +13,7 @@
 
 #include "clang/AST/ASTConsumer.h"
 #include "clang/AST/ASTMutationListener.h"
-#include "clang/AST/RecursiveASTVisitor.h"
+#include "clang/AST/DynamicRecursiveASTVisitor.h"
 #include "clang/Lex/HeaderSearch.h"
 #include "clang/Lex/Preprocessor.h"
 #include "clang/Sema/ParsedAttr.h"
@@ -1422,14 +1422,14 @@ bool ExposureChecker::checkExposure(const CXXRecordDecl *RD, bool Diag) {
   return IsExposure;
 }
 
-template <typename CallbackTy>
-class ReferenceTULocalChecker
-    : public clang::RecursiveASTVisitor<ReferenceTULocalChecker<CallbackTy>> {
+class ReferenceTULocalChecker : public DynamicRecursiveASTVisitor {
 public:
+  using CallbackTy = std::function<void(DeclRefExpr *, ValueDecl *)>;
+
   ReferenceTULocalChecker(ExposureChecker &C, CallbackTy &&Callback)
       : Checker(C), Callback(std::move(Callback)) {}
 
-  bool VisitDeclRefExpr(DeclRefExpr *DRE) {
+  bool VisitDeclRefExpr(DeclRefExpr *DRE) override {
     ValueDecl *Referenced = DRE->getDecl();
     if (!Referenced)
       return true;
@@ -1468,10 +1468,6 @@ public:
   CallbackTy Callback;
 };
 
-template <typename CallbackTy>
-ReferenceTULocalChecker(ExposureChecker &, CallbackTy &&)
-    -> ReferenceTULocalChecker<CallbackTy>;
-
 bool ExposureChecker::checkExposure(const Stmt *S, bool Diag) {
   if (!S)
     return false;
diff --git a/clang/lib/Sema/SemaSYCL.cpp b/clang/lib/Sema/SemaSYCL.cpp
index 3e03cb4..4683c81 100644
--- a/clang/lib/Sema/SemaSYCL.cpp
+++ b/clang/lib/Sema/SemaSYCL.cpp
@@ -262,12 +262,13 @@ void SemaSYCL::CheckSYCLEntryPointFunctionDecl(FunctionDecl *FD) {
     if (!getASTContext().hasSameType(SAI->getKernelName(),
                                      SKEPAttr->getKernelName())) {
       Diag(SAI->getLocation(), diag::err_sycl_entry_point_invalid_redeclaration)
-          << SAI->getKernelName() << SKEPAttr->getKernelName();
+          << SKEPAttr << SAI->getKernelName() << SKEPAttr->getKernelName();
       Diag(SKEPAttr->getLocation(), diag::note_previous_attribute);
       SAI->setInvalidAttr();
     } else {
       Diag(SAI->getLocation(),
-           diag::warn_sycl_entry_point_redundant_declaration);
+           diag::warn_sycl_entry_point_redundant_declaration)
+          << SAI;
       Diag(SKEPAttr->getLocation(), diag::note_previous_attribute);
     }
   }
@@ -289,7 +290,8 @@ void SemaSYCL::CheckSYCLEntryPointFunctionDecl(FunctionDecl *FD) {
                                        PrevSKEPAttr->getKernelName())) {
         Diag(SKEPAttr->getLocation(),
              diag::err_sycl_entry_point_invalid_redeclaration)
-            << SKEPAttr->getKernelName() << PrevSKEPAttr->getKernelName();
+            << SKEPAttr << SKEPAttr->getKernelName()
+            << PrevSKEPAttr->getKernelName();
         Diag(PrevSKEPAttr->getLocation(), diag::note_previous_decl) << PrevFD;
         SKEPAttr->setInvalidAttr();
       }
@@ -299,50 +301,52 @@ void SemaSYCL::CheckSYCLEntryPointFunctionDecl(FunctionDecl *FD) {
   if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
     if (!MD->isStatic()) {
       Diag(SKEPAttr->getLocation(), diag::err_sycl_entry_point_invalid)
-          << /*non-static member function*/ 0;
+          << SKEPAttr << /*non-static member function*/ 0;
       SKEPAttr->setInvalidAttr();
     }
   }
 
   if (FD->isVariadic()) {
     Diag(SKEPAttr->getLocation(), diag::err_sycl_entry_point_invalid)
-        << /*variadic function*/ 1;
+        << SKEPAttr << /*variadic function*/ 1;
     SKEPAttr->setInvalidAttr();
   }
 
   if (FD->isDefaulted()) {
     Diag(SKEPAttr->getLocation(), diag::err_sycl_entry_point_invalid)
-        << /*defaulted function*/ 3;
+        << SKEPAttr << /*defaulted function*/ 3;
     SKEPAttr->setInvalidAttr();
   } else if (FD->isDeleted()) {
     Diag(SKEPAttr->getLocation(), diag::err_sycl_entry_point_invalid)
-        << /*deleted function*/ 2;
+        << SKEPAttr << /*deleted function*/ 2;
     SKEPAttr->setInvalidAttr();
   }
 
   if (FD->isConsteval()) {
     Diag(SKEPAttr->getLocation(), diag::err_sycl_entry_point_invalid)
-        << /*consteval function*/ 5;
+        << SKEPAttr << /*consteval function*/ 5;
     SKEPAttr->setInvalidAttr();
   } else if (FD->isConstexpr()) {
     Diag(SKEPAttr->getLocation(), diag::err_sycl_entry_point_invalid)
-        << /*constexpr function*/ 4;
+        << SKEPAttr << /*constexpr function*/ 4;
     SKEPAttr->setInvalidAttr();
   }
 
   if (FD->isNoReturn()) {
     Diag(SKEPAttr->getLocation(), diag::err_sycl_entry_point_invalid)
-        << /*function declared with the 'noreturn' attribute*/ 6;
+        << SKEPAttr << /*function declared with the 'noreturn' attribute*/ 6;
     SKEPAttr->setInvalidAttr();
   }
 
   if (FD->getReturnType()->isUndeducedType()) {
     Diag(SKEPAttr->getLocation(),
-         diag::err_sycl_entry_point_deduced_return_type);
+         diag::err_sycl_entry_point_deduced_return_type)
+        << SKEPAttr;
     SKEPAttr->setInvalidAttr();
   } else if (!FD->getReturnType()->isDependentType() &&
              !FD->getReturnType()->isVoidType()) {
-    Diag(SKEPAttr->getLocation(), diag::err_sycl_entry_point_return_type);
+    Diag(SKEPAttr->getLocation(), diag::err_sycl_entry_point_return_type)
+        << SKEPAttr;
     SKEPAttr->setInvalidAttr();
   }
 
@@ -354,7 +358,8 @@ void SemaSYCL::CheckSYCLEntryPointFunctionDecl(FunctionDecl *FD) {
       if (!declaresSameEntity(FD, SKI->getKernelEntryPointDecl())) {
         // FIXME: This diagnostic should include the origin of the kernel
         // FIXME: names; not just the locations of the conflicting declarations.
-        Diag(FD->getLocation(), diag::err_sycl_kernel_name_conflict);
+        Diag(FD->getLocation(), diag::err_sycl_kernel_name_conflict)
+            << SKEPAttr;
         Diag(SKI->getKernelEntryPointDecl()->getLocation(),
              diag::note_previous_declaration);
         SKEPAttr->setInvalidAttr();
diff --git a/clang/lib/Sema/SemaStmtAttr.cpp b/clang/lib/Sema/SemaStmtAttr.cpp
index 857d46a..77aa716 100644
--- a/clang/lib/Sema/SemaStmtAttr.cpp
+++ b/clang/lib/Sema/SemaStmtAttr.cpp
@@ -795,6 +795,10 @@ ExprResult Sema::BuildCXXAssumeExpr(Expr *Assumption,
   if (Res.isInvalid())
     return ExprError();
 
+  Res = ActOnFinishFullExpr(Res.get(), /*DiscardedValue=*/false);
+  if (Res.isInvalid())
+    return ExprError();
+
   Assumption = Res.get();
   if (Assumption->HasSideEffects(Context))
     Diag(Assumption->getBeginLoc(), diag::warn_assume_side_effects)
diff --git a/clang/lib/Sema/SemaTemplate.cpp b/clang/lib/Sema/SemaTemplate.cpp
index 698d127..21fed2e 100644
--- a/clang/lib/Sema/SemaTemplate.cpp
+++ b/clang/lib/Sema/SemaTemplate.cpp
@@ -4749,8 +4749,6 @@ Sema::CheckConceptTemplateId(const CXXScopeSpec &SS,
   EnterExpressionEvaluationContext EECtx{
       *this, ExpressionEvaluationContext::Unevaluated, CSD};
 
-  ContextRAII CurContext(*this, CSD->getDeclContext(),
-                         /*NewThisContext=*/false);
   if (!AreArgsDependent &&
       CheckConstraintSatisfaction(
           NamedConcept, AssociatedConstraint(NamedConcept->getConstraintExpr()),
diff --git a/clang/lib/Sema/SemaTemplateInstantiate.cpp b/clang/lib/Sema/SemaTemplateInstantiate.cpp
index 20bac0e..d84d0ca1 100644
--- a/clang/lib/Sema/SemaTemplateInstantiate.cpp
+++ b/clang/lib/Sema/SemaTemplateInstantiate.cpp
@@ -2270,11 +2270,6 @@ TemplateInstantiator::TransformCXXAssumeAttr(const CXXAssumeAttr *AA) {
   if (!Res.isUsable())
     return AA;
 
-  Res = getSema().ActOnFinishFullExpr(Res.get(),
-                                      /*DiscardedValue=*/false);
-  if (!Res.isUsable())
-    return AA;
-
   if (!(Res.get()->getDependence() & ExprDependence::TypeValueInstantiation)) {
     Res = getSema().BuildCXXAssumeExpr(Res.get(), AA->getAttrName(),
                                        AA->getRange());
diff --git a/clang/lib/Sema/SemaTypeTraits.cpp b/clang/lib/Sema/SemaTypeTraits.cpp
index 1d8687e..c2f0600 100644
--- a/clang/lib/Sema/SemaTypeTraits.cpp
+++ b/clang/lib/Sema/SemaTypeTraits.cpp
@@ -11,9 +11,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "clang/AST/DeclCXX.h"
-#include "clang/AST/TemplateBase.h"
 #include "clang/AST/Type.h"
-#include "clang/Basic/DiagnosticIDs.h"
 #include "clang/Basic/DiagnosticParse.h"
 #include "clang/Basic/DiagnosticSema.h"
 #include "clang/Basic/TypeTraits.h"
@@ -1965,7 +1963,6 @@ static std::optional<TypeTrait> StdNameToTypeTrait(StringRef Name) {
       .Case("is_assignable", TypeTrait::BTT_IsAssignable)
       .Case("is_empty", TypeTrait::UTT_IsEmpty)
       .Case("is_standard_layout", TypeTrait::UTT_IsStandardLayout)
-      .Case("is_constructible", TypeTrait::TT_IsConstructible)
       .Default(std::nullopt);
 }
 
@@ -2002,16 +1999,8 @@ static ExtractedTypeTraitInfo ExtractTypeTraitFromExpression(const Expr *E) {
     Trait = StdNameToTypeTrait(Name);
     if (!Trait)
       return std::nullopt;
-    for (const auto &Arg : VD->getTemplateArgs().asArray()) {
-      if (Arg.getKind() == TemplateArgument::ArgKind::Pack) {
-        for (const auto &InnerArg : Arg.pack_elements())
-          Args.push_back(InnerArg.getAsType());
-      } else if (Arg.getKind() == TemplateArgument::ArgKind::Type) {
-        Args.push_back(Arg.getAsType());
-      } else {
-        llvm_unreachable("Unexpected kind");
-      }
-    }
+    for (const auto &Arg : VD->getTemplateArgs().asArray())
+      Args.push_back(Arg.getAsType());
     return {{Trait.value(), std::move(Args)}};
   }
 
@@ -2284,60 +2273,6 @@ static void DiagnoseNonTriviallyCopyableReason(Sema &SemaRef,
   }
 }
 
-static void DiagnoseNonConstructibleReason(
-    Sema &SemaRef, SourceLocation Loc,
-    const llvm::SmallVector<clang::QualType, 1> &Ts) {
-  if (Ts.empty()) {
-    return;
-  }
-
-  bool ContainsVoid = false;
-  for (const QualType &ArgTy : Ts) {
-    ContainsVoid |= ArgTy->isVoidType();
-  }
-
-  if (ContainsVoid)
-    SemaRef.Diag(Loc, diag::note_unsatisfied_trait_reason)
-        << diag::TraitNotSatisfiedReason::CVVoidType;
-
-  QualType T = Ts[0];
-  if (T->isFunctionType())
-    SemaRef.Diag(Loc, diag::note_unsatisfied_trait_reason)
-        << diag::TraitNotSatisfiedReason::FunctionType;
-
-  if (T->isIncompleteArrayType())
-    SemaRef.Diag(Loc, diag::note_unsatisfied_trait_reason)
-        << diag::TraitNotSatisfiedReason::IncompleteArrayType;
-
-  const CXXRecordDecl *D = T->getAsCXXRecordDecl();
-  if (!D || D->isInvalidDecl() || !D->hasDefinition())
-    return;
-
-  llvm::BumpPtrAllocator OpaqueExprAllocator;
-  SmallVector<Expr *, 2> ArgExprs;
-  ArgExprs.reserve(Ts.size() - 1);
-  for (unsigned I = 1, N = Ts.size(); I != N; ++I) {
-    QualType ArgTy = Ts[I];
-    if (ArgTy->isObjectType() || ArgTy->isFunctionType())
-      ArgTy = SemaRef.Context.getRValueReferenceType(ArgTy);
-    ArgExprs.push_back(
-        new (OpaqueExprAllocator.Allocate<OpaqueValueExpr>())
-            OpaqueValueExpr(Loc, ArgTy.getNonLValueExprType(SemaRef.Context),
-                            Expr::getValueKindForType(ArgTy)));
-  }
-
-  EnterExpressionEvaluationContext Unevaluated(
-      SemaRef, Sema::ExpressionEvaluationContext::Unevaluated);
-  Sema::ContextRAII TUContext(SemaRef,
-                              SemaRef.Context.getTranslationUnitDecl());
-  InitializedEntity To(InitializedEntity::InitializeTemporary(T));
-  InitializationKind InitKind(InitializationKind::CreateDirect(Loc, Loc, Loc));
-  InitializationSequence Init(SemaRef, To, InitKind, ArgExprs);
-
-  Init.Diagnose(SemaRef, To, InitKind, ArgExprs);
-  SemaRef.Diag(D->getLocation(), diag::note_defined_here) << D;
-}
-
 static void DiagnoseNonTriviallyCopyableReason(Sema &SemaRef,
                                                SourceLocation Loc, QualType T) {
   SemaRef.Diag(Loc, diag::note_unsatisfied_trait)
@@ -2624,9 +2559,6 @@ void Sema::DiagnoseTypeTraitDetails(const Expr *E) {
   case UTT_IsStandardLayout:
     DiagnoseNonStandardLayoutReason(*this, E->getBeginLoc(), Args[0]);
     break;
-  case TT_IsConstructible:
-    DiagnoseNonConstructibleReason(*this, E->getBeginLoc(), Args);
-    break;
   default:
     break;
   }
diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp
index f896f9f1..682d263 100644
--- a/clang/lib/Serialization/ASTReader.cpp
+++ b/clang/lib/Serialization/ASTReader.cpp
@@ -677,8 +677,7 @@ bool PCHValidator::ReadDiagnosticOptions(DiagnosticOptions &DiagOpts,
                                          bool Complain) {
   DiagnosticsEngine &ExistingDiags = PP.getDiagnostics();
   IntrusiveRefCntPtr<DiagnosticIDs> DiagIDs(ExistingDiags.getDiagnosticIDs());
-  IntrusiveRefCntPtr<DiagnosticsEngine> Diags(
-      new DiagnosticsEngine(DiagIDs, DiagOpts));
+  auto Diags = llvm::makeIntrusiveRefCnt<DiagnosticsEngine>(DiagIDs, DiagOpts);
   // This should never fail, because we would have processed these options
   // before writing them to an ASTFile.
   ProcessWarningOptions(*Diags, DiagOpts,
diff --git a/clang/lib/Serialization/ASTReaderDecl.cpp b/clang/lib/Serialization/ASTReaderDecl.cpp
index bd84a97..cdaf38d 100644
--- a/clang/lib/Serialization/ASTReaderDecl.cpp
+++ b/clang/lib/Serialization/ASTReaderDecl.cpp
@@ -1147,7 +1147,8 @@ void ASTDeclReader::VisitFunctionDecl(FunctionDecl *FD) {
     const SYCLKernelInfo *SKI = C.findSYCLKernelInfo(SKEPAttr->getKernelName());
     if (SKI) {
       if (!declaresSameEntity(FD, SKI->getKernelEntryPointDecl())) {
-        Reader.Diag(FD->getLocation(), diag::err_sycl_kernel_name_conflict);
+        Reader.Diag(FD->getLocation(), diag::err_sycl_kernel_name_conflict)
+            << SKEPAttr;
         Reader.Diag(SKI->getKernelEntryPointDecl()->getLocation(),
                     diag::note_previous_declaration);
         SKEPAttr->setInvalidAttr();
diff --git a/clang/lib/StaticAnalyzer/Checkers/BasicObjCFoundationChecks.cpp b/clang/lib/StaticAnalyzer/Checkers/BasicObjCFoundationChecks.cpp
index 88feb6a..e682c4e 100644
--- a/clang/lib/StaticAnalyzer/Checkers/BasicObjCFoundationChecks.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/BasicObjCFoundationChecks.cpp
@@ -99,7 +99,7 @@ class NilArgChecker : public Checker<check::PreObjCMessage,
                                      check::PostStmt<ObjCDictionaryLiteral>,
                                      check::PostStmt<ObjCArrayLiteral>,
                                      EventDispatcher<ImplicitNullDerefEvent>> {
-  mutable std::unique_ptr<APIMisuse> BT;
+  const APIMisuse BT{this, "nil argument"};
 
   mutable llvm::SmallDenseMap<Selector, unsigned, 16> StringSelectors;
   mutable Selector ArrayWithObjectSel;
@@ -218,10 +218,7 @@ void NilArgChecker::generateBugReport(ExplodedNode *N,
                                       SourceRange Range,
                                       const Expr *E,
                                       CheckerContext &C) const {
-  if (!BT)
-    BT.reset(new APIMisuse(this, "nil argument"));
-
-  auto R = std::make_unique<PathSensitiveBugReport>(*BT, Msg, N);
+  auto R = std::make_unique<PathSensitiveBugReport>(BT, Msg, N);
   R->addRange(Range);
   bugreporter::trackExpressionValue(N, E, *R);
   C.emitReport(std::move(R));
@@ -350,7 +347,7 @@ void NilArgChecker::checkPostStmt(const ObjCDictionaryLiteral *DL,
 
 namespace {
 class CFNumberChecker : public Checker< check::PreStmt<CallExpr> > {
-  mutable std::unique_ptr<APIMisuse> BT;
+  const APIMisuse BT{this, "Bad use of CFNumber APIs"};
   mutable IdentifierInfo *ICreate = nullptr, *IGetValue = nullptr;
 public:
   CFNumberChecker() = default;
@@ -524,10 +521,7 @@ void CFNumberChecker::checkPreStmt(const CallExpr *CE,
       << " bits of the integer value will be "
       << (isCreate ? "lost." : "garbage.");
 
-    if (!BT)
-      BT.reset(new APIMisuse(this, "Bad use of CFNumber APIs"));
-
-    auto report = std::make_unique<PathSensitiveBugReport>(*BT, os.str(), N);
+    auto report = std::make_unique<PathSensitiveBugReport>(BT, os.str(), N);
     report->addRange(CE->getArg(2)->getSourceRange());
     C.emitReport(std::move(report));
   }
@@ -539,7 +533,7 @@ void CFNumberChecker::checkPreStmt(const CallExpr *CE,
 
 namespace {
 class CFRetainReleaseChecker : public Checker<check::PreCall> {
-  mutable APIMisuse BT{this, "null passed to CF memory management function"};
+  const APIMisuse BT{this, "null passed to CF memory management function"};
   const CallDescriptionSet ModelledCalls = {
       {CDM::CLibrary, {"CFRetain"}, 1},
       {CDM::CLibrary, {"CFRelease"}, 1},
@@ -600,7 +594,8 @@ class ClassReleaseChecker : public Checker<check::PreObjCMessage> {
   mutable Selector retainS;
   mutable Selector autoreleaseS;
   mutable Selector drainS;
-  mutable std::unique_ptr<BugType> BT;
+  const APIMisuse BT{
+      this, "message incorrectly sent to class instead of class instance"};
 
 public:
   void checkPreObjCMessage(const ObjCMethodCall &msg, CheckerContext &C) const;
@@ -609,10 +604,7 @@ public:
 
 void ClassReleaseChecker::checkPreObjCMessage(const ObjCMethodCall &msg,
                                               CheckerContext &C) const {
-  if (!BT) {
-    BT.reset(new APIMisuse(
-        this, "message incorrectly sent to class instead of class instance"));
-
+  if (releaseS.isNull()) {
     ASTContext &Ctx = C.getASTContext();
     releaseS = GetNullarySelector("release", Ctx);
     retainS = GetNullarySelector("retain", Ctx);
@@ -639,7 +631,7 @@ void ClassReleaseChecker::checkPreObjCMessage(const ObjCMethodCall &msg,
           "of class '" << Class->getName()
        << "' and not the class directly";
 
-    auto report = std::make_unique<PathSensitiveBugReport>(*BT, os.str(), N);
+    auto report = std::make_unique<PathSensitiveBugReport>(BT, os.str(), N);
     report->addRange(msg.getSourceRange());
     C.emitReport(std::move(report));
   }
@@ -658,7 +650,8 @@ class VariadicMethodTypeChecker : public Checker<check::PreObjCMessage> {
   mutable Selector orderedSetWithObjectsS;
   mutable Selector initWithObjectsS;
   mutable Selector initWithObjectsAndKeysS;
-  mutable std::unique_ptr<BugType> BT;
+  const APIMisuse BT{this, "Arguments passed to variadic method aren't all "
+                           "Objective-C pointer types"};
 
   bool isVariadicMessage(const ObjCMethodCall &msg) const;
 
@@ -717,11 +710,7 @@ VariadicMethodTypeChecker::isVariadicMessage(const ObjCMethodCall &msg) const {
 
 void VariadicMethodTypeChecker::checkPreObjCMessage(const ObjCMethodCall &msg,
                                                     CheckerContext &C) const {
-  if (!BT) {
-    BT.reset(new APIMisuse(this,
-                           "Arguments passed to variadic method aren't all "
-                           "Objective-C pointer types"));
-
+  if (arrayWithObjectsS.isNull()) {
     ASTContext &Ctx = C.getASTContext();
     arrayWithObjectsS = GetUnarySelector("arrayWithObjects", Ctx);
     dictionaryWithObjectsAndKeysS =
@@ -792,8 +781,7 @@ void VariadicMethodTypeChecker::checkPreObjCMessage(const ObjCMethodCall &msg,
     ArgTy.print(os, C.getLangOpts());
     os << "'";
 
-    auto R =
-        std::make_unique<PathSensitiveBugReport>(*BT, os.str(), *errorNode);
+    auto R = std::make_unique<PathSensitiveBugReport>(BT, os.str(), *errorNode);
     R->addRange(msg.getArgSourceRange(I));
     C.emitReport(std::move(R));
   }
diff --git a/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp
index 31cb150..fd0a398 100644
--- a/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp
@@ -78,35 +78,30 @@ static QualType getCharPtrType(ASTContext &Ctx, CharKind CK) {
                                                     : Ctx.WideCharTy);
 }
 
-class CStringChecker : public Checker< eval::Call,
-                                         check::PreStmt<DeclStmt>,
-                                         check::LiveSymbols,
-                                         check::DeadSymbols,
-                                         check::RegionChanges
-                                         > {
-  mutable std::unique_ptr<BugType> BT_Null, BT_Bounds, BT_Overlap,
-      BT_NotCString, BT_AdditionOverflow, BT_UninitRead;
-
+class CStringChecker
+    : public CheckerFamily<eval::Call, check::PreStmt<DeclStmt>,
+                           check::LiveSymbols, check::DeadSymbols,
+                           check::RegionChanges> {
   mutable const char *CurrentFunctionDescription = nullptr;
 
 public:
-  /// The filter is used to filter out the diagnostics which are not enabled by
-  /// the user.
-  struct CStringChecksFilter {
-    bool CheckCStringNullArg = false;
-    bool CheckCStringOutOfBounds = false;
-    bool CheckCStringBufferOverlap = false;
-    bool CheckCStringNotNullTerm = false;
-    bool CheckCStringUninitializedRead = false;
-
-    CheckerNameRef CheckNameCStringNullArg;
-    CheckerNameRef CheckNameCStringOutOfBounds;
-    CheckerNameRef CheckNameCStringBufferOverlap;
-    CheckerNameRef CheckNameCStringNotNullTerm;
-    CheckerNameRef CheckNameCStringUninitializedRead;
-  };
-
-  CStringChecksFilter Filter;
+  // FIXME: The bug types emitted by this checker family have confused garbage
+  // in their Description and Category fields (e.g. `categories::UnixAPI` is
+  // passed as the description in several cases and `uninitialized` is mistyped
+  // as `unitialized`). This should be cleaned up.
+  CheckerFrontendWithBugType NullArg{categories::UnixAPI};
+  CheckerFrontendWithBugType OutOfBounds{"Out-of-bound array access"};
+  CheckerFrontendWithBugType BufferOverlap{categories::UnixAPI,
+                                           "Improper arguments"};
+  CheckerFrontendWithBugType NotNullTerm{categories::UnixAPI};
+  CheckerFrontendWithBugType UninitializedRead{
+      "Accessing unitialized/garbage values"};
+
+  // FIXME: This bug type should be removed because it is only emitted in a
+  // situation that is practically impossible.
+  const BugType AdditionOverflow{&OutOfBounds, "API"};
+
+  StringRef getDebugTag() const override { return "MallocChecker"; }
 
   static void *getTag() { static int tag; return &tag; }
 
@@ -384,7 +379,7 @@ ProgramStateRef CStringChecker::checkNonNull(CheckerContext &C,
       assumeZero(C, State, l, Arg.Expression->getType());
 
   if (stateNull && !stateNonNull) {
-    if (Filter.CheckCStringNullArg) {
+    if (NullArg.isEnabled()) {
       SmallString<80> buf;
       llvm::raw_svector_ostream OS(buf);
       assert(CurrentFunctionDescription);
@@ -468,7 +463,7 @@ ProgramStateRef CStringChecker::checkInit(CheckerContext &C,
     return State;
 
   // Ensure that we wouldn't read uninitialized value.
-  if (Filter.CheckCStringUninitializedRead &&
+  if (UninitializedRead.isEnabled() &&
       State->getSVal(*FirstElementVal).isUndef()) {
     llvm::SmallString<258> Buf;
     llvm::raw_svector_ostream OS(Buf);
@@ -524,7 +519,7 @@ ProgramStateRef CStringChecker::checkInit(CheckerContext &C,
   if (!isa<Loc>(LastElementVal))
     return State;
 
-  if (Filter.CheckCStringUninitializedRead &&
+  if (UninitializedRead.isEnabled() &&
       State->getSVal(LastElementVal.castAs<Loc>()).isUndef()) {
     const llvm::APSInt *IdxInt = LastIdx.getAsInteger();
     // If we can't get emit a sensible last element index, just bail out --
@@ -581,13 +576,9 @@ ProgramStateRef CStringChecker::CheckLocation(CheckerContext &C,
 
   auto [StInBound, StOutBound] = state->assumeInBoundDual(*Idx, Size);
   if (StOutBound && !StInBound) {
-    // These checks are either enabled by the CString out-of-bounds checker
-    // explicitly or implicitly by the Malloc checker.
-    // In the latter case we only do modeling but do not emit warning.
-    if (!Filter.CheckCStringOutOfBounds)
+    if (!OutOfBounds.isEnabled())
       return nullptr;
 
-    // Emit a bug report.
     ErrorMessage Message =
         createOutOfBoundErrorMsg(CurrentFunctionDescription, Access);
     emitOutOfBoundsBug(C, StOutBound, Buffer.Expression, Message);
@@ -620,7 +611,7 @@ CStringChecker::CheckBufferAccess(CheckerContext &C, ProgramStateRef State,
     return nullptr;
 
   // If out-of-bounds checking is turned off, skip the rest.
-  if (!Filter.CheckCStringOutOfBounds)
+  if (!OutOfBounds.isEnabled())
     return State;
 
   SVal BufStart =
@@ -670,7 +661,7 @@ ProgramStateRef CStringChecker::CheckOverlap(CheckerContext &C,
                                              SizeArgExpr Size, AnyArgExpr First,
                                              AnyArgExpr Second,
                                              CharKind CK) const {
-  if (!Filter.CheckCStringBufferOverlap)
+  if (!BufferOverlap.isEnabled())
     return state;
 
   // Do a simple check for overlap: if the two arguments are from the same
@@ -789,13 +780,9 @@ void CStringChecker::emitOverlapBug(CheckerContext &C, ProgramStateRef state,
   if (!N)
     return;
 
-  if (!BT_Overlap)
-    BT_Overlap.reset(new BugType(Filter.CheckNameCStringBufferOverlap,
-                                 categories::UnixAPI, "Improper arguments"));
-
   // Generate a report for this bug.
   auto report = std::make_unique<PathSensitiveBugReport>(
-      *BT_Overlap, "Arguments must not be overlapping buffers", N);
+      BufferOverlap, "Arguments must not be overlapping buffers", N);
   report->addRange(First->getSourceRange());
   report->addRange(Second->getSourceRange());
 
@@ -805,15 +792,8 @@ void CStringChecker::emitOverlapBug(CheckerContext &C, ProgramStateRef state,
 void CStringChecker::emitNullArgBug(CheckerContext &C, ProgramStateRef State,
                                     const Stmt *S, StringRef WarningMsg) const {
   if (ExplodedNode *N = C.generateErrorNode(State)) {
-    if (!BT_Null) {
-      // FIXME: This call uses the string constant 'categories::UnixAPI' as the
-      // description of the bug; it should be replaced by a real description.
-      BT_Null.reset(
-          new BugType(Filter.CheckNameCStringNullArg, categories::UnixAPI));
-    }
-
     auto Report =
-        std::make_unique<PathSensitiveBugReport>(*BT_Null, WarningMsg, N);
+        std::make_unique<PathSensitiveBugReport>(NullArg, WarningMsg, N);
     Report->addRange(S->getSourceRange());
     if (const auto *Ex = dyn_cast<Expr>(S))
       bugreporter::trackExpressionValue(N, Ex, *Report);
@@ -826,12 +806,8 @@ void CStringChecker::emitUninitializedReadBug(CheckerContext &C,
                                               const Expr *E, const MemRegion *R,
                                               StringRef Msg) const {
   if (ExplodedNode *N = C.generateErrorNode(State)) {
-    if (!BT_UninitRead)
-      BT_UninitRead.reset(new BugType(Filter.CheckNameCStringUninitializedRead,
-                                      "Accessing unitialized/garbage values"));
-
     auto Report =
-        std::make_unique<PathSensitiveBugReport>(*BT_UninitRead, Msg, N);
+        std::make_unique<PathSensitiveBugReport>(UninitializedRead, Msg, N);
     Report->addNote("Other elements might also be undefined",
                     Report->getLocation());
     Report->addRange(E->getSourceRange());
@@ -845,17 +821,11 @@ void CStringChecker::emitOutOfBoundsBug(CheckerContext &C,
                                         ProgramStateRef State, const Stmt *S,
                                         StringRef WarningMsg) const {
   if (ExplodedNode *N = C.generateErrorNode(State)) {
-    if (!BT_Bounds)
-      BT_Bounds.reset(new BugType(Filter.CheckCStringOutOfBounds
-                                      ? Filter.CheckNameCStringOutOfBounds
-                                      : Filter.CheckNameCStringNullArg,
-                                  "Out-of-bound array access"));
-
     // FIXME: It would be nice to eventually make this diagnostic more clear,
     // e.g., by referencing the original declaration or by saying *why* this
     // reference is outside the range.
     auto Report =
-        std::make_unique<PathSensitiveBugReport>(*BT_Bounds, WarningMsg, N);
+        std::make_unique<PathSensitiveBugReport>(OutOfBounds, WarningMsg, N);
     Report->addRange(S->getSourceRange());
     C.emitReport(std::move(Report));
   }
@@ -865,15 +835,8 @@ void CStringChecker::emitNotCStringBug(CheckerContext &C, ProgramStateRef State,
                                        const Stmt *S,
                                        StringRef WarningMsg) const {
   if (ExplodedNode *N = C.generateNonFatalErrorNode(State)) {
-    if (!BT_NotCString) {
-      // FIXME: This call uses the string constant 'categories::UnixAPI' as the
-      // description of the bug; it should be replaced by a real description.
-      BT_NotCString.reset(
-          new BugType(Filter.CheckNameCStringNotNullTerm, categories::UnixAPI));
-    }
-
     auto Report =
-        std::make_unique<PathSensitiveBugReport>(*BT_NotCString, WarningMsg, N);
+        std::make_unique<PathSensitiveBugReport>(NotNullTerm, WarningMsg, N);
 
     Report->addRange(S->getSourceRange());
     C.emitReport(std::move(Report));
@@ -883,14 +846,6 @@ void CStringChecker::emitNotCStringBug(CheckerContext &C, ProgramStateRef State,
 void CStringChecker::emitAdditionOverflowBug(CheckerContext &C,
                                              ProgramStateRef State) const {
   if (ExplodedNode *N = C.generateErrorNode(State)) {
-    if (!BT_AdditionOverflow) {
-      // FIXME: This call uses the word "API" as the description of the bug;
-      // it should be replaced by a better error message (if this unlikely
-      // situation continues to exist as a separate bug type).
-      BT_AdditionOverflow.reset(
-          new BugType(Filter.CheckNameCStringOutOfBounds, "API"));
-    }
-
     // This isn't a great error message, but this should never occur in real
     // code anyway -- you'd have to create a buffer longer than a size_t can
     // represent, which is sort of a contradiction.
@@ -898,7 +853,7 @@ void CStringChecker::emitAdditionOverflowBug(CheckerContext &C,
         "This expression will create a string whose length is too big to "
         "be represented as a size_t";
 
-    auto Report = std::make_unique<PathSensitiveBugReport>(*BT_AdditionOverflow,
+    auto Report = std::make_unique<PathSensitiveBugReport>(AdditionOverflow,
                                                            WarningMsg, N);
     C.emitReport(std::move(Report));
   }
@@ -909,7 +864,7 @@ ProgramStateRef CStringChecker::checkAdditionOverflow(CheckerContext &C,
                                                      NonLoc left,
                                                      NonLoc right) const {
   // If out-of-bounds checking is turned off, skip the rest.
-  if (!Filter.CheckCStringOutOfBounds)
+  if (!OutOfBounds.isEnabled())
     return state;
 
   // If a previous check has failed, propagate the failure.
@@ -1048,7 +1003,7 @@ SVal CStringChecker::getCStringLength(CheckerContext &C, ProgramStateRef &state,
     // C string. In the context of locations, the only time we can issue such
     // a warning is for labels.
     if (std::optional<loc::GotoLabel> Label = Buf.getAs<loc::GotoLabel>()) {
-      if (Filter.CheckCStringNotNullTerm) {
+      if (NotNullTerm.isEnabled()) {
         SmallString<120> buf;
         llvm::raw_svector_ostream os(buf);
         assert(CurrentFunctionDescription);
@@ -1110,7 +1065,7 @@ SVal CStringChecker::getCStringLength(CheckerContext &C, ProgramStateRef &state,
     // Other regions (mostly non-data) can't have a reliable C string length.
     // In this case, an error is emitted and UndefinedVal is returned.
     // The caller should always be prepared to handle this case.
-    if (Filter.CheckCStringNotNullTerm) {
+    if (NotNullTerm.isEnabled()) {
       SmallString<120> buf;
       llvm::raw_svector_ostream os(buf);
 
@@ -2873,24 +2828,27 @@ void CStringChecker::checkDeadSymbols(SymbolReaper &SR,
 }
 
 void ento::registerCStringModeling(CheckerManager &Mgr) {
-  Mgr.registerChecker<CStringChecker>();
+  // Other checker relies on the modeling implemented in this checker family,
+  // so this "modeling checker" can register the 'CStringChecker' backend for
+  // its callbacks without enabling any of its frontends.
+  Mgr.getChecker<CStringChecker>();
 }
 
-bool ento::shouldRegisterCStringModeling(const CheckerManager &mgr) {
+bool ento::shouldRegisterCStringModeling(const CheckerManager &) {
   return true;
 }
 
-#define REGISTER_CHECKER(name)                                                 \
-  void ento::register##name(CheckerManager &mgr) {                             \
-    CStringChecker *checker = mgr.getChecker<CStringChecker>();                \
-    checker->Filter.Check##name = true;                                        \
-    checker->Filter.CheckName##name = mgr.getCurrentCheckerName();             \
+#define REGISTER_CHECKER(NAME)                                                 \
+  void ento::registerCString##NAME(CheckerManager &Mgr) {                      \
+    Mgr.getChecker<CStringChecker>()->NAME.enable(Mgr);                        \
   }                                                                            \
                                                                                \
-  bool ento::shouldRegister##name(const CheckerManager &mgr) { return true; }
+  bool ento::shouldRegisterCString##NAME(const CheckerManager &) {             \
+    return true;                                                               \
+  }
 
-REGISTER_CHECKER(CStringNullArg)
-REGISTER_CHECKER(CStringOutOfBounds)
-REGISTER_CHECKER(CStringBufferOverlap)
-REGISTER_CHECKER(CStringNotNullTerm)
-REGISTER_CHECKER(CStringUninitializedRead)
+REGISTER_CHECKER(NullArg)
+REGISTER_CHECKER(OutOfBounds)
+REGISTER_CHECKER(BufferOverlap)
+REGISTER_CHECKER(NotNullTerm)
+REGISTER_CHECKER(UninitializedRead)
diff --git a/clang/lib/StaticAnalyzer/Checkers/CheckPlacementNew.cpp b/clang/lib/StaticAnalyzer/Checkers/CheckPlacementNew.cpp
index 839c8bc..a227ca0 100644
--- a/clang/lib/StaticAnalyzer/Checkers/CheckPlacementNew.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/CheckPlacementNew.cpp
@@ -111,32 +111,12 @@ bool PlacementNewChecker::checkPlaceCapacityIsSufficient(
   if (!SizeOfPlaceCI)
     return true;
 
-  if ((SizeOfPlaceCI->getValue() < SizeOfTargetCI->getValue()) ||
-      (IsArrayTypeAllocated &&
-       SizeOfPlaceCI->getValue() >= SizeOfTargetCI->getValue())) {
+  if ((SizeOfPlaceCI->getValue() < SizeOfTargetCI->getValue())) {
     if (ExplodedNode *N = C.generateErrorNode(C.getState())) {
-      std::string Msg;
-      // TODO: use clang constant
-      if (IsArrayTypeAllocated &&
-          SizeOfPlaceCI->getValue() > SizeOfTargetCI->getValue())
-        Msg = std::string(llvm::formatv(
-            "{0} bytes is possibly not enough for array allocation which "
-            "requires {1} bytes. Current overhead requires the size of {2} "
-            "bytes",
-            SizeOfPlaceCI->getValue(), SizeOfTargetCI->getValue(),
-            *SizeOfPlaceCI->getValue() - SizeOfTargetCI->getValue()));
-      else if (IsArrayTypeAllocated &&
-               SizeOfPlaceCI->getValue() == SizeOfTargetCI->getValue())
-        Msg = std::string(llvm::formatv(
-            "Storage provided to placement new is only {0} bytes, "
-            "whereas the allocated array type requires more space for "
-            "internal needs",
-            SizeOfPlaceCI->getValue()));
-      else
-        Msg = std::string(llvm::formatv(
-            "Storage provided to placement new is only {0} bytes, "
-            "whereas the allocated type requires {1} bytes",
-            SizeOfPlaceCI->getValue(), SizeOfTargetCI->getValue()));
+      std::string Msg =
+          llvm::formatv("Storage provided to placement new is only {0} bytes, "
+                        "whereas the allocated type requires {1} bytes",
+                        SizeOfPlaceCI->getValue(), SizeOfTargetCI->getValue());
 
       auto R = std::make_unique<PathSensitiveBugReport>(SBT, Msg, N);
       bugreporter::trackExpressionValue(N, NE->getPlacementArg(0), *R);
diff --git a/clang/lib/StaticAnalyzer/Checkers/NSErrorChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/NSErrorChecker.cpp
index 15fd9a0..d2760ca 100644
--- a/clang/lib/StaticAnalyzer/Checkers/NSErrorChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/NSErrorChecker.cpp
@@ -142,34 +142,19 @@ void CFErrorFunctionChecker::checkASTDecl(const FunctionDecl *D,
 //===----------------------------------------------------------------------===//
 
 namespace {
+class NSOrCFErrorDerefChecker
+    : public CheckerFamily<check::Location,
+                           check::Event<ImplicitNullDerefEvent>> {
+  mutable IdentifierInfo *NSErrorII = nullptr;
+  mutable IdentifierInfo *CFErrorII = nullptr;
 
-class NSErrorDerefBug : public BugType {
-public:
-  NSErrorDerefBug(const CheckerNameRef Checker)
-      : BugType(Checker, "NSError** null dereference",
-                "Coding conventions (Apple)") {}
-};
-
-class CFErrorDerefBug : public BugType {
 public:
-  CFErrorDerefBug(const CheckerNameRef Checker)
-      : BugType(Checker, "CFErrorRef* null dereference",
-                "Coding conventions (Apple)") {}
-};
-
-}
+  CheckerFrontendWithBugType NSError{"NSError** null dereference",
+                                     "Coding conventions (Apple)"};
+  CheckerFrontendWithBugType CFError{"CFErrorRef* null dereference",
+                                     "Coding conventions (Apple)"};
 
-namespace {
-class NSOrCFErrorDerefChecker
-    : public Checker< check::Location,
-                        check::Event<ImplicitNullDerefEvent> > {
-  mutable IdentifierInfo *NSErrorII, *CFErrorII;
-  mutable std::unique_ptr<NSErrorDerefBug> NSBT;
-  mutable std::unique_ptr<CFErrorDerefBug> CFBT;
-public:
-  bool ShouldCheckNSError = false, ShouldCheckCFError = false;
-  CheckerNameRef NSErrorName, CFErrorName;
-  NSOrCFErrorDerefChecker() : NSErrorII(nullptr), CFErrorII(nullptr) {}
+  StringRef getDebugTag() const override { return "NSOrCFErrorDerefChecker"; }
 
   void checkLocation(SVal loc, bool isLoad, const Stmt *S,
                      CheckerContext &C) const;
@@ -236,12 +221,12 @@ void NSOrCFErrorDerefChecker::checkLocation(SVal loc, bool isLoad,
   if (!CFErrorII)
     CFErrorII = &Ctx.Idents.get("CFErrorRef");
 
-  if (ShouldCheckNSError && IsNSError(parmT, NSErrorII)) {
+  if (NSError.isEnabled() && IsNSError(parmT, NSErrorII)) {
     setFlag<NSErrorOut>(state, state->getSVal(loc.castAs<Loc>()), C);
     return;
   }
 
-  if (ShouldCheckCFError && IsCFError(parmT, CFErrorII)) {
+  if (CFError.isEnabled() && IsCFError(parmT, CFErrorII)) {
     setFlag<CFErrorOut>(state, state->getSVal(loc.castAs<Loc>()), C);
     return;
   }
@@ -274,19 +259,9 @@ void NSOrCFErrorDerefChecker::checkEvent(ImplicitNullDerefEvent event) const {
 
   os  << " may be null";
 
-  BugType *bug = nullptr;
-  if (isNSError) {
-    if (!NSBT)
-      NSBT.reset(new NSErrorDerefBug(NSErrorName));
-    bug = NSBT.get();
-  }
-  else {
-    if (!CFBT)
-      CFBT.reset(new CFErrorDerefBug(CFErrorName));
-    bug = CFBT.get();
-  }
+  const BugType &BT = isNSError ? NSError : CFError;
   BR.emitReport(
-      std::make_unique<PathSensitiveBugReport>(*bug, os.str(), event.SinkNode));
+      std::make_unique<PathSensitiveBugReport>(BT, os.str(), event.SinkNode));
 }
 
 static bool IsNSError(QualType T, IdentifierInfo *II) {
@@ -320,32 +295,21 @@ static bool IsCFError(QualType T, IdentifierInfo *II) {
   return TT->getDecl()->getIdentifier() == II;
 }
 
-void ento::registerNSOrCFErrorDerefChecker(CheckerManager &mgr) {
-  mgr.registerChecker<NSOrCFErrorDerefChecker>();
-}
-
-bool ento::shouldRegisterNSOrCFErrorDerefChecker(const CheckerManager &mgr) {
-  return true;
-}
-
-void ento::registerNSErrorChecker(CheckerManager &mgr) {
-  mgr.registerChecker<NSErrorMethodChecker>();
-  NSOrCFErrorDerefChecker *checker = mgr.getChecker<NSOrCFErrorDerefChecker>();
-  checker->ShouldCheckNSError = true;
-  checker->NSErrorName = mgr.getCurrentCheckerName();
-}
-
-bool ento::shouldRegisterNSErrorChecker(const CheckerManager &mgr) {
-  return true;
-}
-
-void ento::registerCFErrorChecker(CheckerManager &mgr) {
-  mgr.registerChecker<CFErrorFunctionChecker>();
-  NSOrCFErrorDerefChecker *checker = mgr.getChecker<NSOrCFErrorDerefChecker>();
-  checker->ShouldCheckCFError = true;
-  checker->CFErrorName = mgr.getCurrentCheckerName();
-}
+// This source file implements two user-facing checkers ("osx.cocoa.NSError"
+// and "osx.coreFoundation.CFError") which are both implemented as the
+// combination of two `CheckerFrontend`s that are registered under the same
+// name (but otherwise act independently). Among these 2+2 `CheckerFrontend`s
+// two are coming from the checker family `NSOrCFErrorDerefChecker` while the
+// other two (the `ADDITIONAL_PART`s) are small standalone checkers.
+#define REGISTER_CHECKER(NAME, ADDITIONAL_PART)                                \
+  void ento::register##NAME##Checker(CheckerManager &Mgr) {                    \
+    Mgr.getChecker<NSOrCFErrorDerefChecker>()->NAME.enable(Mgr);               \
+    Mgr.registerChecker<ADDITIONAL_PART>();                                    \
+  }                                                                            \
+                                                                               \
+  bool ento::shouldRegister##NAME##Checker(const CheckerManager &) {           \
+    return true;                                                               \
+  }
 
-bool ento::shouldRegisterCFErrorChecker(const CheckerManager &mgr) {
-  return true;
-}
+REGISTER_CHECKER(NSError, NSErrorMethodChecker)
+REGISTER_CHECKER(CFError, CFErrorFunctionChecker)
diff --git a/clang/lib/StaticAnalyzer/Checkers/StackAddrEscapeChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/StackAddrEscapeChecker.cpp
index a63497c..019e81f 100644
--- a/clang/lib/StaticAnalyzer/Checkers/StackAddrEscapeChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/StackAddrEscapeChecker.cpp
@@ -28,23 +28,22 @@ using namespace ento;
 
 namespace {
 class StackAddrEscapeChecker
-    : public Checker<check::PreCall, check::PreStmt<ReturnStmt>,
-                     check::EndFunction> {
+    : public CheckerFamily<check::PreCall, check::PreStmt<ReturnStmt>,
+                           check::EndFunction> {
   mutable IdentifierInfo *dispatch_semaphore_tII = nullptr;
-  mutable std::unique_ptr<BugType> BT_stackleak;
-  mutable std::unique_ptr<BugType> BT_returnstack;
-  mutable std::unique_ptr<BugType> BT_capturedstackasync;
-  mutable std::unique_ptr<BugType> BT_capturedstackret;
 
 public:
-  enum CheckKind {
-    CK_StackAddrEscapeChecker,
-    CK_StackAddrAsyncEscapeChecker,
-    CK_NumCheckKinds
-  };
+  StringRef getDebugTag() const override { return "StackAddrEscapeChecker"; }
+
+  CheckerFrontend StackAddrEscape;
+  CheckerFrontend StackAddrAsyncEscape;
 
-  bool ChecksEnabled[CK_NumCheckKinds] = {false};
-  CheckerNameRef CheckNames[CK_NumCheckKinds];
+  const BugType StackLeak{&StackAddrEscape,
+                          "Stack address leaks outside of stack frame"};
+  const BugType ReturnStack{&StackAddrEscape,
+                            "Return of address to stack-allocated memory"};
+  const BugType CapturedStackAsync{
+      &StackAddrAsyncEscape, "Address of stack-allocated memory is captured"};
 
   void checkPreCall(const CallEvent &Call, CheckerContext &C) const;
   void checkPreStmt(const ReturnStmt *RS, CheckerContext &C) const;
@@ -170,10 +169,6 @@ void StackAddrEscapeChecker::EmitReturnLeakError(CheckerContext &C,
   ExplodedNode *N = C.generateNonFatalErrorNode();
   if (!N)
     return;
-  if (!BT_returnstack)
-    BT_returnstack = std::make_unique<BugType>(
-        CheckNames[CK_StackAddrEscapeChecker],
-        "Return of address to stack-allocated memory");
 
   // Generate a report for this bug.
   SmallString<128> buf;
@@ -184,7 +179,7 @@ void StackAddrEscapeChecker::EmitReturnLeakError(CheckerContext &C,
   EmitReturnedAsPartOfError(os, C.getSVal(RetE), R);
 
   auto report =
-      std::make_unique<PathSensitiveBugReport>(*BT_returnstack, os.str(), N);
+      std::make_unique<PathSensitiveBugReport>(ReturnStack, os.str(), N);
   report->addRange(RetE->getSourceRange());
   if (range.isValid())
     report->addRange(range);
@@ -215,16 +210,12 @@ void StackAddrEscapeChecker::checkAsyncExecutedBlockCaptures(
     ExplodedNode *N = C.generateNonFatalErrorNode();
     if (!N)
       continue;
-    if (!BT_capturedstackasync)
-      BT_capturedstackasync = std::make_unique<BugType>(
-          CheckNames[CK_StackAddrAsyncEscapeChecker],
-          "Address of stack-allocated memory is captured");
     SmallString<128> Buf;
     llvm::raw_svector_ostream Out(Buf);
     SourceRange Range = genName(Out, Region, C.getASTContext());
     Out << " is captured by an asynchronously-executed block";
-    auto Report = std::make_unique<PathSensitiveBugReport>(
-        *BT_capturedstackasync, Out.str(), N);
+    auto Report = std::make_unique<PathSensitiveBugReport>(CapturedStackAsync,
+                                                           Out.str(), N);
     if (Range.isValid())
       Report->addRange(Range);
     C.emitReport(std::move(Report));
@@ -233,7 +224,7 @@ void StackAddrEscapeChecker::checkAsyncExecutedBlockCaptures(
 
 void StackAddrEscapeChecker::checkPreCall(const CallEvent &Call,
                                           CheckerContext &C) const {
-  if (!ChecksEnabled[CK_StackAddrAsyncEscapeChecker])
+  if (!StackAddrAsyncEscape.isEnabled())
     return;
   if (!Call.isGlobalCFunction("dispatch_after") &&
       !Call.isGlobalCFunction("dispatch_async"))
@@ -357,7 +348,7 @@ FindEscapingStackRegions(CheckerContext &C, const Expr *RetE, SVal RetVal) {
 
 void StackAddrEscapeChecker::checkPreStmt(const ReturnStmt *RS,
                                           CheckerContext &C) const {
-  if (!ChecksEnabled[CK_StackAddrEscapeChecker])
+  if (!StackAddrEscape.isEnabled())
     return;
 
   const Expr *RetE = RS->getRetValue();
@@ -456,7 +447,7 @@ static bool isInvalidatedSymbolRegion(const MemRegion *Region) {
 
 void StackAddrEscapeChecker::checkEndFunction(const ReturnStmt *RS,
                                               CheckerContext &Ctx) const {
-  if (!ChecksEnabled[CK_StackAddrEscapeChecker])
+  if (!StackAddrEscape.isEnabled())
     return;
 
   ExplodedNode *Node = Ctx.getPredecessor();
@@ -581,11 +572,6 @@ void StackAddrEscapeChecker::checkEndFunction(const ReturnStmt *RS,
   if (!N)
     return;
 
-  if (!BT_stackleak)
-    BT_stackleak =
-        std::make_unique<BugType>(CheckNames[CK_StackAddrEscapeChecker],
-                                  "Stack address leaks outside of stack frame");
-
   for (const auto &P : Cb.V) {
     const MemRegion *Referrer = P.first->getBaseRegion();
     const MemRegion *Referred = P.second;
@@ -604,7 +590,7 @@ void StackAddrEscapeChecker::checkEndFunction(const ReturnStmt *RS,
       Out << " is still referred to by a temporary object on the stack"
           << CommonSuffix;
       auto Report =
-          std::make_unique<PathSensitiveBugReport>(*BT_stackleak, Out.str(), N);
+          std::make_unique<PathSensitiveBugReport>(StackLeak, Out.str(), N);
       if (Range.isValid())
         Report->addRange(Range);
       Ctx.emitReport(std::move(Report));
@@ -618,7 +604,7 @@ void StackAddrEscapeChecker::checkEndFunction(const ReturnStmt *RS,
 
     Out << " is still referred to by the " << *ReferrerVariable << CommonSuffix;
     auto Report =
-        std::make_unique<PathSensitiveBugReport>(*BT_stackleak, Out.str(), N);
+        std::make_unique<PathSensitiveBugReport>(StackLeak, Out.str(), N);
     if (Range.isValid())
       Report->addRange(Range);
 
@@ -626,23 +612,14 @@ void StackAddrEscapeChecker::checkEndFunction(const ReturnStmt *RS,
   }
 }
 
-void ento::registerStackAddrEscapeBase(CheckerManager &mgr) {
-  mgr.registerChecker<StackAddrEscapeChecker>();
-}
-
-bool ento::shouldRegisterStackAddrEscapeBase(const CheckerManager &mgr) {
-  return true;
-}
-
-#define REGISTER_CHECKER(name)                                                 \
-  void ento::register##name(CheckerManager &Mgr) {                             \
-    StackAddrEscapeChecker *Chk = Mgr.getChecker<StackAddrEscapeChecker>();    \
-    Chk->ChecksEnabled[StackAddrEscapeChecker::CK_##name] = true;              \
-    Chk->CheckNames[StackAddrEscapeChecker::CK_##name] =                       \
-        Mgr.getCurrentCheckerName();                                           \
+#define REGISTER_CHECKER(NAME)                                                 \
+  void ento::register##NAME##Checker(CheckerManager &Mgr) {                    \
+    Mgr.getChecker<StackAddrEscapeChecker>()->NAME.enable(Mgr);                \
   }                                                                            \
                                                                                \
-  bool ento::shouldRegister##name(const CheckerManager &mgr) { return true; }
+  bool ento::shouldRegister##NAME##Checker(const CheckerManager &) {           \
+    return true;                                                               \
+  }
 
-REGISTER_CHECKER(StackAddrEscapeChecker)
-REGISTER_CHECKER(StackAddrAsyncEscapeChecker)
+REGISTER_CHECKER(StackAddrEscape)
+REGISTER_CHECKER(StackAddrAsyncEscape)
diff --git a/clang/lib/Tooling/CompilationDatabase.cpp b/clang/lib/Tooling/CompilationDatabase.cpp
index d5fc216..860457a 100644
--- a/clang/lib/Tooling/CompilationDatabase.cpp
+++ b/clang/lib/Tooling/CompilationDatabase.cpp
@@ -241,9 +241,8 @@ static bool stripPositionalArgs(std::vector<const char *> Args,
   llvm::raw_string_ostream Output(ErrorMsg);
   TextDiagnosticPrinter DiagnosticPrinter(Output, DiagOpts);
   UnusedInputDiagConsumer DiagClient(DiagnosticPrinter);
-  DiagnosticsEngine Diagnostics(
-      IntrusiveRefCntPtr<DiagnosticIDs>(new DiagnosticIDs()), DiagOpts,
-      &DiagClient, false);
+  DiagnosticsEngine Diagnostics(DiagnosticIDs::create(), DiagOpts, &DiagClient,
+                                false);
 
   // The clang executable path isn't required since the jobs the driver builds
   // will not be executed.
diff --git a/clang/lib/Tooling/Core/Replacement.cpp b/clang/lib/Tooling/Core/Replacement.cpp
index 1506218..10bdc22 100644
--- a/clang/lib/Tooling/Core/Replacement.cpp
+++ b/clang/lib/Tooling/Core/Replacement.cpp
@@ -581,12 +581,11 @@ llvm::Expected<std::string> applyAllReplacements(StringRef Code,
   if (Replaces.empty())
     return Code.str();
 
-  IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> InMemoryFileSystem(
-      new llvm::vfs::InMemoryFileSystem);
+  auto InMemoryFileSystem =
+      llvm::makeIntrusiveRefCnt<llvm::vfs::InMemoryFileSystem>();
   FileManager Files(FileSystemOptions(), InMemoryFileSystem);
   DiagnosticOptions DiagOpts;
-  DiagnosticsEngine Diagnostics(
-      IntrusiveRefCntPtr<DiagnosticIDs>(new DiagnosticIDs), DiagOpts);
+  DiagnosticsEngine Diagnostics(DiagnosticIDs::create(), DiagOpts);
   SourceManager SourceMgr(Diagnostics, Files);
   Rewriter Rewrite(SourceMgr, LangOptions());
   InMemoryFileSystem->addFile(
diff --git a/clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp b/clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp
index 8ce2706..b2b61de7 100644
--- a/clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp
+++ b/clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp
@@ -605,8 +605,8 @@ DependencyScanningWorker::DependencyScanningWorker(
 
   switch (Service.getMode()) {
   case ScanningMode::DependencyDirectivesScan:
-    DepFS =
-        new DependencyScanningWorkerFilesystem(Service.getSharedCache(), FS);
+    DepFS = llvm::makeIntrusiveRefCnt<DependencyScanningWorkerFilesystem>(
+        Service.getSharedCache(), FS);
     BaseFS = DepFS;
     break;
   case ScanningMode::CanonicalPreprocessing:
diff --git a/clang/lib/Tooling/Refactoring.cpp b/clang/lib/Tooling/Refactoring.cpp
index d2b0b37..9b1af237 100644
--- a/clang/lib/Tooling/Refactoring.cpp
+++ b/clang/lib/Tooling/Refactoring.cpp
@@ -39,9 +39,8 @@ int RefactoringTool::runAndSave(FrontendActionFactory *ActionFactory) {
   LangOptions DefaultLangOptions;
   DiagnosticOptions DiagOpts;
   TextDiagnosticPrinter DiagnosticPrinter(llvm::errs(), DiagOpts);
-  DiagnosticsEngine Diagnostics(
-      IntrusiveRefCntPtr<DiagnosticIDs>(new DiagnosticIDs()), DiagOpts,
-      &DiagnosticPrinter, false);
+  DiagnosticsEngine Diagnostics(DiagnosticIDs::create(), DiagOpts,
+                                &DiagnosticPrinter, false);
   SourceManager Sources(Diagnostics, getFiles());
   Rewriter Rewrite(Sources, DefaultLangOptions);
 
diff --git a/clang/lib/Tooling/Tooling.cpp b/clang/lib/Tooling/Tooling.cpp
index 5333956..ecafe26 100644
--- a/clang/lib/Tooling/Tooling.cpp
+++ b/clang/lib/Tooling/Tooling.cpp
@@ -227,10 +227,11 @@ bool runToolOnCodeWithArgs(
     const Twine &ToolName,
     std::shared_ptr<PCHContainerOperations> PCHContainerOps,
     const FileContentMappings &VirtualMappedFiles) {
-  llvm::IntrusiveRefCntPtr<llvm::vfs::OverlayFileSystem> OverlayFileSystem(
-      new llvm::vfs::OverlayFileSystem(llvm::vfs::getRealFileSystem()));
-  llvm::IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> InMemoryFileSystem(
-      new llvm::vfs::InMemoryFileSystem);
+  auto OverlayFileSystem =
+      llvm::makeIntrusiveRefCnt<llvm::vfs::OverlayFileSystem>(
+          llvm::vfs::getRealFileSystem());
+  auto InMemoryFileSystem =
+      llvm::makeIntrusiveRefCnt<llvm::vfs::InMemoryFileSystem>();
   OverlayFileSystem->pushOverlay(InMemoryFileSystem);
 
   SmallString<1024> CodeStorage;
@@ -403,7 +404,7 @@ bool ToolInvocation::run() {
   }
 
   const std::unique_ptr<driver::Driver> Driver(
-      newDriver(&*Diagnostics, BinaryName, &Files->getVirtualFileSystem()));
+      newDriver(&*Diagnostics, BinaryName, Files->getVirtualFileSystemPtr()));
   // The "input file not found" diagnostics from the driver are useful.
   // The driver is only aware of the VFS working directory, but some clients
   // change this at the FileManager level instead.
@@ -473,8 +474,10 @@ ClangTool::ClangTool(const CompilationDatabase &Compilations,
                      IntrusiveRefCntPtr<FileManager> Files)
     : Compilations(Compilations), SourcePaths(SourcePaths),
       PCHContainerOps(std::move(PCHContainerOps)),
-      OverlayFileSystem(new llvm::vfs::OverlayFileSystem(std::move(BaseFS))),
-      InMemoryFileSystem(new llvm::vfs::InMemoryFileSystem),
+      OverlayFileSystem(llvm::makeIntrusiveRefCnt<llvm::vfs::OverlayFileSystem>(
+          std::move(BaseFS))),
+      InMemoryFileSystem(
+          llvm::makeIntrusiveRefCnt<llvm::vfs::InMemoryFileSystem>()),
       Files(Files ? Files
                   : new FileManager(FileSystemOptions(), OverlayFileSystem)) {
   OverlayFileSystem->pushOverlay(InMemoryFileSystem);
@@ -692,10 +695,11 @@ std::unique_ptr<ASTUnit> buildASTFromCodeWithArgs(
     IntrusiveRefCntPtr<llvm::vfs::FileSystem> BaseFS) {
   std::vector<std::unique_ptr<ASTUnit>> ASTs;
   ASTBuilderAction Action(ASTs);
-  llvm::IntrusiveRefCntPtr<llvm::vfs::OverlayFileSystem> OverlayFileSystem(
-      new llvm::vfs::OverlayFileSystem(std::move(BaseFS)));
-  llvm::IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> InMemoryFileSystem(
-      new llvm::vfs::InMemoryFileSystem);
+  auto OverlayFileSystem =
+      llvm::makeIntrusiveRefCnt<llvm::vfs::OverlayFileSystem>(
+          std::move(BaseFS));
+  auto InMemoryFileSystem =
+      llvm::makeIntrusiveRefCnt<llvm::vfs::InMemoryFileSystem>();
   OverlayFileSystem->pushOverlay(InMemoryFileSystem);
   llvm::IntrusiveRefCntPtr<FileManager> Files(
       new FileManager(FileSystemOptions(), OverlayFileSystem));
diff --git a/clang/test/AST/ByteCode/codegen.cpp b/clang/test/AST/ByteCode/codegen.cpp
index 6f9e75e..1bc756c 100644
--- a/clang/test/AST/ByteCode/codegen.cpp
+++ b/clang/test/AST/ByteCode/codegen.cpp
@@ -23,6 +23,10 @@ S s;
 // CHECK: @sp = constant ptr getelementptr (i8, ptr @s, i64 16), align 8
 float &sp = s.c[3];
 
+namespace NearlyZeroInit {
+  // CHECK: @_ZN14NearlyZeroInit1bE ={{.*}} global{{.*}} { i32, <{ i32, [2147483647 x i32] }> } { i32 1, <{ i32, [2147483647 x i32] }> <{ i32 2, [2147483647 x i32] zeroinitializer }> }{{.*}}
+  struct B { int n; int arr[1024 * 1024 * 1024 * 2u]; } b = {1, {2}};
+}
 
 namespace BaseClassOffsets {
   struct A { int a; };
diff --git a/clang/test/AST/ByteCode/functions.cpp b/clang/test/AST/ByteCode/functions.cpp
index b5e6f5b..363b6a5 100644
--- a/clang/test/AST/ByteCode/functions.cpp
+++ b/clang/test/AST/ByteCode/functions.cpp
@@ -5,6 +5,8 @@
 // RUN: %clang_cc1 -pedantic -std=c++14 -verify=ref,both %s
 // RUN: %clang_cc1 -pedantic -std=c++20 -verify=ref,both %s
 
+#define fold(x) (__builtin_constant_p(0) ? (x) : (x))
+
 constexpr void doNothing() {}
 constexpr int gimme5() {
   doNothing();
@@ -654,14 +656,26 @@ namespace {
 }
 
 namespace FunctionCast {
-  // When folding, we allow functions to be cast to different types. Such
-  // cast functions cannot be called, even if they're constexpr.
+  // When folding, we allow functions to be cast to different types. We only
+  // allow calls if the dynamic type of the pointer matches the type of the
+  // call.
   constexpr int f() { return 1; }
+  constexpr void* f2() { return nullptr; }
+  constexpr int f3(int a) { return a; }
   typedef double (*DoubleFn)();
   typedef int (*IntFn)();
-  int a[(int)DoubleFn(f)()]; // both-error {{variable length array}} \
-                             // both-warning {{are a Clang extension}}
-  int b[(int)IntFn(f)()];    // ok
+  typedef int* (*IntPtrFn)();
+  constexpr int test1 = (int)DoubleFn(f)(); // both-error {{constant expression}} both-note {{reinterpret_cast}}
+  // FIXME: We should print a note explaining the error.
+  constexpr int test2 = (int)fold(DoubleFn(f))(); // both-error {{constant expression}}
+  constexpr int test3 = (int)IntFn(f)();    // no-op cast
+  constexpr int test4 = fold(IntFn(DoubleFn(f)))();
+  constexpr int test5 = IntFn(fold(DoubleFn(f)))(); // both-error {{constant expression}} \
+                                                    // both-note {{cast that performs the conversions of a reinterpret_cast is not allowed in a constant expression}}
+  // FIXME: Interpreter is less strict here.
+  constexpr int test6 = fold(IntPtrFn(f2))() == nullptr; // ref-error {{constant expression}}
+  // FIXME: The following crashes interpreter
+  // constexpr int test6 = fold(IntFn(f3)());
 }
 
 #if __cplusplus >= 202002L
diff --git a/clang/test/AST/ByteCode/intap.cpp b/clang/test/AST/ByteCode/intap.cpp
index 6888387..05ab319b 100644
--- a/clang/test/AST/ByteCode/intap.cpp
+++ b/clang/test/AST/ByteCode/intap.cpp
@@ -292,7 +292,19 @@ constexpr int shifts() { // both-error {{never produces a constant expression}}
   (void)(2 << b); // ref-warning {{shift count is negative}}
   return 1;
 }
-#endif
 
+namespace UnderlyingInt128 {
+  enum F  {
+    a = (__int128)-1
+  };
+
+  constexpr int foo() { // both-error {{never produces a constant expression}}
+    F f = (F)(__int128)10; // both-note 2{{integer value 10 is outside the valid range of values [-1, 0] for the enumeration type 'F'}}
+    return (int)f;
+  }
+  static_assert(foo() == 0, ""); // both-error {{not an integral constant expression}} \
+                                 // both-note {{in call to}}
+}
+#endif
 
 #endif
diff --git a/clang/test/ASTMerge/enum/Inputs/enum3.c b/clang/test/ASTMerge/enum/Inputs/enum3.c
new file mode 100644
index 0000000..32ad536
--- /dev/null
+++ b/clang/test/ASTMerge/enum/Inputs/enum3.c
@@ -0,0 +1,14 @@
+// [C23] missing underlying types
+enum E1 : int {
+  E1Enumerator1
+};
+
+enum E2 {
+  E2Enumerator1
+};
+
+// [C23] Incompatible underlying types
+enum E3 : long {
+  E3Enumerator1
+};
+
diff --git a/clang/test/ASTMerge/enum/Inputs/enum4.c b/clang/test/ASTMerge/enum/Inputs/enum4.c
new file mode 100644
index 0000000..15f5c603
--- /dev/null
+++ b/clang/test/ASTMerge/enum/Inputs/enum4.c
@@ -0,0 +1,14 @@
+// [C23] missing underlying types
+enum E1 {
+  E1Enumerator1
+};
+
+enum E2 : int {
+  E2Enumerator1
+};
+
+// [C23] Incompatible underlying types
+enum E3 : short {
+  E3Enumerator1
+};
+
diff --git a/clang/test/ASTMerge/enum/test2.c b/clang/test/ASTMerge/enum/test2.c
new file mode 100644
index 0000000..bdd8b13
--- /dev/null
+++ b/clang/test/ASTMerge/enum/test2.c
@@ -0,0 +1,14 @@
+// RUN: %clang_cc1 -std=c23 -emit-pch -o %t.1.ast %S/Inputs/enum3.c
+// RUN: %clang_cc1 -std=c23 -emit-pch -o %t.2.ast %S/Inputs/enum4.c
+// RUN: %clang_cc1 -std=c23 -ast-merge %t.1.ast -ast-merge %t.2.ast -fsyntax-only %s 2>&1 | FileCheck %s
+
+// CHECK: enum3.c:2:6: warning: type 'enum E1' has incompatible definitions in different translation units
+// CHECK: enum4.c:2:6: note: enumeration 'E1' missing fixed underlying type here
+// CHECK: enum3.c:2:6: note: enumeration 'E1' has fixed underlying type here
+// CHECK: enum3.c:6:6: warning: type 'enum E2' has incompatible definitions in different translation units
+// CHECK: enum4.c:6:6: note: enumeration 'E2' has fixed underlying type here
+// CHECK: enum3.c:6:6: note: enumeration 'E2' missing fixed underlying type here
+// CHECK: enum3.c:11:6: warning: type 'enum E3' has incompatible definitions in different translation units
+// CHECK: enum3.c:11:6: note: enumeration 'E3' declared with incompatible fixed underlying types ('long' vs. 'short')
+// CHECK: 3 warnings generated
+
diff --git a/clang/test/ASTSYCL/ast-dump-sycl-kernel-entry-point.cpp b/clang/test/ASTSYCL/ast-dump-sycl-kernel-entry-point.cpp
index b112e9e..1a82bdc 100644
--- a/clang/test/ASTSYCL/ast-dump-sycl-kernel-entry-point.cpp
+++ b/clang/test/ASTSYCL/ast-dump-sycl-kernel-entry-point.cpp
@@ -28,21 +28,21 @@
 // A unique kernel name type is required for each declared kernel entry point.
 template<int, int=0> struct KN;
 
-__attribute__((sycl_kernel_entry_point(KN<1>)))
+[[clang::sycl_kernel_entry_point(KN<1>)]]
 void skep1() {
 }
 // CHECK:      |-FunctionDecl {{.*}} skep1 'void ()'
 // CHECK:      | `-SYCLKernelEntryPointAttr {{.*}} KN<1>
 
 using KN2 = KN<2>;
-__attribute__((sycl_kernel_entry_point(KN2)))
+[[clang::sycl_kernel_entry_point(KN2)]]
 void skep2() {
 }
 // CHECK:      |-FunctionDecl {{.*}} skep2 'void ()'
 // CHECK:      | `-SYCLKernelEntryPointAttr {{.*}} KN2
 
 template<int I> using KNT = KN<I>;
-__attribute__((sycl_kernel_entry_point(KNT<3>)))
+[[clang::sycl_kernel_entry_point(KNT<3>)]]
 void skep3() {
 }
 // CHECK:      |-FunctionDecl {{.*}} skep3 'void ()'
diff --git a/clang/test/Analysis/analyzer-enabled-checkers.c b/clang/test/Analysis/analyzer-enabled-checkers.c
index a632b70..32afcf3 100644
--- a/clang/test/Analysis/analyzer-enabled-checkers.c
+++ b/clang/test/Analysis/analyzer-enabled-checkers.c
@@ -20,7 +20,6 @@
 // CHECK-NEXT: core.NonNullParamChecker
 // CHECK-NEXT: core.NonnilStringConstants
 // CHECK-NEXT: core.NullDereference
-// CHECK-NEXT: core.StackAddrEscapeBase
 // CHECK-NEXT: core.StackAddressEscape
 // CHECK-NEXT: core.UndefinedBinaryOperatorResult
 // CHECK-NEXT: core.VLASize
diff --git a/clang/test/Analysis/placement-new.cpp b/clang/test/Analysis/placement-new.cpp
index 766b11c..50bbde2 100644
--- a/clang/test/Analysis/placement-new.cpp
+++ b/clang/test/Analysis/placement-new.cpp
@@ -166,10 +166,29 @@ void f1() {
     short a;
   };
 
-  // bad (not enough space).
+  // On some systems, (notably before MSVC 16.7), a non-allocating placement
+  // array new could allocate more memory than the nominal size of the array.
+
+  // Since CWG 2382 (implemented in MSVC 16.7), overhead was disallowed for non-allocating placement new.
+  //  See:
+  //    https://learn.microsoft.com/en-us/cpp/overview/visual-cpp-language-conformance?view=msvc-170
+  //    https://www.open-std.org/jtc1/sc22/wg21/docs/papers/2019/p1969r0.html#2382
+
+  // However, as of 17.1, there is a regression when the type comes from a template
+  // parameter where MSVC reintroduces overhead.
+  //  See:
+  //    https://developercommunity.visualstudio.com/t/10777485
+  //    https://godbolt.org/z/E1z1Tsfvj
+
+  // The checker doesn't warn here because this behavior only affects older
+  // MSVC versions (<16.7) or certain specific versions (17.1).
+  // Suppressing warnings avoids false positives on standard-compliant compilers
+  // and modern MSVC versions, but users of affected MSVC versions should be
+  // aware of potential buffer size issues.
+
   const unsigned N = 32;
-  alignas(S) unsigned char buffer1[sizeof(S) * N]; // expected-note {{'buffer1' initialized here}}
-  ::new (buffer1) S[N];                            // expected-warning{{Storage provided to placement new is only 64 bytes, whereas the allocated array type requires more space for internal needs}} expected-note 1 {{}}
+  alignas(S) unsigned char buffer1[sizeof(S) * N]; 
+  ::new (buffer1) S[N]; // no-warning: See comments above
 }
 
 void f2() {
@@ -177,10 +196,11 @@ void f2() {
     short a;
   };
 
-  // maybe ok but we need to warn.
+  // On some systems, placement array new could allocate more memory than the nominal size of the array.
+  // See the comment at f1() above for more details.
   const unsigned N = 32;
-  alignas(S) unsigned char buffer2[sizeof(S) * N + sizeof(int)]; // expected-note {{'buffer2' initialized here}}
-  ::new (buffer2) S[N];                                          // expected-warning{{68 bytes is possibly not enough for array allocation which requires 64 bytes. Current overhead requires the size of 4 bytes}} expected-note 1 {{}}
+  alignas(S) unsigned char buffer2[sizeof(S) * N + sizeof(int)]; 
+  ::new (buffer2) S[N]; // no-warning: See comments above
 }
 } // namespace testArrayTypesAllocation
 
diff --git a/clang/test/Analysis/std-c-library-functions-arg-enabled-checkers.c b/clang/test/Analysis/std-c-library-functions-arg-enabled-checkers.c
index b388c31..77fa037 100644
--- a/clang/test/Analysis/std-c-library-functions-arg-enabled-checkers.c
+++ b/clang/test/Analysis/std-c-library-functions-arg-enabled-checkers.c
@@ -28,7 +28,6 @@
 // CHECK-NEXT: core.NonNullParamChecker
 // CHECK-NEXT: core.NonnilStringConstants
 // CHECK-NEXT: core.NullDereference
-// CHECK-NEXT: core.StackAddrEscapeBase
 // CHECK-NEXT: core.StackAddressEscape
 // CHECK-NEXT: core.UndefinedBinaryOperatorResult
 // CHECK-NEXT: core.VLASize
diff --git a/clang/test/C/C23/n3037.c b/clang/test/C/C23/n3037.c
index ce6f4c4..3748375 100644
--- a/clang/test/C/C23/n3037.c
+++ b/clang/test/C/C23/n3037.c
@@ -401,3 +401,77 @@ _Static_assert(0 == _Generic(inner_anon_tagged.untagged, struct { int i; } : 1,
 // unions and structures are both RecordDecl objects, whereas EnumDecl is not).
 enum { E_Untagged1 } nontag_enum; // both-note {{previous definition is here}}
 _Static_assert(0 == _Generic(nontag_enum, enum { E_Untagged1 } : 1, default : 0)); // both-error {{redefinition of enumerator 'E_Untagged1'}}
+
+// Test that enumerations with mixed underlying types are properly handled.
+enum GH150594_E1 : int { GH150594_Val1 };
+enum GH150594_E2 : int { GH150594_Val2 };
+enum GH150594_E3 { GH150594_Val3 };
+enum GH150594_E4 : int { GH150594_Val4 };
+void GH150594(void) {
+  extern enum GH150594_E1 Fn1(void); // both-note {{previous declaration is here}}
+  extern enum GH150594_E2 Fn2(void); // c17-note {{previous declaration is here}}
+  extern enum GH150594_E3 Fn3(void); // both-note {{previous declaration is here}}
+  extern enum GH150594_E4 Fn4(void); // both-note {{previous declaration is here}}
+  enum GH150594_E1 { GH150594_Val1 };
+  enum GH150594_E2 : int { GH150594_Val2 };
+  enum GH150594_E3 : int { GH150594_Val3 };
+  enum GH150594_E4 : short { GH150594_Val4 };
+  extern enum GH150594_E1 Fn1(void); // both-error {{conflicting types for 'Fn1'}}
+  extern enum GH150594_E2 Fn2(void); // c17-error {{conflicting types for 'Fn2'}}
+  extern enum GH150594_E3 Fn3(void); // both-error {{conflicting types for 'Fn3'}}
+  extern enum GH150594_E4 Fn4(void); // both-error {{conflicting types for 'Fn4'}}
+
+  // Show that two declarations in the same scope give expected diagnostics.
+  enum E1 { e1 };       // both-note {{previous declaration is here}}
+  enum E1 : int { e1 }; // both-error {{enumeration previously declared with nonfixed underlying type}}
+
+  enum E2 : int { e2 }; // both-note {{previous declaration is here}}
+  enum E2 { e2 };       // both-error {{enumeration previously declared with fixed underlying type}}
+
+  enum E3 : int { e3 };   // both-note {{previous declaration is here}}
+  enum E3 : short { e3 }; // both-error {{enumeration redeclared with different underlying type 'short' (was 'int')}}
+
+  typedef short foo;
+  enum E4 : foo { e4 };   // c17-note 2 {{previous definition is here}}
+  enum E4 : short { e4 }; // c17-error {{redefinition of 'E4'}} \
+                             c17-error {{redefinition of enumerator 'e4'}}
+
+  enum E5 : foo { e5 }; // both-note {{previous declaration is here}}
+  enum E5 : int { e5 }; // both-error {{enumeration redeclared with different underlying type 'int' (was 'foo' (aka 'short'))}}
+}
+
+// Test that enumerations are compatible with their underlying type, but still
+// diagnose when "same type" is required rather than merely "compatible type".
+enum E1 : int { e1 }; // Fixed underlying type
+enum E2 { e2 };       // Unfixed underlying type, defaults to int or unsigned int
+
+struct GH149965_1 { int h; };
+// This typeof trick is used to get the underlying type of the enumeration in a
+// platform agnostic way.
+struct GH149965_2 { __typeof__(+(enum E2){}) h; };
+void gh149965(void) {
+  extern struct GH149965_1 x1; // c17-note {{previous declaration is here}}
+  extern struct GH149965_2 x2; // c17-note {{previous declaration is here}}
+
+  // Both the structure and the variable declarations are fine because only a
+  // compatible type is required, not the same type, because the structures are
+  // declared in different scopes.
+  struct GH149965_1 { enum E1 h; };
+  struct GH149965_2 { enum E2 h; };
+
+  extern struct GH149965_1 x1; // c17-error {{redeclaration of 'x1'}}
+  extern struct GH149965_2 x2; // c17-error {{redeclaration of 'x2'}}
+
+  // However, in the same scope, the same type is required, not just compatible
+  // types.
+  // FIXME: this should be an error in both C17 and C23 mode.
+  struct GH149965_3 { int h; };     // c17-note {{previous definition is here}}
+  struct GH149965_3 { enum E1 h; }; // c17-error {{redefinition of 'GH149965_3'}}
+
+  // For Clang, the composite type after declaration merging is the enumeration
+  // type rather than an integer type.
+  enum E1 *eptr;
+  [[maybe_unused]] __typeof__(x1.h) *ptr = eptr;
+  enum E2 *eptr2;
+  [[maybe_unused]] __typeof__(x2.h) *ptr2 = eptr2;
+}
diff --git a/clang/test/CIR/CodeGen/aapcs-volatile-bitfields.c b/clang/test/CIR/CodeGen/aapcs-volatile-bitfields.c
new file mode 100644
index 0000000..3643cf2
--- /dev/null
+++ b/clang/test/CIR/CodeGen/aapcs-volatile-bitfields.c
@@ -0,0 +1,73 @@
+// RUN: %clang_cc1 -triple aarch64-unknown-linux-gnu -fclangir -emit-cir -fdump-record-layouts %s -o %t.cir 1> %t.cirlayout
+// RUN: FileCheck --input-file=%t.cirlayout %s --check-prefix=CIR-LAYOUT
+
+// RUN: %clang_cc1 -triple aarch64-unknown-linux-gnu -emit-llvm -fdump-record-layouts %s -o %t.ll 1> %t.ogcglayout
+// RUN: FileCheck --input-file=%t.ogcglayout %s --check-prefix=OGCG-LAYOUT
+
+typedef struct  {
+    unsigned int a : 9;
+    volatile unsigned int b : 1;
+    unsigned int c : 1;
+} st1;
+
+// CIR-LAYOUT:  BitFields:[
+// CIR-LAYOUT-NEXT:    <CIRBitFieldInfo name:a offset:0 size:9 isSigned:0 storageSize:16 storageOffset:0 volatileOffset:0 volatileStorageSize:32 volatileStorageOffset:0>
+// CIR-LAYOUT-NEXT:    <CIRBitFieldInfo name:b offset:9 size:1 isSigned:0 storageSize:16 storageOffset:0 volatileOffset:9 volatileStorageSize:32 volatileStorageOffset:0>
+// CIR-LAYOUT-NEXT:    <CIRBitFieldInfo name:c offset:10 size:1 isSigned:0 storageSize:16 storageOffset:0 volatileOffset:10 volatileStorageSize:32 volatileStorageOffset:0>
+
+// OGCG-LAYOUT:  BitFields:[
+// OGCG-LAYOUT-NEXT:    <CGBitFieldInfo Offset:0 Size:9 IsSigned:0 StorageSize:16 StorageOffset:0 VolatileOffset:0 VolatileStorageSize:32 VolatileStorageOffset:0>
+// OGCG-LAYOUT-NEXT:    <CGBitFieldInfo Offset:9 Size:1 IsSigned:0 StorageSize:16 StorageOffset:0 VolatileOffset:9 VolatileStorageSize:32 VolatileStorageOffset:0>
+// OGCG-LAYOUT-NEXT:    <CGBitFieldInfo Offset:10 Size:1 IsSigned:0 StorageSize:16 StorageOffset:0 VolatileOffset:10 VolatileStorageSize:32 VolatileStorageOffset:0>
+
+// different base types
+typedef struct{
+    volatile  short a : 3;
+    volatile  int b: 13;
+    volatile  long c : 5;
+} st2;
+
+// CIR-LAYOUT: BitFields:[
+// CIR-LAYOUT-NEXT:   <CIRBitFieldInfo name:a offset:0 size:3 isSigned:1 storageSize:32 storageOffset:0 volatileOffset:0 volatileStorageSize:16 volatileStorageOffset:0>
+// CIR-LAYOUT-NEXT:   <CIRBitFieldInfo name:b offset:3 size:13 isSigned:1 storageSize:32 storageOffset:0 volatileOffset:3 volatileStorageSize:32 volatileStorageOffset:0>
+// CIR-LAYOUT-NEXT:   <CIRBitFieldInfo name:c offset:16 size:5 isSigned:1 storageSize:32 storageOffset:0 volatileOffset:16 volatileStorageSize:64 volatileStorageOffset:0>
+
+// OGCG-LAYOUT: BitFields:[
+// OGCG-LAYOUT-NEXT:   <CGBitFieldInfo Offset:0 Size:3 IsSigned:1 StorageSize:32 StorageOffset:0 VolatileOffset:0 VolatileStorageSize:16 VolatileStorageOffset:0>
+// OGCG-LAYOUT-NEXT:   <CGBitFieldInfo Offset:3 Size:13 IsSigned:1 StorageSize:32 StorageOffset:0 VolatileOffset:3 VolatileStorageSize:32 VolatileStorageOffset:0>
+// OGCG-LAYOUT-NEXT:   <CGBitFieldInfo Offset:16 Size:5 IsSigned:1 StorageSize:32 StorageOffset:0 VolatileOffset:16 VolatileStorageSize:64 VolatileStorageOffset:0>
+
+typedef struct{
+    volatile unsigned int a : 3;
+    unsigned int : 0; // zero-length bit-field force next field to aligned int boundary
+    volatile unsigned int b : 5;
+} st3;
+
+// CIR-LAYOUT: BitFields:[
+// CIR-LAYOUT-NEXT:   <CIRBitFieldInfo name:a offset:0 size:3 isSigned:0 storageSize:8 storageOffset:0 volatileOffset:0 volatileStorageSize:32 volatileStorageOffset:0>
+// CIR-LAYOUT-NEXT:   <CIRBitFieldInfo name:b offset:0 size:5 isSigned:0 storageSize:8 storageOffset:4 volatileOffset:0 volatileStorageSize:0 volatileStorageOffset:0>
+
+// OGCG-LAYOUT: BitFields:[
+// OGCG-LAYOUT-NEXT:   <CGBitFieldInfo Offset:0 Size:3 IsSigned:0 StorageSize:8 StorageOffset:0 VolatileOffset:0 VolatileStorageSize:32 VolatileStorageOffset:0>
+// OGCG-LAYOUT-NEXT:   <CGBitFieldInfo Offset:0 Size:5 IsSigned:0 StorageSize:8 StorageOffset:4 VolatileOffset:0 VolatileStorageSize:0 VolatileStorageOffset:0>
+
+typedef struct{
+    volatile unsigned int a : 3;
+    unsigned int z: 2;
+    volatile unsigned int b : 5;
+} st4;
+
+// CIR-LAYOUT: BitFields:[
+// CIR-LAYOUT-NEXT:   <CIRBitFieldInfo name:a offset:0 size:3 isSigned:0 storageSize:16 storageOffset:0 volatileOffset:0 volatileStorageSize:32 volatileStorageOffset:0>
+// CIR-LAYOUT-NEXT:   <CIRBitFieldInfo name:z offset:3 size:2 isSigned:0 storageSize:16 storageOffset:0 volatileOffset:3 volatileStorageSize:32 volatileStorageOffset:0>
+// CIR-LAYOUT-NEXT:   <CIRBitFieldInfo name:b offset:5 size:5 isSigned:0 storageSize:16 storageOffset:0 volatileOffset:5 volatileStorageSize:32 volatileStorageOffset:0>
+
+// OGCG-LAYOUT: BitFields:[
+// OGCG-LAYOUT-NEXT:   <CGBitFieldInfo Offset:0 Size:3 IsSigned:0 StorageSize:16 StorageOffset:0 VolatileOffset:0 VolatileStorageSize:32 VolatileStorageOffset:0>
+// OGCG-LAYOUT-NEXT:   <CGBitFieldInfo Offset:3 Size:2 IsSigned:0 StorageSize:16 StorageOffset:0 VolatileOffset:3 VolatileStorageSize:32 VolatileStorageOffset:0>
+// OGCG-LAYOUT-NEXT:   <CGBitFieldInfo Offset:5 Size:5 IsSigned:0 StorageSize:16 StorageOffset:0 VolatileOffset:5 VolatileStorageSize:32 VolatileStorageOffset:0>
+
+st1 s1;
+st2 s2;
+st3 s3;
+st4 s4;
diff --git a/clang/test/CIR/CodeGen/array-ctor.cpp b/clang/test/CIR/CodeGen/array-ctor.cpp
index b3d81a8..c373acf 100644
--- a/clang/test/CIR/CodeGen/array-ctor.cpp
+++ b/clang/test/CIR/CodeGen/array-ctor.cpp
@@ -33,8 +33,8 @@ void foo() {
 // CIR:   cir.store %[[DECAY]], %[[ITER]] : !cir.ptr<!rec_S>, !cir.ptr<!cir.ptr<!rec_S>>
 // CIR:   cir.do {
 // CIR:     %[[CURRENT:.*]] = cir.load %[[ITER]] : !cir.ptr<!cir.ptr<!rec_S>>, !cir.ptr<!rec_S>
-// CIR:     %[[CONST1:.*]] = cir.const #cir.int<1> : !u64i
 // CIR:     cir.call @_ZN1SC1Ev(%[[CURRENT]]) : (!cir.ptr<!rec_S>) -> ()
+// CIR:     %[[CONST1:.*]] = cir.const #cir.int<1> : !u64i
 // CIR:     %[[NEXT:.*]] = cir.ptr_stride(%[[CURRENT]] : !cir.ptr<!rec_S>, %[[CONST1]] : !u64i), !cir.ptr<!rec_S>
 // CIR:     cir.store %[[NEXT]], %[[ITER]] : !cir.ptr<!rec_S>, !cir.ptr<!cir.ptr<!rec_S>>
 // CIR:     cir.yield
diff --git a/clang/test/CIR/CodeGen/array-dtor.cpp b/clang/test/CIR/CodeGen/array-dtor.cpp
new file mode 100644
index 0000000..3edc6f1
--- /dev/null
+++ b/clang/test/CIR/CodeGen/array-dtor.cpp
@@ -0,0 +1,104 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -Wno-unused-value -fclangir -emit-cir -mmlir --mlir-print-ir-before=cir-lowering-prepare %s -o %t.cir  2> %t-before-lp.cir
+// RUN: FileCheck --input-file=%t-before-lp.cir %s -check-prefix=CIR-BEFORE-LPP
+// RUN: FileCheck --input-file=%t.cir %s -check-prefix=CIR
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -Wno-unused-value -fclangir -emit-llvm %s -o %t-cir.ll
+// RUN: FileCheck --input-file=%t-cir.ll %s -check-prefix=LLVM
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -Wno-unused-value -emit-llvm %s -o %t.ll
+// RUN: FileCheck --input-file=%t.ll %s -check-prefix=OGCG
+
+struct S {
+    ~S();
+};
+
+void test_cleanup_array() {
+    S s[42];
+}
+
+// CIR-BEFORE-LPP: cir.func{{.*}} @_Z18test_cleanup_arrayv()
+// CIR-BEFORE-LPP:   %[[S:.*]] = cir.alloca !cir.array<!rec_S x 42>, !cir.ptr<!cir.array<!rec_S x 42>>, ["s"]
+// CIR-BEFORE-LPP:   cir.array.dtor %[[S]] : !cir.ptr<!cir.array<!rec_S x 42>> {
+// CIR-BEFORE-LPP:   ^bb0(%arg0: !cir.ptr<!rec_S>
+// CIR-BEFORE-LPP:     cir.call @_ZN1SD1Ev(%arg0) nothrow : (!cir.ptr<!rec_S>) -> ()
+// CIR-BEFORE-LPP:     cir.yield
+// CIR-BEFORE-LPP:   }
+// CIR-BEFORE-LPP:   cir.return
+
+// CIR: cir.func{{.*}} @_Z18test_cleanup_arrayv()
+// CIR:   %[[S:.*]] = cir.alloca !cir.array<!rec_S x 42>, !cir.ptr<!cir.array<!rec_S x 42>>, ["s"]
+// CIR:   %[[CONST41:.*]] = cir.const #cir.int<41> : !u64i
+// CIR:   %[[DECAY:.*]] = cir.cast(array_to_ptrdecay, %[[S]] : !cir.ptr<!cir.array<!rec_S x 42>>), !cir.ptr<!rec_S>
+// CIR:   %[[END_PTR:.*]] = cir.ptr_stride(%[[DECAY]] : !cir.ptr<!rec_S>, %[[CONST41]] : !u64i), !cir.ptr<!rec_S>
+// CIR:   %[[ITER:.*]] = cir.alloca !cir.ptr<!rec_S>, !cir.ptr<!cir.ptr<!rec_S>>, ["__array_idx"]
+// CIR:   cir.store %[[END_PTR]], %[[ITER]] : !cir.ptr<!rec_S>, !cir.ptr<!cir.ptr<!rec_S>>
+// CIR:   cir.do {
+// CIR:     %[[CURRENT:.*]] = cir.load %[[ITER]] : !cir.ptr<!cir.ptr<!rec_S>>, !cir.ptr<!rec_S>
+// CIR:     cir.call @_ZN1SD1Ev(%[[CURRENT]]) nothrow : (!cir.ptr<!rec_S>) -> ()
+// CIR:     %[[CONST_MINUS1:.*]] = cir.const #cir.int<-1> : !s64i
+// CIR:     %[[NEXT:.*]] = cir.ptr_stride(%[[CURRENT]] : !cir.ptr<!rec_S>, %[[CONST_MINUS1]] : !s64i), !cir.ptr<!rec_S>
+// CIR:     cir.store %[[NEXT]], %[[ITER]] : !cir.ptr<!rec_S>, !cir.ptr<!cir.ptr<!rec_S>>
+// CIR:     cir.yield
+// CIR:   } while {
+// CIR:     %[[CURRENT2:.*]] = cir.load %[[ITER]] : !cir.ptr<!cir.ptr<!rec_S>>, !cir.ptr<!rec_S>
+// CIR:     %[[CMP:.*]] = cir.cmp(ne, %[[CURRENT2]], %[[DECAY]])
+// CIR:     cir.condition(%[[CMP]])
+// CIR:   }
+// CIR:   cir.return
+
+// LLVM: define{{.*}} void @_Z18test_cleanup_arrayv()
+// LLVM:   %[[ARRAY:.*]] = alloca [42 x %struct.S]
+// LLVM:   %[[START:.*]] = getelementptr %struct.S, ptr %[[ARRAY]], i32 0
+// LLVM:   %[[END:.*]] = getelementptr %struct.S, ptr %[[START]], i64 41
+// LLVM:   %[[ITER:.*]] = alloca ptr
+// LLVM:   store ptr %[[END]], ptr %[[ITER]]
+// LLVM:   br label %[[LOOP:.*]]
+// LLVM: [[COND:.*]]:
+// LLVM:   %[[CURRENT_CHECK:.*]] = load ptr, ptr %[[ITER]]
+// LLVM:   %[[DONE:.*]] = icmp ne ptr %[[CURRENT_CHECK]], %[[START]]
+// LLVM:   br i1 %[[DONE]], label %[[LOOP]], label %[[EXIT:.*]]
+// LLVM: [[LOOP]]:
+// LLVM:   %[[CURRENT:.*]] = load ptr, ptr %[[ITER]]
+// LLVM:   call void @_ZN1SD1Ev(ptr %[[CURRENT]])
+// LLVM:   %[[NEXT:.*]] = getelementptr %struct.S, ptr %[[CURRENT]], i64 -1
+// LLVM:   store ptr %[[NEXT]], ptr %[[ITER]]
+// LLVM:   br label %[[COND]]
+// LLVM: [[EXIT]]:
+// LLVM:   ret void
+
+// OGCG: define{{.*}} void @_Z18test_cleanup_arrayv()
+// OGCG:   %[[ARRAY:.*]] = alloca [42 x %struct.S]
+// OGCG:   %[[START:.*]] = getelementptr{{.*}} %struct.S{{.*}}
+// OGCG:   %[[END:.*]] = getelementptr{{.*}} %struct.S{{.*}} i64 42
+// OGCG:   br label %[[LOOP:.*]]
+// OGCG: [[LOOP]]:
+// OGCG:   %[[NEXT:.*]] = phi ptr [ %[[END]], %{{.*}} ], [ %[[LAST:.*]], %[[LOOP]] ]
+// OGCG:   %[[LAST]] = getelementptr{{.*}} %struct.S{{.*}}, ptr %[[NEXT]], i64 -1
+// OGCG:   call void @_ZN1SD1Ev(ptr{{.*}} %[[LAST]])
+// OGCG:   %[[DONE:.*]] = icmp eq ptr %[[LAST]], %[[START]]
+// OGCG:   br i1 %[[DONE]], label %[[EXIT:.*]], label %[[LOOP]]
+// OGCG: [[EXIT]]:
+// OGCG:   ret void
+
+void test_cleanup_zero_length_array() {
+    S s[0];
+}
+
+// CIR-BEFORE-LPP:     cir.func{{.*}} @_Z30test_cleanup_zero_length_arrayv()
+// CIR-BEFORE-LPP:       %[[S:.*]] = cir.alloca !cir.array<!rec_S x 0>, !cir.ptr<!cir.array<!rec_S x 0>>, ["s"]
+// CIR-BEFORE-LPP-NOT:   cir.array.dtor
+// CIR-BEFORE-LPP:       cir.return
+
+// CIR:     cir.func{{.*}} @_Z30test_cleanup_zero_length_arrayv()
+// CIR:       %[[S:.*]] = cir.alloca !cir.array<!rec_S x 0>, !cir.ptr<!cir.array<!rec_S x 0>>, ["s"]
+// CIR-NOT:   cir.do
+// CIR-NOT:   cir.call @_ZN1SD1Ev
+// CIR:       cir.return
+
+// LLVM:     define{{.*}} void @_Z30test_cleanup_zero_length_arrayv()
+// LLVM:       alloca [0 x %struct.S]
+// LLVM-NOT:   call void @_ZN1SD1Ev
+// LLVM:       ret void
+
+// OGCG:     define{{.*}} void @_Z30test_cleanup_zero_length_arrayv()
+// OGCG:       alloca [0 x %struct.S]
+// OGCG-NOT:   call void @_ZN1SD1Ev
+// OGCG:       ret void
diff --git a/clang/test/CIR/CodeGen/builtin_bit.cpp b/clang/test/CIR/CodeGen/builtin_bit.cpp
index 4ac82bd..8b9a187 100644
--- a/clang/test/CIR/CodeGen/builtin_bit.cpp
+++ b/clang/test/CIR/CodeGen/builtin_bit.cpp
@@ -216,6 +216,78 @@ int test_builtin_clzg(unsigned x) {
 // OGCG-LABEL: _Z17test_builtin_clzgj
 // OGCG:         %{{.+}} = call i32 @llvm.ctlz.i32(i32 %{{.+}}, i1 true)
 
+int test_builtin_ffs(int x) {
+  return __builtin_ffs(x);
+}
+
+// CIR-LABEL: _Z16test_builtin_ffsi
+// CIR:         %{{.+}} = cir.ffs %{{.+}} : !s32i
+// CIR:       }
+
+// LLVM-LABEL: _Z16test_builtin_ffsi
+// LLVM:         %[[INPUT:.+]] = load i32, ptr %{{.+}}, align 4
+// LLVM-NEXT:    %[[CTZ:.+]] = call i32 @llvm.cttz.i32(i32 %[[INPUT]], i1 true)
+// LLVM-NEXT:    %[[R1:.+]] = add i32 %[[CTZ]], 1
+// LLVM-NEXT:    %[[IS_ZERO:.+]] = icmp eq i32 %[[INPUT]], 0
+// LLVM-NEXT:    %{{.+}} = select i1 %[[IS_ZERO]], i32 0, i32 %[[R1]]
+// LLVM:       }
+
+// OGCG-LABEL: _Z16test_builtin_ffsi
+// OGCG:         %[[INPUT:.+]] = load i32, ptr %{{.+}}, align 4
+// OGCG-NEXT:    %[[CTZ:.+]] = call i32 @llvm.cttz.i32(i32 %[[INPUT]], i1 true)
+// OGCG-NEXT:    %[[R1:.+]] = add i32 %[[CTZ]], 1
+// OGCG-NEXT:    %[[IS_ZERO:.+]] = icmp eq i32 %[[INPUT]], 0
+// OGCG-NEXT:    %{{.+}} = select i1 %[[IS_ZERO]], i32 0, i32 %[[R1]]
+// OGCG:       }
+
+int test_builtin_ffsl(long x) {
+  return __builtin_ffsl(x);
+}
+
+// CIR-LABEL: _Z17test_builtin_ffsll
+// CIR:         %{{.+}} = cir.ffs %{{.+}} : !s64i
+// CIR:       }
+
+// LLVM-LABEL: _Z17test_builtin_ffsll
+// LLVM:         %[[INPUT:.+]] = load i64, ptr %{{.+}}, align 8
+// LLVM-NEXT:    %[[CTZ:.+]] = call i64 @llvm.cttz.i64(i64 %[[INPUT]], i1 true)
+// LLVM-NEXT:    %[[R1:.+]] = add i64 %[[CTZ]], 1
+// LLVM-NEXT:    %[[IS_ZERO:.+]] = icmp eq i64 %[[INPUT]], 0
+// LLVM-NEXT:    %{{.+}} = select i1 %[[IS_ZERO]], i64 0, i64 %[[R1]]
+// LLVM:       }
+
+// OGCG-LABEL: _Z17test_builtin_ffsll
+// OGCG:         %[[INPUT:.+]] = load i64, ptr %{{.+}}, align 8
+// OGCG-NEXT:    %[[CTZ:.+]] = call i64 @llvm.cttz.i64(i64 %[[INPUT]], i1 true)
+// OGCG-NEXT:    %[[R1:.+]] = add i64 %[[CTZ]], 1
+// OGCG-NEXT:    %[[IS_ZERO:.+]] = icmp eq i64 %[[INPUT]], 0
+// OGCG-NEXT:    %{{.+}} = select i1 %[[IS_ZERO]], i64 0, i64 %[[R1]]
+// OGCG:       }
+
+int test_builtin_ffsll(long long x) {
+  return __builtin_ffsll(x);
+}
+
+// CIR-LABEL: _Z18test_builtin_ffsllx
+// CIR:         %{{.+}} = cir.ffs %{{.+}} : !s64i
+// CIR:       }
+
+// LLVM-LABEL: _Z18test_builtin_ffsllx
+// LLVM:         %[[INPUT:.+]] = load i64, ptr %{{.+}}, align 8
+// LLVM-NEXT:    %[[CTZ:.+]] = call i64 @llvm.cttz.i64(i64 %[[INPUT]], i1 true)
+// LLVM-NEXT:    %[[R1:.+]] = add i64 %[[CTZ]], 1
+// LLVM-NEXT:    %[[IS_ZERO:.+]] = icmp eq i64 %[[INPUT]], 0
+// LLVM-NEXT:    %{{.+}} = select i1 %[[IS_ZERO]], i64 0, i64 %[[R1]]
+// LLVM:       }
+
+// OGCG-LABEL: _Z18test_builtin_ffsllx
+// OGCG:         %[[INPUT:.+]] = load i64, ptr %{{.+}}, align 8
+// OGCG-NEXT:    %[[CTZ:.+]] = call i64 @llvm.cttz.i64(i64 %[[INPUT]], i1 true)
+// OGCG-NEXT:    %[[R1:.+]] = add i64 %[[CTZ]], 1
+// OGCG-NEXT:    %[[IS_ZERO:.+]] = icmp eq i64 %[[INPUT]], 0
+// OGCG-NEXT:    %{{.+}} = select i1 %[[IS_ZERO]], i64 0, i64 %[[R1]]
+// OGCG:       }
+
 int test_builtin_parity(unsigned x) {
   return __builtin_parity(x);
 }
diff --git a/clang/test/CIR/CodeGen/call.c b/clang/test/CIR/CodeGen/call.c
index 83a66fc..9d516c6 100644
--- a/clang/test/CIR/CodeGen/call.c
+++ b/clang/test/CIR/CodeGen/call.c
@@ -11,7 +11,7 @@ struct S {
 };
 
 void f1(struct S);
-void f2() {
+void f2(void) {
   struct S s;
   f1(s);
 }
@@ -28,8 +28,8 @@ void f2() {
 // OGCG:         %[[S:.+]] = load i64, ptr %{{.+}}, align 4
 // OGCG-NEXT:    call void @f1(i64 %[[S]])
 
-struct S f3();
-void f4() {
+struct S f3(void);
+void f4(void) {
   struct S s = f3();
 }
 
@@ -38,11 +38,11 @@ void f4() {
 // CIR-NEXT:    cir.store align(4) %[[S]], %{{.+}} : !rec_S, !cir.ptr<!rec_S>
 
 // LLVM-LABEL: define{{.*}} void @f4() {
-// LLVM:         %[[S:.+]] = call %struct.S (...) @f3()
+// LLVM:         %[[S:.+]] = call %struct.S @f3()
 // LLVM-NEXT:    store %struct.S %[[S]], ptr %{{.+}}, align 4
 
 // OGCG-LABEL: define{{.*}} void @f4() #0 {
-// OGCG:         %[[S:.+]] = call i64 (...) @f3()
+// OGCG:         %[[S:.+]] = call i64 @f3()
 // OGCG-NEXT:    store i64 %[[S]], ptr %{{.+}}, align 4
 
 struct Big {
@@ -50,9 +50,9 @@ struct Big {
 };
 
 void f5(struct Big);
-struct Big f6();
+struct Big f6(void);
 
-void f7() {
+void f7(void) {
   struct Big b;
   f5(b);
 }
@@ -69,7 +69,7 @@ void f7() {
 // OGCG:         %[[B:.+]] = alloca %struct.Big, align 8
 // OGCG-NEXT:    call void @f5(ptr noundef byval(%struct.Big) align 8 %[[B]])
 
-void f8() {
+void f8(void) {
   struct Big b = f6();
 }
 
@@ -78,14 +78,14 @@ void f8() {
 // CIR:         cir.store align(4) %[[B]], %{{.+}} : !rec_Big, !cir.ptr<!rec_Big>
 
 // LLVM-LABEL: define{{.*}} void @f8() {
-// LLVM:        %[[B:.+]] = call %struct.Big (...) @f6()
+// LLVM:        %[[B:.+]] = call %struct.Big @f6()
 // LLVM-NEXT:   store %struct.Big %[[B]], ptr %{{.+}}, align 4
 
 // OGCG-LABEL: define{{.*}} void @f8() #0 {
 // OGCG:         %[[B:.+]] = alloca %struct.Big, align 4
-// OGCG-NEXT:    call void (ptr, ...) @f6(ptr dead_on_unwind writable sret(%struct.Big) align 4 %[[B]])
+// OGCG-NEXT:    call void @f6(ptr dead_on_unwind writable sret(%struct.Big) align 4 %[[B]])
 
-void f9() {
+void f9(void) {
   f1(f3());
 }
 
@@ -98,14 +98,14 @@ void f9() {
 
 // LLVM-LABEL: define{{.*}} void @f9() {
 // LLVM:         %[[SLOT:.+]] = alloca %struct.S, i64 1, align 4
-// LLVM-NEXT:    %[[RET:.+]] = call %struct.S (...) @f3()
+// LLVM-NEXT:    %[[RET:.+]] = call %struct.S @f3()
 // LLVM-NEXT:    store %struct.S %[[RET]], ptr %[[SLOT]], align 4
 // LLVM-NEXT:    %[[ARG:.+]] = load %struct.S, ptr %[[SLOT]], align 4
 // LLVM-NEXT:    call void @f1(%struct.S %[[ARG]])
 
 // OGCG-LABEL: define{{.*}} void @f9() #0 {
 // OGCG:         %[[SLOT:.+]] = alloca %struct.S, align 4
-// OGCG-NEXT:    %[[RET:.+]] = call i64 (...) @f3()
+// OGCG-NEXT:    %[[RET:.+]] = call i64 @f3()
 // OGCG-NEXT:    store i64 %[[RET]], ptr %[[SLOT]], align 4
 // OGCG-NEXT:    %[[ARG:.+]] = load i64, ptr %[[SLOT]], align 4
 // OGCG-NEXT:    call void @f1(i64 %[[ARG]])
diff --git a/clang/test/CIR/CodeGen/complex-mul-div.cpp b/clang/test/CIR/CodeGen/complex-mul-div.cpp
new file mode 100644
index 0000000..9d71ef7
--- /dev/null
+++ b/clang/test/CIR/CodeGen/complex-mul-div.cpp
@@ -0,0 +1,280 @@
+// complex-range basic
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -complex-range=basic -Wno-unused-value -fclangir -emit-cir -mmlir --mlir-print-ir-before=cir-canonicalize -o %t.cir %s 2>&1 | FileCheck --check-prefix=CIR-BEFORE-BASIC %s
+// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -complex-range=basic -Wno-unused-value -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s --check-prefixes=CIR-AFTER-INT,CIR-AFTER-MUL-COMBINED
+// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -complex-range=basic -Wno-unused-value -fclangir -emit-llvm %s -o %t-cir.ll
+// RUN: FileCheck --input-file=%t-cir.ll %s --check-prefixes=LLVM-INT,LLVM-MUL-COMBINED
+// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -complex-range=basic -Wno-unused-value -emit-llvm %s -o %t.ll
+// RUN: FileCheck --input-file=%t.ll %s --check-prefixes=OGCG-INT,OGCG-MUL-COMBINED
+
+// complex-range improved
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -complex-range=improved -Wno-unused-value -fclangir -emit-cir -mmlir --mlir-print-ir-before=cir-canonicalize -o %t.cir %s 2>&1 | FileCheck --check-prefix=CIR-BEFORE-IMPROVED %s
+// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -complex-range=improved -Wno-unused-value -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s --check-prefixes=CIR-AFTER-INT,CIR-AFTER-MUL-COMBINED
+// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -complex-range=improved -Wno-unused-value -fclangir -emit-llvm %s -o %t-cir.ll
+// RUN: FileCheck --input-file=%t-cir.ll %s --check-prefixes=LLVM-INT,LLVM-MUL-COMBINED
+// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -complex-range=improved -Wno-unused-value -emit-llvm %s -o %t.ll
+// RUN: FileCheck --input-file=%t.ll %s --check-prefixes=OGCG-INT,OGCG-MUL-COMBINED
+
+// complex-range promoted
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -complex-range=promoted -Wno-unused-value -fclangir -emit-cir -mmlir --mlir-print-ir-before=cir-canonicalize -o %t.cir %s 2>&1 | FileCheck --check-prefix=CIR-BEFORE-PROMOTED %s
+// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -complex-range=promoted -Wno-unused-value -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s --check-prefixes=CIR-AFTER-INT,CIR-AFTER-MUL-COMBINED
+// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -complex-range=promoted -Wno-unused-value -fclangir -emit-llvm %s -o %t-cir.ll
+// RUN: FileCheck --input-file=%t-cir.ll %s --check-prefixes=LLVM-INT,LLVM-MUL-COMBINED
+// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -complex-range=promoted -Wno-unused-value -emit-llvm %s -o %t.ll
+// RUN: FileCheck --input-file=%t.ll %s --check-prefixes=OGCG-INT,OGCG-MUL-COMBINED
+
+// complex-range full
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -complex-range=full -Wno-unused-value -fclangir -emit-cir -mmlir --mlir-print-ir-before=cir-canonicalize -o %t.cir %s 2>&1 | FileCheck --check-prefix=CIR-BEFORE-FULL %s
+// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -complex-range=full -Wno-unused-value -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s --check-prefixes=CIR-AFTER-FULL,CIR-AFTER-INT
+// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -complex-range=full -Wno-unused-value -fclangir -emit-llvm %s -o %t-cir.ll
+// RUN: FileCheck --input-file=%t-cir.ll %s --check-prefixes=LLVM-FULL,LLVM-INT
+// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -complex-range=full -Wno-unused-value -emit-llvm %s -o %t.ll
+// RUN: FileCheck --input-file=%t.ll %s --check-prefixes=OGCG-FULL,OGCG-INT
+
+void foo() {
+  float _Complex a;
+  float _Complex b;
+  float _Complex c = a * b;
+}
+
+// CIR-BEFORE-BASIC: %{{.*}} = cir.complex.mul {{.*}}, {{.*}} range(basic) : !cir.complex<!cir.float>
+
+// CIR-BEFORE-IMPROVED: %{{.*}} = cir.complex.mul {{.*}}, {{.*}} range(improved) : !cir.complex<!cir.float>
+
+// CIR-BEFORE-PROMOTED: %{{.*}} = cir.complex.mul {{.*}}, {{.*}} range(promoted) : !cir.complex<!cir.float>
+
+// CIR-AFTER-MUL-COMBINED: %[[A_ADDR:.*]] = cir.alloca !cir.complex<!cir.float>, !cir.ptr<!cir.complex<!cir.float>>, ["a"]
+// CIR-AFTER-MUL-COMBINED: %[[B_ADDR:.*]] = cir.alloca !cir.complex<!cir.float>, !cir.ptr<!cir.complex<!cir.float>>, ["b"]
+// CIR-AFTER-MUL-COMBINED: %[[C_ADDR:.*]] = cir.alloca !cir.complex<!cir.float>, !cir.ptr<!cir.complex<!cir.float>>, ["c", init]
+// CIR-AFTER-MUL-COMBINED: %[[TMP_A:.*]] = cir.load{{.*}} %[[A_ADDR]] : !cir.ptr<!cir.complex<!cir.float>>, !cir.complex<!cir.float>
+// CIR-AFTER-MUL-COMBINED: %[[TMP_B:.*]] = cir.load{{.*}} %[[B_ADDR]] : !cir.ptr<!cir.complex<!cir.float>>, !cir.complex<!cir.float>
+// CIR-AFTER-MUL-COMBINED: %[[A_REAL:.*]] = cir.complex.real %[[TMP_A]] : !cir.complex<!cir.float> -> !cir.float
+// CIR-AFTER-MUL-COMBINED: %[[A_IMAG:.*]] = cir.complex.imag %[[TMP_A]] : !cir.complex<!cir.float> -> !cir.float
+// CIR-AFTER-MUL-COMBINED: %[[B_REAL:.*]] = cir.complex.real %[[TMP_B]] : !cir.complex<!cir.float> -> !cir.float
+// CIR-AFTER-MUL-COMBINED: %[[B_IMAG:.*]] = cir.complex.imag %[[TMP_B]] : !cir.complex<!cir.float> -> !cir.float
+// CIR-AFTER-MUL-COMBINED: %[[MUL_AR_BR:.*]] = cir.binop(mul, %[[A_REAL]], %[[B_REAL]]) : !cir.float
+// CIR-AFTER-MUL-COMBINED: %[[MUL_AI_BI:.*]] = cir.binop(mul, %[[A_IMAG]], %[[B_IMAG]]) : !cir.float
+// CIR-AFTER-MUL-COMBINED: %[[MUL_AR_BI:.*]] = cir.binop(mul, %[[A_REAL]], %[[B_IMAG]]) : !cir.float
+// CIR-AFTER-MUL-COMBINED: %[[MUL_AI_BR:.*]] = cir.binop(mul, %[[A_IMAG]], %[[B_REAL]]) : !cir.float
+// CIR-AFTER-MUL-COMBINED: %[[C_REAL:.*]] = cir.binop(sub, %[[MUL_AR_BR]], %[[MUL_AI_BI]]) : !cir.float
+// CIR-AFTER-MUL-COMBINED: %[[C_IMAG:.*]] = cir.binop(add, %[[MUL_AR_BI]], %[[MUL_AI_BR]]) : !cir.float
+// CIR-AFTER-MUL-COMBINED: %[[RESULT:.*]] = cir.complex.create %[[C_REAL]], %[[C_IMAG]] : !cir.float -> !cir.complex<!cir.float>
+// CIR-AFTER-MUL-COMBINED: cir.store{{.*}} %[[RESULT]], %[[C_ADDR]] : !cir.complex<!cir.float>, !cir.ptr<!cir.complex<!cir.float>>
+
+// LLVM-MUL-COMBINED: %[[A_ADDR:.*]] = alloca { float, float }, i64 1, align 4
+// LLVM-MUL-COMBINED: %[[B_ADDR:.*]] = alloca { float, float }, i64 1, align 4
+// LLVM-MUL-COMBINED: %[[C_ADDR:.*]] = alloca { float, float }, i64 1, align 4
+// LLVM-MUL-COMBINED: %[[TMP_A:.*]] = load { float, float }, ptr %[[A_ADDR]], align 4
+// LLVM-MUL-COMBINED: %[[TMP_B:.*]] = load { float, float }, ptr %[[B_ADDR]], align 4
+// LLVM-MUL-COMBINED: %[[A_REAL:.*]] = extractvalue { float, float } %[[TMP_A]], 0
+// LLVM-MUL-COMBINED: %[[A_IMAG:.*]] = extractvalue { float, float } %[[TMP_A]], 1
+// LLVM-MUL-COMBINED: %[[B_REAL:.*]] = extractvalue { float, float } %[[TMP_B]], 0
+// LLVM-MUL-COMBINED: %[[B_IMAG:.*]] = extractvalue { float, float } %[[TMP_B]], 1
+// LLVM-MUL-COMBINED: %[[MUL_AR_BR:.*]] = fmul float %[[A_REAL]], %[[B_REAL]]
+// LLVM-MUL-COMBINED: %[[MUL_AI_BI:.*]] = fmul float %[[A_IMAG]], %[[B_IMAG]]
+// LLVM-MUL-COMBINED: %[[MUL_AR_BI:.*]] = fmul float %[[A_REAL]], %[[B_IMAG]]
+// LLVM-MUL-COMBINED: %[[MUL_AI_BR:.*]] = fmul float %[[A_IMAG]], %[[B_REAL]]
+// LLVM-MUL-COMBINED: %[[C_REAL:.*]] = fsub float %[[MUL_AR_BR]], %[[MUL_AI_BI]]
+// LLVM-MUL-COMBINED: %[[C_IMAG:.*]] = fadd float %[[MUL_AR_BI]], %[[MUL_AI_BR]]
+// LLVM-MUL-COMBINED: %[[MUL_A_B:.*]] = insertvalue { float, float } {{.*}}, float %[[C_REAL]], 0
+// LLVM-MUL-COMBINED: %[[RESULT:.*]] = insertvalue { float, float } %[[MUL_A_B]], float %[[C_IMAG]], 1
+// LLVM-MUL-COMBINED: store { float, float } %[[RESULT]], ptr %[[C_ADDR]], align 4
+
+// OGCG-MUL-COMBINED: %[[A_ADDR:.*]] = alloca { float, float }, align 4
+// OGCG-MUL-COMBINED: %[[B_ADDR:.*]] = alloca { float, float }, align 4
+// OGCG-MUL-COMBINED: %[[C_ADDR:.*]] = alloca { float, float }, align 4
+// OGCG-MUL-COMBINED: %[[A_REAL_PTR:.*]] = getelementptr inbounds nuw { float, float }, ptr %[[A_ADDR]], i32 0, i32 0
+// OGCG-MUL-COMBINED: %[[A_REAL:.*]] = load float, ptr %[[A_REAL_PTR]], align 4
+// OGCG-MUL-COMBINED: %[[A_IMAG_PTR:.*]] = getelementptr inbounds nuw { float, float }, ptr %[[A_ADDR]], i32 0, i32 1
+// OGCG-MUL-COMBINED: %[[A_IMAG:.*]] = load float, ptr %[[A_IMAG_PTR]], align 4
+// OGCG-MUL-COMBINED: %[[B_REAL_PTR:.*]] = getelementptr inbounds nuw { float, float }, ptr %[[B_ADDR]], i32 0, i32 0
+// OGCG-MUL-COMBINED: %[[B_REAL:.*]] = load float, ptr %[[B_REAL_PTR]], align 4
+// OGCG-MUL-COMBINED: %[[B_IMAG_PTR:.*]] = getelementptr inbounds nuw { float, float }, ptr %[[B_ADDR]], i32 0, i32 1
+// OGCG-MUL-COMBINED: %[[B_IMAG:.*]] = load float, ptr %[[B_IMAG_PTR]], align 4
+// OGCG-MUL-COMBINED: %[[MUL_AR_BR:.*]] = fmul float %[[A_REAL]], %[[B_REAL]]
+// OGCG-MUL-COMBINED: %[[MUL_AI_BI:.*]] = fmul float %[[A_IMAG]], %[[B_IMAG]]
+// OGCG-MUL-COMBINED: %[[MUL_AR_BI:.*]] = fmul float %[[A_REAL]], %[[B_IMAG]]
+// OGCG-MUL-COMBINED: %[[MUL_AI_BR:.*]] = fmul float %[[A_IMAG]], %[[B_REAL]]
+// OGCG-MUL-COMBINED: %[[C_REAL:.*]] = fsub float %[[MUL_AR_BR]], %[[MUL_AI_BI]]
+// OGCG-MUL-COMBINED: %[[C_IMAG:.*]] = fadd float %[[MUL_AR_BI]], %[[MUL_AI_BR]]
+// OGCG-MUL-COMBINED: %[[C_REAL_PTR:.*]] = getelementptr inbounds nuw { float, float }, ptr %[[C_ADDR]], i32 0, i32 0
+// OGCG-MUL-COMBINED: %[[C_IMAG_PTR:.*]] = getelementptr inbounds nuw { float, float }, ptr %[[C_ADDR]], i32 0, i32 1
+// OGCG-MUL-COMBINED: store float %[[C_REAL]], ptr %[[C_REAL_PTR]], align 4
+// OGCG-MUL-COMBINED: store float %[[C_IMAG]], ptr %[[C_IMAG_PTR]], align 4
+
+// CIR-BEFORE-FULL: %{{.*}} = cir.complex.mul {{.*}}, {{.*}} range(full) : !cir.complex<!cir.float>
+
+// CIR-AFTER-FULL: %[[A_ADDR:.*]] = cir.alloca !cir.complex<!cir.float>, !cir.ptr<!cir.complex<!cir.float>>, ["a"]
+// CIR-AFTER-FULL: %[[B_ADDR:.*]] = cir.alloca !cir.complex<!cir.float>, !cir.ptr<!cir.complex<!cir.float>>, ["b"]
+// CIR-AFTER-FULL: %[[C_ADDR:.*]] = cir.alloca !cir.complex<!cir.float>, !cir.ptr<!cir.complex<!cir.float>>, ["c", init]
+// CIR-AFTER-FULL: %[[TMP_A:.*]] = cir.load{{.*}} %[[A_ADDR]] : !cir.ptr<!cir.complex<!cir.float>>, !cir.complex<!cir.float>
+// CIR-AFTER-FULL: %[[TMP_B:.*]] = cir.load{{.*}} %[[B_ADDR]] : !cir.ptr<!cir.complex<!cir.float>>, !cir.complex<!cir.float>
+// CIR-AFTER-FULL: %[[A_REAL:.*]] = cir.complex.real %[[TMP_A]] : !cir.complex<!cir.float> -> !cir.float
+// CIR-AFTER-FULL: %[[A_IMAG:.*]] = cir.complex.imag %[[TMP_A]] : !cir.complex<!cir.float> -> !cir.float
+// CIR-AFTER-FULL: %[[B_REAL:.*]] = cir.complex.real %[[TMP_B]] : !cir.complex<!cir.float> -> !cir.float
+// CIR-AFTER-FULL: %[[B_IMAG:.*]] = cir.complex.imag %[[TMP_B]] : !cir.complex<!cir.float> -> !cir.float
+// CIR-AFTER-FULL: %[[MUL_AR_BR:.*]] = cir.binop(mul, %[[A_REAL]], %[[B_REAL]]) : !cir.float
+// CIR-AFTER-FULL: %[[MUL_AI_BI:.*]] = cir.binop(mul, %[[A_IMAG]], %[[B_IMAG]]) : !cir.float
+// CIR-AFTER-FULL: %[[MUL_AR_BI:.*]] = cir.binop(mul, %[[A_REAL]], %[[B_IMAG]]) : !cir.float
+// CIR-AFTER-FULL: %[[MUL_AI_BR:.*]] = cir.binop(mul, %[[A_IMAG]], %[[B_REAL]]) : !cir.float
+// CIR-AFTER-FULL: %[[C_REAL:.*]] = cir.binop(sub, %[[MUL_AR_BR]], %[[MUL_AI_BI]]) : !cir.float
+// CIR-AFTER-FULL: %[[C_IMAG:.*]] = cir.binop(add, %[[MUL_AR_BI]], %[[MUL_AI_BR]]) : !cir.float
+// CIR-AFTER-FULL: %[[COMPLEX:.*]] = cir.complex.create %[[C_REAL]], %[[C_IMAG]] : !cir.float -> !cir.complex<!cir.float>
+// CIR-AFTER-FULL: %[[IS_C_REAL_NAN:.*]] = cir.cmp(ne, %[[C_REAL]], %[[C_REAL]]) : !cir.float, !cir.bool
+// CIR-AFTER-FULL: %[[IS_C_IMAG_NAN:.*]] = cir.cmp(ne, %[[C_IMAG]], %[[C_IMAG]]) : !cir.float, !cir.bool
+// CIR-AFTER-FULL: %[[CONST_FALSE:.*]] = cir.const #false
+// CIR-AFTER-FULL: %[[SELECT_CONDITION:.*]] = cir.select if %[[IS_C_REAL_NAN]] then %[[IS_C_IMAG_NAN]] else %[[CONST_FALSE]] : (!cir.bool, !cir.bool, !cir.bool) -> !cir.bool
+// CIR-AFTER-FULL: %[[RESULT:.*]] = cir.ternary(%[[SELECT_CONDITION]], true {
+// CIR-AFTER-FULL:   %[[LIBC_COMPLEX:.*]] = cir.call @__mulsc3(%[[A_REAL]], %[[A_IMAG]], %[[B_REAL]], %[[B_IMAG]]) : (!cir.float, !cir.float, !cir.float, !cir.float) -> !cir.complex<!cir.float>
+// CIR-AFTER-FULL:   cir.yield %[[LIBC_COMPLEX]] : !cir.complex<!cir.float>
+// CIR-AFTER-FULL: }, false {
+// CIR-AFTER-FULL:   cir.yield %[[COMPLEX]] : !cir.complex<!cir.float>
+// CIR-AFTER-FULL: }) : (!cir.bool) -> !cir.complex<!cir.float>
+// CIR-AFTER-FULL: cir.store{{.*}} %[[RESULT]], %[[C_ADDR]] : !cir.complex<!cir.float>, !cir.ptr<!cir.complex<!cir.float>>
+
+// LLVM-FULL: %[[A_ADDR:.*]] = alloca { float, float }, i64 1, align 4
+// LLVM-FULL: %[[B_ADDR:.*]] = alloca { float, float }, i64 1, align 4
+// LLVM-FULL: %[[C_ADDR:.*]] = alloca { float, float }, i64 1, align 4
+// LLVM-FULL: %[[TMP_A:.*]] = load { float, float }, ptr %[[A_ADDR]], align 4
+// LLVM-FULL: %[[TMP_B:.*]] = load { float, float }, ptr %[[B_ADDR]], align 4
+// LLVM-FULL: %[[A_REAL:.*]] = extractvalue { float, float } %[[TMP_A]], 0
+// LLVM-FULL: %[[A_IMAG:.*]] = extractvalue { float, float } %[[TMP_A]], 1
+// LLVM-FULL: %[[B_REAL:.*]] = extractvalue { float, float } %[[TMP_B]], 0
+// LLVM-FULL: %[[B_IMAG:.*]] = extractvalue { float, float } %[[TMP_B]], 1
+// LLVM-FULL: %[[MUL_AR_BR:.*]] = fmul float %[[A_REAL]], %[[B_REAL]]
+// LLVM-FULL: %[[MUL_AI_BI:.*]] = fmul float %[[A_IMAG]], %[[B_IMAG]]
+// LLVM-FULL: %[[MUL_AR_BI:.*]] = fmul float %[[A_REAL]], %[[B_IMAG]]
+// LLVM-FULL: %[[MUL_AI_BR:.*]] = fmul float %[[A_IMAG]], %[[B_REAL]]
+// LLVM-FULL: %[[C_REAL:.*]] = fsub float %[[MUL_AR_BR]], %[[MUL_AI_BI]]
+// LLVM-FULL: %[[C_IMAG:.*]] = fadd float %[[MUL_AR_BI]], %[[MUL_AI_BR]]
+// LLVM-FULL: %[[MUL_A_B:.*]] = insertvalue { float, float } {{.*}}, float %[[C_REAL]], 0
+// LLVM-FULL: %[[COMPLEX:.*]] = insertvalue { float, float } %[[MUL_A_B]], float %[[C_IMAG]], 1
+// LLVM-FULL: %[[IS_C_REAL_NAN:.*]] = fcmp une float %[[C_REAL]], %[[C_REAL]]
+// LLVM-FULL: %[[IS_C_IMAG_NAN:.*]] = fcmp une float %[[C_IMAG]], %[[C_IMAG]]
+// LLVM-FULL: %[[SELECT_CONDITION:.*]] = and i1 %[[IS_C_REAL_NAN]], %[[IS_C_IMAG_NAN]]
+// LLVM-FULL: br i1 %[[SELECT_CONDITION]], label %[[THEN_LABEL:.*]], label %[[ELSE_LABEL:.*]]
+// LLVM-FULL: [[THEN_LABEL]]:
+// LLVM-FULL:  %[[LIBC_COMPLEX:.*]] = call { float, float } @__mulsc3(float %[[A_REAL]], float %[[A_IMAG]], float %[[B_REAL]], float %[[B_IMAG]])
+// LLVM-FULL:  br label %[[PHI_BRANCH:.*]]
+// LLVM-FULL: [[ELSE_LABEL]]:
+// LLVM-FULL:  br label %[[PHI_BRANCH:]]
+// LLVM-FULL: [[PHI_BRANCH:]]:
+// LLVM-FULL:  %[[RESULT:.*]] = phi { float, float } [ %[[COMPLEX]], %[[ELSE_LABEL]] ], [ %[[LIBC_COMPLEX]], %[[THEN_LABEL]] ]
+// LLVM-FULL:  br label %[[END_LABEL:.*]]
+// LLVM-FULL: [[END_LABEL]]:
+// LLVM-FULL:  store { float, float } %[[RESULT]], ptr %[[C_ADDR]], align 4
+
+// OGCG-FULL: %[[A_ADDR:.*]] = alloca { float, float }, align 4
+// OGCG-FULL: %[[B_ADDR:.*]] = alloca { float, float }, align 4
+// OGCG-FULL: %[[C_ADDR:.*]] = alloca { float, float }, align 4
+// OGCG-FULL: %[[COMPLEX_CALL_ADDR:.*]] = alloca { float, float }, align 4
+// OGCG-FULL: %[[A_REAL_PTR:.*]] = getelementptr inbounds nuw { float, float }, ptr %[[A_ADDR]], i32 0, i32 0
+// OGCG-FULL: %[[A_REAL:.*]] = load float, ptr %[[A_REAL_PTR]], align 4
+// OGCG-FULL: %[[A_IMAG_PTR:.*]] = getelementptr inbounds nuw { float, float }, ptr %[[A_ADDR]], i32 0, i32 1
+// OGCG-FULL: %[[A_IMAG:.*]] = load float, ptr %[[A_IMAG_PTR]], align 4
+// OGCG-FULL: %[[B_REAL_PTR:.*]] = getelementptr inbounds nuw { float, float }, ptr %[[B_ADDR]], i32 0, i32 0
+// OGCG-FULL: %[[B_REAL:.*]] = load float, ptr %[[B_REAL_PTR]], align 4
+// OGCG-FULL: %[[B_IMAG_PTR:.*]] = getelementptr inbounds nuw { float, float }, ptr %[[B_ADDR]], i32 0, i32 1
+// OGCG-FULL: %[[B_IMAG:.*]] = load float, ptr %[[B_IMAG_PTR]], align 4
+// OGCG-FULL: %[[MUL_AR_BR:.*]] = fmul float %[[A_REAL]], %[[B_REAL]]
+// OGCG-FULL: %[[MUL_AI_BI:.*]] = fmul float %[[A_IMAG]], %[[B_IMAG]]
+// OGCG-FULL: %[[MUL_AR_BI:.*]] = fmul float %[[A_REAL]], %[[B_IMAG]]
+// OGCG-FULL: %[[MUL_AI_BR:.*]] = fmul float %[[A_IMAG]], %[[B_REAL]]
+// OGCG-FULL: %[[C_REAL:.*]] = fsub float %[[MUL_AR_BR]], %[[MUL_AI_BI]]
+// OGCG-FULL: %[[C_IMAG:.*]] = fadd float %[[MUL_AR_BI]], %[[MUL_AI_BR]]
+// OGCG-FULL: %[[IS_C_REAL_NAN:.*]] = fcmp uno float %[[C_REAL]], %[[C_REAL]]
+// OGCG-FULL: br i1 %[[IS_C_REAL_NAN]], label %[[COMPLEX_IS_IMAG_NAN:.*]], label %[[END_LABEL:.*]], !prof !2
+// OGCG-FULL: [[COMPLEX_IS_IMAG_NAN]]:
+// OGCG-FULL:  %[[IS_C_IMAG_NAN:.*]] = fcmp uno float %[[C_IMAG]], %[[C_IMAG]]
+// OGCG-FULL:  br i1 %[[IS_C_IMAG_NAN]], label %[[COMPLEX_LIB_CALL:.*]], label %[[END_LABEL]], !prof !2
+// OGCG-FULL: [[COMPLEX_LIB_CALL]]:
+// OGCG-FULL:  %[[CALL_RESULT:.*]] = call noundef <2 x float> @__mulsc3(float noundef %[[A_REAL]], float noundef %[[A_IMAG]], float noundef %[[B_REAL]], float noundef %[[B_IMAG]])
+// OGCG-FULL:  store <2 x float> %[[CALL_RESULT]], ptr %[[COMPLEX_CALL_ADDR]], align 4
+// OGCG-FULL:  %[[COMPLEX_CALL_REAL_PTR:.*]] = getelementptr inbounds nuw { float, float }, ptr %[[COMPLEX_CALL_ADDR]], i32 0, i32 0
+// OGCG-FULL:  %[[COMPLEX_CALL_REAL:.*]] = load float, ptr %[[COMPLEX_CALL_REAL_PTR]], align 4
+// OGCG-FULL:  %[[COMPLEX_CALL_IMAG_PTR:.*]] = getelementptr inbounds nuw { float, float }, ptr %[[COMPLEX_CALL_ADDR]], i32 0, i32 1
+// OGCG-FULL:  %[[COMPLEX_CALL_IMAG:.*]] = load float, ptr %[[COMPLEX_CALL_IMAG_PTR]], align 4
+// OGCG-FULL:  br label %[[END_LABEL]]
+// OGCG-FULL: [[END_LABEL]]:
+// OGCG-FULL:  %[[FINAL_REAL:.*]] = phi float [ %[[C_REAL]], %[[ENTRY:.*]] ], [ %[[C_REAL]], %[[COMPLEX_IS_IMAG_NAN]] ], [ %[[COMPLEX_CALL_REAL]], %[[COMPLEX_LIB_CALL]] ]
+// OGCG-FULL:  %[[FINAL_IMAG:.*]] = phi float [ %[[C_IMAG]], %[[ENTRY]] ], [ %[[C_IMAG]], %[[COMPLEX_IS_IMAG_NAN]] ], [ %[[COMPLEX_CALL_IMAG]], %[[COMPLEX_LIB_CALL]] ]
+// OGCG-FULL:  %[[C_REAL_PTR:.*]] = getelementptr inbounds nuw { float, float }, ptr %[[C_ADDR]], i32 0, i32 0
+// OGCG-FULL:  %[[C_IMAG_PTR:.*]] = getelementptr inbounds nuw { float, float }, ptr %[[C_ADDR]], i32 0, i32 1
+// OGCG-FULL:  store float %[[FINAL_REAL]], ptr %[[C_REAL_PTR]], align 4
+// OGCG-FULL:  store float %[[FINAL_IMAG]], ptr %[[C_IMAG_PTR]], align 4
+
+void foo1() {
+  int _Complex a;
+  int _Complex b;
+  int _Complex c = a * b;
+}
+
+// CIR-BEFORE-BASIC: %{{.*}} = cir.complex.mul {{.*}}, {{.*}} range(basic) : !cir.complex<!s32i>
+
+// CIR-BEFORE-IMPROVED: %{{.*}} = cir.complex.mul {{.*}}, {{.*}} range(improved) : !cir.complex<!s32i>
+
+// CIR-BEFORE-PROMOTED: %{{.*}} = cir.complex.mul {{.*}}, {{.*}} range(promoted) : !cir.complex<!s32i>
+
+// CIR-BEFORE-FULL: %{{.*}} = cir.complex.mul {{.*}}, {{.*}} range(full) : !cir.complex<!s32i>
+
+// CIR-AFTER-INT: %[[A_ADDR:.*]] = cir.alloca !cir.complex<!s32i>, !cir.ptr<!cir.complex<!s32i>>, ["a"]
+// CIR-AFTER-INT: %[[B_ADDR:.*]] = cir.alloca !cir.complex<!s32i>, !cir.ptr<!cir.complex<!s32i>>, ["b"]
+// CIR-AFTER-INT: %[[C_ADDR:.*]] = cir.alloca !cir.complex<!s32i>, !cir.ptr<!cir.complex<!s32i>>, ["c", init]
+// CIR-AFTER-INT: %[[TMP_A:.*]] = cir.load{{.*}} %[[A_ADDR]] : !cir.ptr<!cir.complex<!s32i>>, !cir.complex<!s32i>
+// CIR-AFTER-INT: %[[TMP_B:.*]] = cir.load{{.*}} %[[B_ADDR]] : !cir.ptr<!cir.complex<!s32i>>, !cir.complex<!s32i>
+// CIR-AFTER-INT: %[[A_REAL:.*]] = cir.complex.real %[[TMP_A]] : !cir.complex<!s32i> -> !s32i
+// CIR-AFTER-INT: %[[A_IMAG:.*]] = cir.complex.imag %[[TMP_A]] : !cir.complex<!s32i> -> !s32i
+// CIR-AFTER-INT: %[[B_REAL:.*]] = cir.complex.real %[[TMP_B]] : !cir.complex<!s32i> -> !s32i
+// CIR-AFTER-INT: %[[B_IMAG:.*]] = cir.complex.imag %[[TMP_B]] : !cir.complex<!s32i> -> !s32i
+// CIR-AFTER-INT: %[[MUL_AR_BR:.*]] = cir.binop(mul, %[[A_REAL]], %[[B_REAL]]) : !s32i
+// CIR-AFTER-INT: %[[MUL_AI_BI:.*]] = cir.binop(mul, %[[A_IMAG]], %[[B_IMAG]]) : !s32i
+// CIR-AFTER-INT: %[[MUL_AR_BI:.*]] = cir.binop(mul, %[[A_REAL]], %[[B_IMAG]]) : !s32i
+// CIR-AFTER-INT: %[[MUL_AI_BR:.*]] = cir.binop(mul, %[[A_IMAG]], %[[B_REAL]]) : !s32i
+// CIR-AFTER-INT: %[[C_REAL:.*]] = cir.binop(sub, %[[MUL_AR_BR]], %[[MUL_AI_BI]]) : !s32i
+// CIR-AFTER-INT: %[[C_IMAG:.*]] = cir.binop(add, %[[MUL_AR_BI]], %[[MUL_AI_BR]]) : !s32i
+// CIR-AFTER-INT: %[[RESULT:.*]] = cir.complex.create %[[C_REAL]], %[[C_IMAG]] : !s32i -> !cir.complex<!s32i>
+// CIR-AFTER-INT: cir.store{{.*}} %[[RESULT]], %[[C_ADDR]] : !cir.complex<!s32i>, !cir.ptr<!cir.complex<!s32i>>
+
+// LLVM-INT: %[[A_ADDR:.*]] = alloca { i32, i32 }, i64 1, align 4
+// LLVM-INT: %[[B_ADDR:.*]] = alloca { i32, i32 }, i64 1, align 4
+// LLVM-INT: %[[C_ADDR:.*]] = alloca { i32, i32 }, i64 1, align 4
+// LLVM-INT: %[[TMP_A:.*]] = load { i32, i32 }, ptr %[[A_ADDR]], align 4
+// LLVM-INT: %[[TMP_B:.*]] = load { i32, i32 }, ptr %[[B_ADDR]], align 4
+// LLVM-INT: %[[A_REAL:.*]] = extractvalue { i32, i32 } %[[TMP_A]], 0
+// LLVM-INT: %[[A_IMAG:.*]] = extractvalue { i32, i32 } %[[TMP_A]], 1
+// LLVM-INT: %[[B_REAL:.*]] = extractvalue { i32, i32 } %[[TMP_B]], 0
+// LLVM-INT: %[[B_IMAG:.*]] = extractvalue { i32, i32 } %[[TMP_B]], 1
+// LLVM-INT: %[[MUL_AR_BR:.*]] = mul i32 %[[A_REAL]], %[[B_REAL]]
+// LLVM-INT: %[[MUL_AI_BI:.*]] = mul i32 %[[A_IMAG]], %[[B_IMAG]]
+// LLVM-INT: %[[MUL_AR_BI:.*]] = mul i32 %[[A_REAL]], %[[B_IMAG]]
+// LLVM-INT: %[[MUL_AI_BR:.*]] = mul i32 %[[A_IMAG]], %[[B_REAL]]
+// LLVM-INT: %[[C_REAL:.*]] = sub i32 %[[MUL_AR_BR]], %[[MUL_AI_BI]]
+// LLVM-INT: %[[C_IMAG:.*]] = add i32 %[[MUL_AR_BI]], %[[MUL_AI_BR]]
+// LLVM-INT: %[[MUL_A_B:.*]] = insertvalue { i32, i32 } {{.*}}, i32 %[[C_REAL]], 0
+// LLVM-INT: %[[RESULT:.*]] = insertvalue { i32, i32 } %[[MUL_A_B]], i32 %[[C_IMAG]], 1
+// LLVM-INT: store { i32, i32 } %[[RESULT]], ptr %[[C_ADDR]], align 4
+
+// OGCG-INT: %[[A_ADDR:.*]] = alloca { i32, i32 }, align 4
+// OGCG-INT: %[[B_ADDR:.*]] = alloca { i32, i32 }, align 4
+// OGCG-INT: %[[C_ADDR:.*]] = alloca { i32, i32 }, align 4
+// OGCG-INT: %[[A_REAL_PTR:.*]] = getelementptr inbounds nuw { i32, i32 }, ptr %[[A_ADDR]], i32 0, i32 0
+// OGCG-INT: %[[A_REAL:.*]] = load i32, ptr %[[A_REAL_PTR]], align 4
+// OGCG-INT: %[[A_IMAG_PTR:.*]] = getelementptr inbounds nuw { i32, i32 }, ptr %[[A_ADDR]], i32 0, i32 1
+// OGCG-INT: %[[A_IMAG:.*]] = load i32, ptr %[[A_IMAG_PTR]], align 4
+// OGCG-INT: %[[B_REAL_PTR:.*]] = getelementptr inbounds nuw { i32, i32 }, ptr %[[B_ADDR]], i32 0, i32 0
+// OGCG-INT: %[[B_REAL:.*]] = load i32, ptr %[[B_REAL_PTR]], align 4
+// OGCG-INT: %[[B_IMAG_PTR:.*]] = getelementptr inbounds nuw { i32, i32 }, ptr %[[B_ADDR]], i32 0, i32 1
+// OGCG-INT: %[[B_IMAG:.*]] = load i32, ptr %[[B_IMAG_PTR]], align 4
+// OGCG-INT: %[[MUL_AR_BR:.*]] = mul i32 %[[A_REAL]], %[[B_REAL]]
+// OGCG-INT: %[[MUL_AI_BI:.*]] = mul i32 %[[A_IMAG]], %[[B_IMAG]]
+// OGCG-INT: %[[C_REAL:.*]] = sub i32 %[[MUL_AR_BR]], %[[MUL_AI_BI]]
+// OGCG-INT: %[[MUL_AI_BR:.*]] = mul i32 %[[A_IMAG]], %[[B_REAL]]
+// OGCG-INT: %[[MUL_AR_BI:.*]] = mul i32 %[[A_REAL]], %[[B_IMAG]]
+// OGCG-INT: %[[C_IMAG:.*]] = add i32 %[[MUL_AI_BR]], %[[MUL_AR_BI]]
+// OGCG-INT: %[[C_REAL_PTR:.*]] = getelementptr inbounds nuw { i32, i32 }, ptr %[[C_ADDR]], i32 0, i32 0
+// OGCG-INT: %[[C_IMAG_PTR:.*]] = getelementptr inbounds nuw { i32, i32 }, ptr %[[C_ADDR]], i32 0, i32 1
+// OGCG-INT: store i32 %[[C_REAL]], ptr %[[C_REAL_PTR]], align 4
+// OGCG-INT: store i32 %[[C_IMAG]], ptr %[[C_IMAG_PTR]], align 4
diff --git a/clang/test/CIR/CodeGen/complex-unary.cpp b/clang/test/CIR/CodeGen/complex-unary.cpp
index 676b554..4cd81eb 100644
--- a/clang/test/CIR/CodeGen/complex-unary.cpp
+++ b/clang/test/CIR/CodeGen/complex-unary.cpp
@@ -284,3 +284,89 @@ void foo6() {
 // OGCG: %[[RESULT_IMAG_PTR:.*]] = getelementptr inbounds nuw { float, float }, ptr %[[B_ADDR]], i32 0, i32 1
 // OGCG: store float %[[A_REAL_DEC]], ptr %[[RESULT_REAL_PTR]], align 4
 // OGCG: store float %[[A_IMAG]], ptr %[[RESULT_IMAG_PTR]], align 4
+
+void foo7() {
+  float _Complex a;
+  float _Complex b = +a;
+}
+
+// CIR-BEFORE: %[[A_ADDR:.*]] = cir.alloca !cir.complex<!cir.float>, !cir.ptr<!cir.complex<!cir.float>>, ["a"]
+// CIR-BEFORE: %[[B_ADDR:.*]] = cir.alloca !cir.complex<!cir.float>, !cir.ptr<!cir.complex<!cir.float>>, ["b", init]
+// CIR-BEFORE: %[[TMP:.*]] = cir.load{{.*}} %[[A_ADDR]] : !cir.ptr<!cir.complex<!cir.float>>, !cir.complex<!cir.float>
+// CIR-BEFORE: %[[COMPLEX_PLUS:.*]] = cir.unary(plus, %[[TMP]]) : !cir.complex<!cir.float>, !cir.complex<!cir.float>
+// CIR-BEFORE: cir.store{{.*}} %[[COMPLEX_PLUS]], %[[B_ADDR]] : !cir.complex<!cir.float>, !cir.ptr<!cir.complex<!cir.float>>
+
+// CIR-AFTER: %[[A_ADDR:.*]] = cir.alloca !cir.complex<!cir.float>, !cir.ptr<!cir.complex<!cir.float>>, ["a"]
+// CIR-AFTER: %[[B_ADDR:.*]] = cir.alloca !cir.complex<!cir.float>, !cir.ptr<!cir.complex<!cir.float>>, ["b", init]
+// CIR-AFTER: %[[TMP:.*]] = cir.load{{.*}} %[[A_ADDR]] : !cir.ptr<!cir.complex<!cir.float>>, !cir.complex<!cir.float>
+// CIR-AFTER: %[[REAL:.*]] = cir.complex.real %[[TMP]] : !cir.complex<!cir.float> -> !cir.float
+// CIR-AFTER: %[[IMAG:.*]] = cir.complex.imag %[[TMP]] : !cir.complex<!cir.float> -> !cir.float
+// CIR-AFTER: %[[REAL_PLUS:.*]] = cir.unary(plus, %[[REAL]]) : !cir.float, !cir.float
+// CIR-AFTER: %[[IMAG_PLUS:.*]] = cir.unary(plus, %[[IMAG]]) : !cir.float, !cir.float
+// CIR-AFTER: %[[NEW_COMPLEX:.*]] = cir.complex.create %[[REAL_PLUS]], %[[IMAG_PLUS]] : !cir.float -> !cir.complex<!cir.float>
+// CIR-AFTER: cir.store{{.*}} %[[NEW_COMPLEX]], %[[B_ADDR]] : !cir.complex<!cir.float>, !cir.ptr<!cir.complex<!cir.float>>
+
+// LLVM: %[[A_ADDR:.*]] = alloca { float, float }, i64 1, align 4
+// LLVM: %[[B_ADDR:.*]] = alloca { float, float }, i64 1, align 4
+// LLVM: %[[TMP:.*]] = load { float, float }, ptr %[[A_ADDR]], align 4
+// LLVM: %[[REAL:.*]] = extractvalue { float, float } %[[TMP]], 0
+// LLVM: %[[IMAG:.*]] = extractvalue { float, float } %[[TMP]], 1
+// LLVM: %[[RESULT_TMP:.*]] = insertvalue { float, float } {{.*}}, float %[[REAL]], 0
+// LLVM: %[[RESULT_VAL:.*]] = insertvalue { float, float } %[[RESULT_TMP]], float %[[IMAG]], 1
+// LLVM: store { float, float } %[[RESULT_VAL]], ptr %[[B_ADDR]], align 4
+
+// OGCG: %[[A_ADDR:.*]] = alloca { float, float }, align 4
+// OGCG: %[[B_ADDR:.*]] = alloca { float, float }, align 4
+// OGCG: %[[A_REAL_PTR:.*]] = getelementptr inbounds nuw { float, float }, ptr %[[A_ADDR]], i32 0, i32 0
+// OGCG: %[[A_REAL:.*]] = load float, ptr %[[A_REAL_PTR]], align 4
+// OGCG: %[[A_IMAG_PTR:.*]] = getelementptr inbounds nuw { float, float }, ptr %[[A_ADDR]], i32 0, i32 1
+// OGCG: %[[A_IMAG:.*]] = load float, ptr %[[A_IMAG_PTR]], align 4
+// OGCG: %[[B_REAL_PTR:.*]] = getelementptr inbounds nuw { float, float }, ptr %[[B_ADDR]], i32 0, i32 0
+// OGCG: %[[B_IMAG_PTR:.*]] = getelementptr inbounds nuw { float, float }, ptr %[[B_ADDR]], i32 0, i32 1
+// OGCG: store float %[[A_REAL]], ptr %[[B_REAL_PTR]], align 4
+// OGCG: store float %[[A_IMAG]], ptr %[[B_IMAG_PTR]], align 4
+
+void foo8() {
+  float _Complex a;
+  float _Complex b = -a;
+}
+
+// CIR-BEFORE: %[[A_ADDR:.*]] = cir.alloca !cir.complex<!cir.float>, !cir.ptr<!cir.complex<!cir.float>>, ["a"]
+// CIR-BEFORE: %[[B_ADDR:.*]] = cir.alloca !cir.complex<!cir.float>, !cir.ptr<!cir.complex<!cir.float>>, ["b", init]
+// CIR-BEFORE: %[[TMP:.*]] = cir.load{{.*}} %[[A_ADDR]] : !cir.ptr<!cir.complex<!cir.float>>, !cir.complex<!cir.float>
+// CIR-BEFORE: %[[COMPLEX_MINUS:.*]] = cir.unary(minus, %[[TMP]]) : !cir.complex<!cir.float>, !cir.complex<!cir.float>
+// CIR-BEFORE: cir.store{{.*}} %[[COMPLEX_MINUS]], %[[B_ADDR]] : !cir.complex<!cir.float>, !cir.ptr<!cir.complex<!cir.float>>
+
+// CIR-AFTER: %[[A_ADDR:.*]] = cir.alloca !cir.complex<!cir.float>, !cir.ptr<!cir.complex<!cir.float>>, ["a"]
+// CIR-AFTER: %[[B_ADDR:.*]] = cir.alloca !cir.complex<!cir.float>, !cir.ptr<!cir.complex<!cir.float>>, ["b", init]
+// CIR-AFTER: %[[TMP:.*]] = cir.load{{.*}} %[[A_ADDR]] : !cir.ptr<!cir.complex<!cir.float>>, !cir.complex<!cir.float>
+// CIR-AFTER: %[[REAL:.*]] = cir.complex.real %[[TMP]] : !cir.complex<!cir.float> -> !cir.float
+// CIR-AFTER: %[[IMAG:.*]] = cir.complex.imag %[[TMP]] : !cir.complex<!cir.float> -> !cir.float
+// CIR-AFTER: %[[REAL_MINUS:.*]] = cir.unary(minus, %[[REAL]]) : !cir.float, !cir.float
+// CIR-AFTER: %[[IMAG_MINUS:.*]] = cir.unary(minus, %[[IMAG]]) : !cir.float, !cir.float
+// CIR-AFTER: %[[NEW_COMPLEX:.*]] = cir.complex.create %[[REAL_MINUS]], %[[IMAG_MINUS]] : !cir.float -> !cir.complex<!cir.float>
+// CIR-AFTER: cir.store{{.*}} %[[NEW_COMPLEX]], %[[B_ADDR]] : !cir.complex<!cir.float>, !cir.ptr<!cir.complex<!cir.float>>
+
+// LLVM: %[[A_ADDR:.*]] = alloca { float, float }, i64 1, align 4
+// LLVM: %[[B_ADDR:.*]] = alloca { float, float }, i64 1, align 4
+// LLVM: %[[TMP:.*]] = load { float, float }, ptr %[[A_ADDR]], align 4
+// LLVM: %[[REAL:.*]] = extractvalue { float, float } %[[TMP]], 0
+// LLVM: %[[IMAG:.*]] = extractvalue { float, float } %[[TMP]], 1
+// LLVM: %[[REAL_MINUS:.*]] = fneg float %[[REAL]]
+// LLVM: %[[IMAG_MINUS:.*]] = fneg float %[[IMAG]]
+// LLVM: %[[RESULT_TMP:.*]] = insertvalue { float, float } {{.*}}, float %[[REAL_MINUS]], 0
+// LLVM: %[[RESULT_VAL:.*]] = insertvalue { float, float } %[[RESULT_TMP]], float %[[IMAG_MINUS]], 1
+// LLVM: store { float, float } %[[RESULT_VAL]], ptr %[[B_ADDR]], align 4
+
+// OGCG: %[[A_ADDR:.*]] = alloca { float, float }, align 4
+// OGCG: %[[B_ADDR:.*]] = alloca { float, float }, align 4
+// OGCG: %[[A_REAL_PTR:.*]] = getelementptr inbounds nuw { float, float }, ptr %[[A_ADDR]], i32 0, i32 0
+// OGCG: %[[A_REAL:.*]] = load float, ptr %[[A_REAL_PTR]], align 4
+// OGCG: %[[A_IMAG_PTR:.*]] = getelementptr inbounds nuw { float, float }, ptr %[[A_ADDR]], i32 0, i32 1
+// OGCG: %[[A_IMAG:.*]] = load float, ptr %[[A_IMAG_PTR]], align 4
+// OGCG: %[[A_REAL_MINUS:.*]] = fneg float %[[A_REAL]]
+// OGCG: %[[A_IMAG_MINUS:.*]] = fneg float %[[A_IMAG]]
+// OGCG: %[[B_REAL_PTR:.*]] = getelementptr inbounds nuw { float, float }, ptr %[[B_ADDR]], i32 0, i32 0
+// OGCG: %[[B_IMAG_PTR:.*]] = getelementptr inbounds nuw { float, float }, ptr %[[B_ADDR]], i32 0, i32 1
+// OGCG: store float %[[A_REAL_MINUS]], ptr %[[B_REAL_PTR]], align 4
+// OGCG: store float %[[A_IMAG_MINUS]], ptr %[[B_IMAG_PTR]], align 4
diff --git a/clang/test/CIR/CodeGen/ctor-alias.cpp b/clang/test/CIR/CodeGen/ctor-alias.cpp
index a20e206..c4bf455 100644
--- a/clang/test/CIR/CodeGen/ctor-alias.cpp
+++ b/clang/test/CIR/CodeGen/ctor-alias.cpp
@@ -11,6 +11,8 @@ struct B {
 B::B() {
 }
 
+// LLVM: @_ZN1BC1Ev = alias void (ptr), ptr @_ZN1BC2Ev
+
 // OGCG: @_ZN1BC1Ev = unnamed_addr alias void (ptr), ptr @_ZN1BC2Ev
 
 // CHECK: cir.func{{.*}} @_ZN1BC2Ev(%arg0: !cir.ptr<!rec_B>
@@ -25,15 +27,30 @@ B::B() {
 // LLVM:   store ptr %[[THIS_ARG]], ptr %[[THIS_ADDR]]
 // LLVM:   %[[THIS:.*]] = load ptr, ptr %[[THIS_ADDR]]
 
-// This should be an alias, like the similar OGCG alias above, but that's not
-// implemented yet.
-// LLVM: declare dso_local void @_ZN1BC1Ev(ptr)
-
 // OGCG: define{{.*}} @_ZN1BC2Ev(ptr{{.*}} %[[THIS_ARG:.*]])
 // OGCG:   %[[THIS_ADDR:.*]] = alloca ptr
 // OGCG:   store ptr %[[THIS_ARG]], ptr %[[THIS_ADDR]]
 // OGCG:   %[[THIS:.*]] = load ptr, ptr %[[THIS_ADDR]]
 
+void bar() {
+  B b;
+}
+
+// CHECK: cir.func{{.*}} @_Z3barv()
+// CHECK:   %[[B:.*]] = cir.alloca !rec_B, !cir.ptr<!rec_B>, ["b", init]
+// CHECK:   cir.call @_ZN1BC1Ev(%[[B]]) : (!cir.ptr<!rec_B>) -> ()
+// CHECK:   cir.return
+
+// LLVM: define{{.*}} void @_Z3barv()
+// LLVM:   %[[B:.*]] = alloca %struct.B, i64 1, align 1
+// LLVM:   call void @_ZN1BC1Ev(ptr %[[B]])
+// LLVM:   ret void
+
+// OGCG: define{{.*}} void @_Z3barv()
+// OGCG:   %[[B:.*]] = alloca %struct.B, align 1
+// OGCG:   call void @_ZN1BC1Ev(ptr{{.*}} %[[B]])
+// OGCG:   ret void
+
 // The constructor in this cases is handled by RAUW rather than aliasing.
 struct Struk {
   Struk() {}
diff --git a/clang/test/CIR/CodeGen/cxx-conversion-operators.cpp b/clang/test/CIR/CodeGen/cxx-conversion-operators.cpp
new file mode 100644
index 0000000..a386a41
--- /dev/null
+++ b/clang/test/CIR/CodeGen/cxx-conversion-operators.cpp
@@ -0,0 +1,124 @@
+// RUN: %clang_cc1 -std=c++11 -triple aarch64-none-linux-android21 -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -std=c++11 -triple aarch64-none-linux-android21 -fclangir -emit-llvm %s -o %t-cir.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t-cir.ll %s
+// RUN: %clang_cc1 -std=c++11 -triple aarch64-none-linux-android21 -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=OGCG --input-file=%t.ll %s
+
+struct inline_operator {
+  operator int() const { return 987; }
+
+  int operator+(inline_operator) { return 666; }
+};
+
+struct out_of_line_operator {
+  operator int();
+};
+
+out_of_line_operator::operator int() { return 123; }
+
+void test() {
+  int x = 42;
+
+  inline_operator i;
+  x = i;
+
+  out_of_line_operator o;
+  x = o;
+}
+
+// CIR: cir.func dso_local @_ZN20out_of_line_operatorcviEv(%[[THIS_ARG:.+]]: !cir.ptr<!rec_out_of_line_operator>{{.*}}) -> !s32i
+// CIR:   %[[THIS_ALLOCA:.+]] = cir.alloca !cir.ptr<!rec_out_of_line_operator>, !cir.ptr<!cir.ptr<!rec_out_of_line_operator>>, ["this", init]
+// CIR:   %[[RETVAL:.+]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["__retval"]
+// CIR:   cir.store %[[THIS_ARG]], %[[THIS_ALLOCA]] : !cir.ptr<!rec_out_of_line_operator>, !cir.ptr<!cir.ptr<!rec_out_of_line_operator>>
+// CIR:   %[[THIS_LOAD:.+]] = cir.load %[[THIS_ALLOCA]] : !cir.ptr<!cir.ptr<!rec_out_of_line_operator>>, !cir.ptr<!rec_out_of_line_operator>
+// CIR:   %[[CONST_123:.+]] = cir.const #cir.int<123> : !s32i
+// CIR:   cir.store %[[CONST_123]], %[[RETVAL]] : !s32i, !cir.ptr<!s32i>
+// CIR:   %[[RET_LOAD:.+]] = cir.load %[[RETVAL]] : !cir.ptr<!s32i>, !s32i
+// CIR:   cir.return %[[RET_LOAD]] : !s32i
+// CIR: }
+
+// CIR: cir.func comdat linkonce_odr @_ZNK15inline_operatorcviEv(%[[INLINE_THIS_ARG:.+]]: !cir.ptr<!rec_inline_operator>{{.*}}) -> !s32i
+// CIR:   %[[INLINE_THIS_ALLOCA:.+]] = cir.alloca !cir.ptr<!rec_inline_operator>, !cir.ptr<!cir.ptr<!rec_inline_operator>>, ["this", init]
+// CIR:   %[[INLINE_RETVAL:.+]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["__retval"]
+// CIR:   cir.store %[[INLINE_THIS_ARG]], %[[INLINE_THIS_ALLOCA]] : !cir.ptr<!rec_inline_operator>, !cir.ptr<!cir.ptr<!rec_inline_operator>>
+// CIR:   %[[INLINE_THIS_LOAD:.+]] = cir.load %[[INLINE_THIS_ALLOCA]] : !cir.ptr<!cir.ptr<!rec_inline_operator>>, !cir.ptr<!rec_inline_operator>
+// CIR:   %[[CONST_987:.+]] = cir.const #cir.int<987> : !s32i
+// CIR:   cir.store %[[CONST_987]], %[[INLINE_RETVAL]] : !s32i, !cir.ptr<!s32i>
+// CIR:   %[[INLINE_RET_LOAD:.+]] = cir.load %[[INLINE_RETVAL]] : !cir.ptr<!s32i>, !s32i
+// CIR:   cir.return %[[INLINE_RET_LOAD]] : !s32i
+// CIR: }
+
+// CIR: cir.func dso_local @_Z4testv()
+// CIR:   %[[X_ALLOCA:.+]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["x", init]
+// CIR:   %[[I_ALLOCA:.+]] = cir.alloca {{.*}}, {{.*}}, ["i"]
+// CIR:   %[[O_ALLOCA:.+]] = cir.alloca {{.*}}, {{.*}}, ["o"]
+// CIR:   %[[CONST_42:.+]] = cir.const #cir.int<42> : !s32i
+// CIR:   cir.store align(4) %[[CONST_42]], %[[X_ALLOCA]] : !s32i, !cir.ptr<!s32i>
+// CIR:   %[[INLINE_CALL:.+]] = cir.call @_ZNK15inline_operatorcviEv(%[[I_ALLOCA]]) : ({{.*}}) -> !s32i
+// CIR:   cir.store align(4) %[[INLINE_CALL]], %[[X_ALLOCA]] : !s32i, !cir.ptr<!s32i>
+// CIR:   %[[OUTLINE_CALL:.+]] = cir.call @_ZN20out_of_line_operatorcviEv(%[[O_ALLOCA]]) : ({{.*}}) -> !s32i
+// CIR:   cir.store align(4) %[[OUTLINE_CALL]], %[[X_ALLOCA]] : !s32i, !cir.ptr<!s32i>
+// CIR:   cir.return
+// CIR: }
+
+// LLVM: define dso_local i32 @_ZN20out_of_line_operatorcviEv(ptr %[[PARAM0:.+]])
+// LLVM:   %[[THIS_ALLOCA:.+]] = alloca ptr, i64 1
+// LLVM:   %[[RETVAL:.+]] = alloca i32, i64 1
+// LLVM:   store ptr %[[PARAM0]], ptr %[[THIS_ALLOCA]]
+// LLVM:   %[[THIS_LOAD:.+]] = load ptr, ptr %[[THIS_ALLOCA]]
+// LLVM:   store i32 123, ptr %[[RETVAL]]
+// LLVM:   %[[RET_LOAD:.+]] = load i32, ptr %[[RETVAL]]
+// LLVM:   ret i32 %[[RET_LOAD]]
+// LLVM: }
+
+// LLVM: define linkonce_odr i32 @_ZNK15inline_operatorcviEv(ptr %[[INLINE_PARAM0:.+]])
+// LLVM:   %[[INLINE_THIS_ALLOCA:.+]] = alloca ptr, i64 1
+// LLVM:   %[[INLINE_RETVAL:.+]] = alloca i32, i64 1
+// LLVM:   store ptr %[[INLINE_PARAM0]], ptr %[[INLINE_THIS_ALLOCA]]
+// LLVM:   %[[INLINE_THIS_LOAD:.+]] = load ptr, ptr %[[INLINE_THIS_ALLOCA]]
+// LLVM:   store i32 987, ptr %[[INLINE_RETVAL]]
+// LLVM:   %[[INLINE_RET_LOAD:.+]] = load i32, ptr %[[INLINE_RETVAL]]
+// LLVM:   ret i32 %[[INLINE_RET_LOAD]]
+// LLVM: }
+
+// LLVM: define dso_local void @_Z4testv()
+// LLVM:   %[[X_ALLOCA:.+]] = alloca i32, i64 1
+// LLVM:   %[[I_ALLOCA:.+]] = alloca {{.*}}, i64 1
+// LLVM:   %[[O_ALLOCA:.+]] = alloca {{.*}}, i64 1
+// LLVM:   store i32 42, ptr %[[X_ALLOCA]]
+// LLVM:   %[[INLINE_CALL:.+]] = call i32 @_ZNK15inline_operatorcviEv(ptr %[[I_ALLOCA]])
+// LLVM:   store i32 %[[INLINE_CALL]], ptr %[[X_ALLOCA]]
+// LLVM:   %[[OUTLINE_CALL:.+]] = call i32 @_ZN20out_of_line_operatorcviEv(ptr %[[O_ALLOCA]])
+// LLVM:   store i32 %[[OUTLINE_CALL]], ptr %[[X_ALLOCA]]
+// LLVM:   ret void
+// LLVM: }
+
+// OGCG: define dso_local noundef i32 @_ZN20out_of_line_operatorcviEv(ptr {{.*}} %[[THIS_PARAM:.+]])
+// OGCG: entry:
+// OGCG:   %[[THIS_ADDR:.+]] = alloca ptr
+// OGCG:   store ptr %[[THIS_PARAM]], ptr %[[THIS_ADDR]]
+// OGCG:   %[[THIS_LOAD:.+]] = load ptr, ptr %[[THIS_ADDR]]
+// OGCG:   ret i32 123
+// OGCG: }
+
+// OGCG: define dso_local void @_Z4testv()
+// OGCG: entry:
+// OGCG:   %[[X_VAR:.+]] = alloca i32
+// OGCG:   %[[I_VAR:.+]] = alloca {{.*}}
+// OGCG:   %[[O_VAR:.+]] = alloca {{.*}}
+// OGCG:   store i32 42, ptr %[[X_VAR]]
+// OGCG:   %[[INLINE_CALL:.+]] = call noundef i32 @_ZNK15inline_operatorcviEv(ptr {{.*}} %[[I_VAR]])
+// OGCG:   store i32 %[[INLINE_CALL]], ptr %[[X_VAR]]
+// OGCG:   %[[OUTLINE_CALL:.+]] = call noundef i32 @_ZN20out_of_line_operatorcviEv(ptr {{.*}} %[[O_VAR]])
+// OGCG:   store i32 %[[OUTLINE_CALL]], ptr %[[X_VAR]]
+// OGCG:   ret void
+// OGCG: }
+
+// OGCG: define linkonce_odr noundef i32 @_ZNK15inline_operatorcviEv(ptr {{.*}} %[[INLINE_THIS_PARAM:.+]])
+// OGCG: entry:
+// OGCG:   %[[INLINE_THIS_ADDR:.+]] = alloca ptr
+// OGCG:   store ptr %[[INLINE_THIS_PARAM]], ptr %[[INLINE_THIS_ADDR]]
+// OGCG:   %[[INLINE_THIS_LOAD:.+]] = load ptr, ptr %[[INLINE_THIS_ADDR]]
+// OGCG:   ret i32 987
+// OGCG: }
diff --git a/clang/test/CIR/CodeGen/dtor-alias.cpp b/clang/test/CIR/CodeGen/dtor-alias.cpp
index e37ddab..f4d54df 100644
--- a/clang/test/CIR/CodeGen/dtor-alias.cpp
+++ b/clang/test/CIR/CodeGen/dtor-alias.cpp
@@ -11,6 +11,8 @@ struct B {
 B::~B() {
 }
 
+// LLVM: @_ZN1BD1Ev = alias void (ptr), ptr @_ZN1BD2Ev
+
 // OGCG: @_ZN1BD1Ev = unnamed_addr alias void (ptr), ptr @_ZN1BD2Ev
 
 // CHECK: cir.func{{.*}} @_ZN1BD2Ev(%arg0: !cir.ptr<!rec_B>
@@ -25,10 +27,6 @@ B::~B() {
 // LLVM:   store ptr %[[THIS_ARG]], ptr %[[THIS_ADDR]]
 // LLVM:   %[[THIS:.*]] = load ptr, ptr %[[THIS_ADDR]]
 
-// This should be an alias, like the similar OGCG alias above, but that's not
-// implemented yet.
-// LLVM: declare dso_local void @_ZN1BD1Ev(ptr)
-
 // OGCG: define{{.*}} @_ZN1BD2Ev(ptr{{.*}} %[[THIS_ARG:.*]])
 // OGCG:   %[[THIS_ADDR:.*]] = alloca ptr
 // OGCG:   store ptr %[[THIS_ARG]], ptr %[[THIS_ADDR]]
diff --git a/clang/test/CIR/CodeGen/empty.cpp b/clang/test/CIR/CodeGen/empty.cpp
new file mode 100644
index 0000000..378ae21
--- /dev/null
+++ b/clang/test/CIR/CodeGen/empty.cpp
@@ -0,0 +1,32 @@
+// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s --check-prefix=CIR
+
+// These declarations shouldn't emit any code. Therefore the module is expected to be empty.
+
+template<typename T>
+concept some_concept = true;
+
+template<some_concept T>
+class class_template {};
+
+; // Empty declaration
+
+template<typename T>
+void function_template();
+
+static_assert(true, "top level static assert");
+
+template<typename T>
+using type_alias = T;
+
+namespace N {
+    using ::class_template; // UsingShadow
+}
+
+template<typename T>
+struct deduction_guide {};
+
+deduction_guide() -> deduction_guide<int>;
+
+// CIR: module {{.*}} {
+// CIR-NEXT: }
diff --git a/clang/test/CIR/CodeGen/finegrain-bitfield-access.cpp b/clang/test/CIR/CodeGen/finegrain-bitfield-access.cpp
new file mode 100644
index 0000000..930b0a9
--- /dev/null
+++ b/clang/test/CIR/CodeGen/finegrain-bitfield-access.cpp
@@ -0,0 +1,271 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir -ffine-grained-bitfield-accesses %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s --check-prefix=CIR
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm -ffine-grained-bitfield-accesses %s -o %t-cir.ll
+// RUN: FileCheck --input-file=%t-cir.ll %s --check-prefix=LLVM
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm -ffine-grained-bitfield-accesses %s -o %t.ll
+// RUN: FileCheck --input-file=%t.ll %s --check-prefix=OGCG
+
+struct S1 {
+  unsigned f1:2;
+  unsigned f2:6;
+  unsigned f3:8;
+  unsigned f4:4;
+  unsigned f5:8;
+};
+
+// CIR-DAG: !rec_S1 = !cir.record<struct "S1" {!u8i, !u8i, !u16i}>
+// LLVM-DAG: %struct.S1 = type { i8, i8, i16 }
+// OGCG-DAG: %struct.S1 = type { i8, i8, i16 }
+
+struct S2 {
+  unsigned long f1:16;
+  unsigned long f2:16;
+  unsigned long f3:6;
+};
+
+// CIR-DAG: !rec_S2 = !cir.record<struct "S2" padded {!u16i, !u16i, !u8i, !cir.array<!u8i x 3>}>
+// LLVM-DAG: %struct.S2 = type { i16, i16, i8, [3 x i8] }
+// OGCG-DAG: %struct.S2 = type { i16, i16, i8, [3 x i8] }
+
+struct S3 {
+  unsigned long f1:14;
+  unsigned long f2:18;
+  unsigned long f3:32;
+};
+
+// CIR-DAG: !rec_S3 = !cir.record<struct "S3" {!u32i, !u32i}>
+// LLVM-DAG: %struct.S3 = type { i32, i32 }
+// OGCG-DAG: %struct.S3 = type { i32, i32 }
+
+S1 a1;
+S2 a2;
+S3 a3;
+
+unsigned read8_1() {
+  return a1.f3;
+}
+
+// CIR-LABEL: @_Z7read8_1v
+// CIR: [[MEMBER:%.*]] = cir.get_member %1[1] {name = "f3"} : !cir.ptr<!rec_S1> -> !cir.ptr<!u8i>
+// CIR: [[BITFI:%.*]] = cir.get_bitfield align(1) (#bfi_f3, [[MEMBER]] : !cir.ptr<!u8i>) -> !u32i
+// CIR: cir.store [[BITFI]], {{.*}} : !u32i, !cir.ptr<!u32i>
+// CIR: [[RET:%.*]] = cir.load {{.*}} : !cir.ptr<!u32i>, !u32i
+// CIR: cir.return [[RET]] : !u32i
+
+// LLVM-LABEL: @_Z7read8_1v
+// LLVM:  [[MEMBER:%.*]] = load i8, ptr getelementptr inbounds nuw (i8, ptr {{.*}}, i64 1), align 1
+// LLVM:  [[BFCAST:%.*]] = zext i8 [[MEMBER]] to i32
+// LLVM:  store i32 [[BFCAST]], ptr {{.*}}, align 4
+// LLVM:  [[RET:%.*]] = load i32, ptr {{.*}}, align 4
+// LLVM:  ret i32 [[RET]]
+
+// OGCG-LABEL: @_Z7read8_1v
+// OGCG: [[BFLOAD:%.*]] = load i8, ptr getelementptr inbounds nuw (%struct.S1, ptr {{.*}}, i32 0, i32 1), align 1
+// OGCG-NEXT: [[BFCAST:%.*]] = zext i8 [[BFLOAD]] to i32
+// OGCG-NEXT: ret i32 [[BFCAST]]
+
+void write8_1() {
+  a1.f3 = 3;
+}
+
+// CIR-LABEL: @_Z8write8_1v
+// CIR: [[CONST3:%.*]] = cir.const #cir.int<3> : !s32i
+// CIR: [[INT3:%.*]] = cir.cast(integral, [[CONST3]] : !s32i), !u32i
+// CIR: [[MEMBER:%.*]] = cir.get_member {{.*}}[1] {name = "f3"} : !cir.ptr<!rec_S1> -> !cir.ptr<!u8i>
+// CIR: cir.set_bitfield align(1) (#bfi_f3, [[MEMBER]] : !cir.ptr<!u8i>, [[INT3]] : !u32i) -> !u32i
+
+// LLVM-LABEL: @_Z8write8_1v
+// LLVM:  store i8 3, ptr getelementptr inbounds nuw (i8, ptr {{.*}}, i64 1), align 1
+// LLVM:  ret void
+
+// OGCG-LABEL: @_Z8write8_1v
+// OGCG: store i8 3, ptr getelementptr inbounds nuw (%struct.S1, ptr {{.*}}, i32 0, i32 1), align 1
+// OGCG-NEXT: ret void
+
+unsigned read8_2() {
+
+  return a1.f5;
+}
+
+// CIR-LABEL: @_Z7read8_2v
+// CIR: [[MEMBER:%.*]] = cir.get_member {{.*}}[2] {name = "f5"} : !cir.ptr<!rec_S1> -> !cir.ptr<!u16i>
+// CIR: [[BITFI:%.*]] = cir.get_bitfield align(2) (#bfi_f5, [[MEMBER]] : !cir.ptr<!u16i>) -> !u32i
+// CIR: cir.store [[BITFI]], {{.*}} : !u32i, !cir.ptr<!u32i>
+// CIR: [[RET:%.*]] = cir.load {{.*}} : !cir.ptr<!u32i>, !u32i
+// CIR: cir.return [[RET]] : !u32i
+
+// LLVM-LABEL: @_Z7read8_2v
+// LLVM:  [[BFLOAD:%.*]] = load i16, ptr getelementptr inbounds nuw (i8, ptr {{.*}}, i64 2), align 2
+// LLVM:  [[BFLSHR:%.*]] = lshr i16 [[BFLOAD]], 4
+// LLVM:  [[BFCLEAR:%.*]] = and i16 [[BFLSHR]], 255
+// LLVM:  [[BFCAST:%.*]] = zext i16 [[BFCLEAR]] to i32
+// LLVM:  store i32 [[BFCAST]], ptr {{.*}}, align 4
+// LLVM:  [[RET:%.*]] = load i32, ptr {{.*}}, align 4
+// LLVM:  ret i32 [[RET]]
+
+// OGCG-LABEL: @_Z7read8_2v
+// OGCG: [[BFLOAD:%.*]] = load i16, ptr getelementptr inbounds nuw (%struct.S1, ptr {{.*}}, i32 0, i32 2), align 2
+// OGCG-NEXT: [[BFLSHR:%.*]] = lshr i16 [[BFLOAD]], 4
+// OGCG-NEXT: [[BFCLEAR:%.*]] = and i16 [[BFLSHR]], 255
+// OGCG-NEXT: [[BFCAST:%.*]] = zext i16 [[BFCLEAR]] to i32
+// OGCG-NEXT: ret i32 [[BFCAST]]
+
+void write8_2() {
+  a1.f5 = 3;
+}
+
+// CIR-LABEL: @_Z8write8_2v
+// CIR: [[CONST3:%.*]] = cir.const #cir.int<3> : !s32i
+// CIR: [[INT3:%.*]] = cir.cast(integral, [[CONST3]] : !s32i), !u32i
+// CIR: [[MEMBER:%.*]] = cir.get_member {{.*}}[2] {name = "f5"} : !cir.ptr<!rec_S1> -> !cir.ptr<!u16i>
+// CIR: cir.set_bitfield align(2) (#bfi_f5, %3 : !cir.ptr<!u16i>, {{.*}} : !u32i) -> !u32i
+
+// LLVM-LABEL: @_Z8write8_2v
+// LLVM:  [[BFLOAD:%.*]] = load i16, ptr getelementptr inbounds nuw (i8, ptr {{.*}}, i64 2), align 2
+// LLVM:  [[BFCLEAR:%.*]] = and i16 [[BFLOAD]], -4081
+// LLVM:  [[BFSET:%.*]] = or i16 [[BFCLEAR]], 48
+// LLVM:  store i16 [[BFSET]], ptr getelementptr inbounds nuw (i8, ptr {{.*}}, i64 2), align 2
+// LLVM:  ret void
+
+// OGCG-LABEL: @_Z8write8_2v
+// OGCG: [[BFLOAD:%.*]] = load i16, ptr getelementptr inbounds nuw (%struct.S1, ptr {{.*}}, i32 0, i32 2), align 2
+// OGCG-NEXT: [[BFCLEAR:%.*]] = and i16 [[BFLOAD]], -4081
+// OGCG-NEXT: [[BFSET:%.*]] = or i16 [[BFCLEAR]], 48
+// OGCG-NEXT: store i16 [[BFSET]], ptr getelementptr inbounds nuw (%struct.S1, ptr {{.*}}, i32 0, i32 2), align 2
+// OGCG-NEXT: ret void
+
+unsigned read16_1() {
+  return a2.f1;
+}
+
+// CIR-LABEL: @_Z8read16_1v
+// CIR: [[MEMBER:%.*]] = cir.get_member {{.*}}[0] {name = "f1"} : !cir.ptr<!rec_S2> -> !cir.ptr<!u16i>
+// CIR: [[BITFI:%.*]] = cir.get_bitfield align(8) (#bfi_f1, [[MEMBER]] : !cir.ptr<!u16i>) -> !u64i
+// CIR: [[BFCAST:%.*]] = cir.cast(integral, [[BITFI]] : !u64i), !u32i
+// CIR: cir.store [[BFCAST]], {{.*}} : !u32i, !cir.ptr<!u32i>
+// CIR: [[RET:%.*]] = cir.load {{.*}} : !cir.ptr<!u32i>, !u32i
+// CIR: cir.return [[RET]] : !u32i
+
+// LLVM-LABEL: @_Z8read16_1v
+// LLVM:  [[BFLOAD:%.*]] = load i16, ptr {{.*}}, align 8
+// LLVM:  [[BFCAST:%.*]] = zext i16 [[BFLOAD]] to i64
+// LLVM:  [[BF:%.*]] = trunc i64 [[BFCAST]] to i32
+// LLVM:  store i32 [[BF]], ptr {{.*}}, align 4
+// LLVM:  [[RET:%.*]] = load i32, ptr {{.*}}, align 4
+// LLVM:  ret i32 [[RET]]
+
+// OGCG-LABEL: @_Z8read16_1v
+// OGCG: [[BFLOAD:%.*]] = load i16, ptr {{.*}}, align 8
+// OGCG-NEXT: [[BFCAST:%.*]] = zext i16 [[BFLOAD]] to i64
+// OGCG-NEXT: [[RET:%.*]] = trunc i64 [[BFCAST]] to i32
+// OGCG-NEXT: ret i32 [[RET]]
+
+unsigned read16_2() {
+  return a2.f2;
+}
+
+// CIR-LABEL: @_Z8read16_2v
+// CIR: [[MEMBER:%.*]] = cir.get_member {{.*}}[1] {name = "f2"} : !cir.ptr<!rec_S2> -> !cir.ptr<!u16i>
+// CIR: [[BITFI:%.*]] = cir.get_bitfield align(2) (#bfi_f2, [[MEMBER]] : !cir.ptr<!u16i>) -> !u64i
+// CIR: [[BFCAST:%.*]] = cir.cast(integral, [[BITFI]] : !u64i), !u32i
+// CIR: cir.store [[BFCAST]], {{.*}} : !u32i, !cir.ptr<!u32i>
+// CIR: [[RET:%.*]] = cir.load {{.*}} : !cir.ptr<!u32i>, !u32i
+// CIR: cir.return [[RET]] : !u32i
+
+// LLVM-LABEL: @_Z8read16_2v
+// LLVM:  [[BFLOAD:%.*]] = load i16, ptr getelementptr inbounds nuw (i8, ptr {{.*}}, i64 2), align 2
+// LLVM:  [[BFCAST:%.*]] = zext i16 [[BFLOAD]] to i64
+// LLVM:  [[BF:%.*]] = trunc i64 [[BFCAST]] to i32
+// LLVM:  store i32 [[BF]], ptr {{.*}}, align 4
+// LLVM:  [[RET:%.*]] = load i32, ptr {{.*}}, align 4
+// LLVM:  ret i32 [[RET]]
+
+// OGCG-LABEL: @_Z8read16_2v
+// OGCG: [[BFLOAD:%.*]] = load i16, ptr getelementptr inbounds nuw (%struct.S2, ptr {{.*}}, i32 0, i32 1), align 2
+// OGCG-NEXT: [[BFCAST:%.*]] = zext i16 [[BFLOAD]] to i64
+// OGCG-NEXT: [[RET:%.*]] = trunc i64 [[BFCAST]] to i32
+// OGCG-NEXT: ret i32 [[RET]]
+
+void write16_1() {
+  a2.f1 = 5;
+}
+
+// CIR-LABEL: @_Z9write16_1v
+// CIR: [[CONST5:%.*]] = cir.const #cir.int<5> : !s32i
+// CIR: [[INT5:%.*]] = cir.cast(integral, [[CONST5]] : !s32i), !u64i
+// CIR: [[MEMBER:%.*]]  = cir.get_member {{.*}}[0] {name = "f1"} : !cir.ptr<!rec_S2> -> !cir.ptr<!u16i>
+// CIR: cir.set_bitfield align(8) (#bfi_f1, [[MEMBER]] : !cir.ptr<!u16i>, [[INT5]] : !u64i) -> !u64i
+// CIR: cir.return
+
+// LLVM-LABEL: @_Z9write16_1v
+// LLVM:  store i16 5, ptr {{.*}}, align 8
+// LLVM:  ret void
+
+// OGCG-LABEL: @_Z9write16_1v
+// OGCG: store i16 5, ptr {{.*}}, align 8
+// OGCG-NEXT: ret void
+
+void write16_2() {
+
+  a2.f2 = 5;
+}
+
+// CIR-LABEL: @_Z9write16_2v
+// CIR: [[CONST5:%.*]] = cir.const #cir.int<5> : !s32i
+// CIR: [[INT5:%.*]] = cir.cast(integral, [[CONST5]] : !s32i), !u64i
+// CIR: [[MEMBER:%.*]] = cir.get_member {{.*}}[1] {name = "f2"} : !cir.ptr<!rec_S2> -> !cir.ptr<!u16i>
+// CIR: cir.set_bitfield align(2) (#bfi_f2, [[MEMBER]] : !cir.ptr<!u16i>, {{.*}} : !u64i) -> !u64i
+// CIR: cir.return
+
+// LLVM-LABEL: @_Z9write16_2v
+// LLVM: store i16 5, ptr getelementptr inbounds nuw (i8, ptr {{.*}}, i64 2), align 2
+// LLVM: ret void
+
+// OGCG-LABEL: @_Z9write16_2v
+// OGCG: store i16 5, ptr getelementptr inbounds nuw (%struct.S2, ptr {{.*}}, i32 0, i32 1), align 2
+// OGCG-NEXT: ret void
+
+unsigned read32_1() {
+
+  return a3.f3;
+}
+// CIR-LABEL: @_Z8read32_1v
+// CIR: [[MEMBER:%.*]] = cir.get_member {{.*}}[1] {name = "f3"} : !cir.ptr<!rec_S3> -> !cir.ptr<!u32i>
+// CIR: [[BITFI:%.*]] = cir.get_bitfield align(4) (#bfi_f3_1, [[MEMBER]] : !cir.ptr<!u32i>) -> !u64i
+// CIR: [[BFCAST:%.*]] = cir.cast(integral, [[BITFI]] : !u64i), !u32i
+// CIR: cir.store [[BFCAST]], {{.*}} : !u32i, !cir.ptr<!u32i>
+// CIR: [[RET:%.*]] = cir.load {{.*}} : !cir.ptr<!u32i>, !u32i
+// CIR: cir.return [[RET]] : !u32i
+
+// LLVM-LABEL: @_Z8read32_1v
+// LLVM: [[BFLOAD:%.*]] = load i32, ptr getelementptr inbounds nuw (i8, ptr {{.*}}, i64 4), align 4
+// LLVM: [[BFCAST:%.*]] = zext i32 [[BFLOAD]] to i64
+// LLVM: [[BF:%.*]] = trunc i64 [[BFCAST]] to i32
+// LLVM: store i32 [[BF]], ptr {{.*}}, align 4
+// LLVM: [[RET:%.*]] = load i32, ptr {{.*}}, align 4
+// LLVM: ret i32 [[RET]]
+
+// OGCG-LABEL: @_Z8read32_1v
+// OGCG: [[BFLOAD:%.*]] = load i32, ptr getelementptr inbounds nuw (%struct.S3, ptr {{.*}}, i32 0, i32 1), align 4
+// OGCG-NEXT: [[BFCAST:%.*]] = zext i32 %bf.load to i64
+// OGCG-NEXT: [[RET:%.*]] = trunc i64 %bf.cast to i32
+// OGCG-NEXT: ret i32 [[RET]]
+
+void write32_1() {
+  a3.f3 = 5;
+}
+
+// CIR-LABEL: @_Z9write32_1v
+// CIR: [[CONST5:%.*]] = cir.const #cir.int<5> : !s32i
+// CIR: [[INT5:%.*]] = cir.cast(integral, [[CONST5]] : !s32i), !u64i
+// CIR: [[MEMBER:%.*]] = cir.get_member {{.*}}[1] {name = "f3"} : !cir.ptr<!rec_S3> -> !cir.ptr<!u32i>
+// CIR: cir.set_bitfield align(4) (#bfi_f3_1, [[MEMBER]] : !cir.ptr<!u32i>, [[INT5]] : !u64i) -> !u64i
+// CIR: cir.return
+
+// LLVM-LABEL: @_Z9write32_1v
+// LLVM:  store i32 5, ptr getelementptr inbounds nuw (i8, ptr {{.*}}, i64 4), align 4
+// LLVM:  ret void
+
+// OGCG-LABEL: @_Z9write32_1v
+// OGCG: store i32 5, ptr getelementptr inbounds nuw (%struct.S3, ptr {{.*}}, i32 0, i32 1), align 4
+// OGCG-NEXT: ret void
diff --git a/clang/test/CIR/CodeGen/no-prototype.c b/clang/test/CIR/CodeGen/no-prototype.c
new file mode 100644
index 0000000..4be6a94
--- /dev/null
+++ b/clang/test/CIR/CodeGen/no-prototype.c
@@ -0,0 +1,84 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s
+
+//===----------------------------------------------------------------------===//
+// DEFINED BEHAVIOUR
+//===----------------------------------------------------------------------===//
+
+// No-proto definition followed by a correct call.
+int noProto0(x) int x; { return x; }
+// CHECK: cir.func no_proto dso_local @noProto0(%arg0: !s32i {{.+}}) -> !s32i
+int test0(int x) {
+  // CHECK: cir.func dso_local @test0
+  return noProto0(x); // We know the definition. Should be a direct call.
+  // CHECK: %{{.+}} = cir.call @noProto0(%{{.+}})
+}
+
+// Declaration without prototype followed by its definition, then a correct call.
+//
+// Prototyped definition overrides no-proto declaration before any call is made,
+// only allowing calls with proper arguments. This is the only case where the
+// definition is not marked as no-proto.
+int noProto1();
+int noProto1(int x) { return x; }
+// CHECK: cir.func dso_local @noProto1(%arg0: !s32i {{.+}}) -> !s32i
+int test1(int x) {
+  // CHECK: cir.func dso_local @test1
+  return noProto1(x);
+  // CHECK: %{{.+}} = cir.call @noProto1(%{{[0-9]+}}) : (!s32i) -> !s32i
+}
+
+// Declaration without prototype followed by a correct call, then its definition.
+//
+// Call to no-proto is made before definition, so a variadic call that takes anything
+// is created. Later, when the definition is found, no-proto is replaced.
+int noProto2();
+int test2(int x) {
+  return noProto2(x);
+  // CHECK:  [[GGO:%.*]] = cir.get_global @noProto2 : !cir.ptr<!cir.func<(!s32i) -> !s32i>>
+  // CHECK:  {{.*}} = cir.call [[GGO]](%{{[0-9]+}}) : (!cir.ptr<!cir.func<(!s32i) -> !s32i>>, !s32i) -> !s32i
+}
+int noProto2(int x) { return x; }
+// CHECK: cir.func no_proto dso_local @noProto2(%arg0: !s32i {{.+}}) -> !s32i
+
+// No-proto declaration without definition (any call here is "correct").
+//
+// Call to no-proto is made before definition, so a variadic call that takes anything
+// is created. Definition is not in the translation unit, so it is left as is.
+int noProto3();
+// cir.func private no_proto @noProto3(...) -> !s32i
+int test3(int x) {
+// CHECK: cir.func dso_local @test3
+  return noProto3(x);
+  // CHECK:  [[GGO:%.*]] = cir.get_global @noProto3 : !cir.ptr<!cir.func<(...) -> !s32i>>
+  // CHECK:  [[CAST:%.*]] = cir.cast(bitcast, [[GGO]] : !cir.ptr<!cir.func<(...) -> !s32i>>), !cir.ptr<!cir.func<(!s32i) -> !s32i>>
+  // CHECK:  {{%.*}} = cir.call [[CAST]](%{{[0-9]+}}) : (!cir.ptr<!cir.func<(!s32i) -> !s32i>>, !s32i) -> !s32i
+}
+
+
+//===----------------------------------------------------------------------===//
+// UNDEFINED BEHAVIOUR
+//
+// No-proto definitions followed by incorrect calls.
+//===----------------------------------------------------------------------===//
+
+// No-proto definition followed by an incorrect call due to extra args.
+int noProto4() { return 0; }
+// cir.func private no_proto @noProto4() -> !s32i
+int test4(int x) {
+  return noProto4(x); // Even if we know the definition, this should compile.
+  // CHECK:  [[GGO:%.*]] = cir.get_global @noProto4 : !cir.ptr<!cir.func<() -> !s32i>>
+  // CHECK:  [[CAST:%.*]] = cir.cast(bitcast, [[GGO]] : !cir.ptr<!cir.func<() -> !s32i>>), !cir.ptr<!cir.func<(!s32i) -> !s32i>>
+  // CHECK:  {{%.*}} = cir.call [[CAST]]({{%.*}}) : (!cir.ptr<!cir.func<(!s32i) -> !s32i>>, !s32i) -> !s32i
+}
+
+// No-proto definition followed by an incorrect call due to lack of args.
+int noProto5();
+int test5(int x) {
+  return noProto5();
+  // CHECK:  [[GGO:%.*]] = cir.get_global @noProto5 : !cir.ptr<!cir.func<(!s32i) -> !s32i>>
+  // CHECK:  [[CAST:%.*]] = cir.cast(bitcast, [[GGO]] : !cir.ptr<!cir.func<(!s32i) -> !s32i>>), !cir.ptr<!cir.func<() -> !s32i>>
+  // CHECK:  {{%.*}} = cir.call [[CAST]]() : (!cir.ptr<!cir.func<() -> !s32i>>) -> !s32i
+}
+int noProto5(int x) { return x; }
+// CHECK: cir.func no_proto dso_local @noProto5(%arg0: !s32i {{.+}}) -> !s32i
diff --git a/clang/test/CIR/CodeGen/variable-decomposition.cpp b/clang/test/CIR/CodeGen/variable-decomposition.cpp
new file mode 100644
index 0000000..022d06a
--- /dev/null
+++ b/clang/test/CIR/CodeGen/variable-decomposition.cpp
@@ -0,0 +1,55 @@
+// RUN: %clang_cc1 -std=c++17 -triple x86_64-pc-linux-gnu -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -std=c++17 -triple x86_64-pc-linux-gnu -fclangir -emit-llvm %s -o %t-cir.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t-cir.ll %s
+// RUN: %clang_cc1 -std=c++17 -triple x86_64-pc-linux-gnu -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=OGCG --input-file=%t.ll %s
+
+struct some_struct {
+  int a;
+  float b;
+};
+
+float function() {
+  auto[a, b] = some_struct{1, 2.f};
+
+  return a + b;
+}
+
+// CIR-LABEL: cir.func dso_local @_Z8functionv() -> !cir.float
+// CIR:  %[[RETVAL:.+]] = cir.alloca !cir.float, !cir.ptr<!cir.float>, ["__retval"]
+// CIR:  %[[STRUCT:.+]] = cir.alloca !rec_some_struct, !cir.ptr<!rec_some_struct>, [""]
+// CIR:  %[[MEMBER_A:.+]] = cir.get_member %[[STRUCT]][0] {name = "a"} : !cir.ptr<!rec_some_struct> -> !cir.ptr<!s32i>
+// CIR:  %[[LOAD_A:.+]] = cir.load align(4) %[[MEMBER_A]] : !cir.ptr<!s32i>, !s32i
+// CIR:  %[[CAST_A:.+]] = cir.cast(int_to_float, %[[LOAD_A]] : !s32i), !cir.float
+// CIR:  %[[MEMBER_B:.+]] = cir.get_member %[[STRUCT]][1] {name = "b"} : !cir.ptr<!rec_some_struct> -> !cir.ptr<!cir.float>
+// CIR:  %[[LOAD_B:.+]] = cir.load align(4) %[[MEMBER_B]] : !cir.ptr<!cir.float>, !cir.float
+// CIR:  %[[ADD:.+]] = cir.binop(add, %[[CAST_A]], %[[LOAD_B]]) : !cir.float
+// CIR:  cir.store %[[ADD]], %[[RETVAL]] : !cir.float, !cir.ptr<!cir.float>
+// CIR:  %[[RET:.+]] = cir.load %[[RETVAL]] : !cir.ptr<!cir.float>, !cir.float
+// CIR:  cir.return %[[RET]] : !cir.float
+
+// LLVM-LABEL: define dso_local float @_Z8functionv()
+// LLVM:  %[[RETVAL:.+]] = alloca float, i64 1
+// LLVM:  %[[STRUCT:.+]] = alloca %struct.some_struct, i64 1
+// LLVM:  %[[GEP_A:.+]] = getelementptr %struct.some_struct, ptr %[[STRUCT]], i32 0, i32 0
+// LLVM:  %[[LOAD_A:.+]] = load i32, ptr %[[GEP_A]]
+// LLVM:  %[[CAST_A:.+]] = sitofp i32 %[[LOAD_A]] to float
+// LLVM:  %[[GEP_B:.+]] = getelementptr %struct.some_struct, ptr %[[STRUCT]], i32 0, i32 1
+// LLVM:  %[[LOAD_B:.+]] = load float, ptr %[[GEP_B]]
+// LLVM:  %[[ADD:.+]] = fadd float %[[CAST_A]], %[[LOAD_B]]
+// LLVM:  store float %[[ADD]], ptr %[[RETVAL]]
+// LLVM:  %[[RET:.+]] = load float, ptr %[[RETVAL]]
+// LLVM:  ret float %[[RET]]
+
+// OGCG: @__const._Z8functionv.{{.*}} = private unnamed_addr constant %struct.some_struct { i32 1, float 2.000000e+00 }
+// OGCG-LABEL: define dso_local noundef float @_Z8functionv()
+// OGCG:  %[[STRUCT:.+]] = alloca %struct.some_struct
+// OGCG:  call void @llvm.memcpy.p0.p0.i64(ptr align 4 %[[STRUCT]], ptr align 4 @__const._Z8functionv.{{.*}}, i64 8, i1 false)
+// OGCG:  %[[GEP_A:.+]] = getelementptr inbounds nuw %struct.some_struct, ptr %[[STRUCT]], i32 0, i32 0
+// OGCG:  %[[LOAD_A:.+]] = load i32, ptr %[[GEP_A]]
+// OGCG:  %[[CAST_A:.+]] = sitofp i32 %[[LOAD_A]] to float
+// OGCG:  %[[GEP_B:.+]] = getelementptr inbounds nuw %struct.some_struct, ptr %[[STRUCT]], i32 0, i32 1
+// OGCG:  %[[LOAD_B:.+]] = load float, ptr %[[GEP_B]]
+// OGCG:  %[[ADD:.+]] = fadd float %[[CAST_A]], %[[LOAD_B]]
+// OGCG:  ret float %[[ADD]]
diff --git a/clang/test/CIR/IR/array-dtor.cir b/clang/test/CIR/IR/array-dtor.cir
new file mode 100644
index 0000000..6d08d16
--- /dev/null
+++ b/clang/test/CIR/IR/array-dtor.cir
@@ -0,0 +1,28 @@
+// RUN: cir-opt %s | FileCheck %s
+
+!u8i = !cir.int<u, 8>
+!rec_S = !cir.record<struct "S" padded {!u8i}>
+
+module {
+  cir.func private @_ZN1SD1Ev(!cir.ptr<!rec_S>)
+  cir.func dso_local @_Z3foov() {
+    %0 = cir.alloca !cir.array<!rec_S x 42>, !cir.ptr<!cir.array<!rec_S x 42>>, ["s", init] {alignment = 16 : i64}
+    cir.array.dtor %0 : !cir.ptr<!cir.array<!rec_S x 42>> {
+    ^bb0(%arg0: !cir.ptr<!rec_S>):
+      cir.call @_ZN1SD1Ev(%arg0) : (!cir.ptr<!rec_S>) -> ()
+      cir.yield
+    }
+    cir.return
+  }
+
+  // CHECK: cir.func private @_ZN1SD1Ev(!cir.ptr<!rec_S>)
+  // CHECK: cir.func dso_local @_Z3foov() {
+  // CHECK:   %0 = cir.alloca !cir.array<!rec_S x 42>, !cir.ptr<!cir.array<!rec_S x 42>>, ["s", init] {alignment = 16 : i64}
+  // CHECK:   cir.array.dtor %0 : !cir.ptr<!cir.array<!rec_S x 42>> {
+  // CHECK:   ^bb0(%arg0: !cir.ptr<!rec_S>):
+  // CHECK:     cir.call @_ZN1SD1Ev(%arg0) : (!cir.ptr<!rec_S>) -> ()
+  // CHECK:     cir.yield
+  // CHECK:   }
+  // CHECK:   cir.return
+  // CHECK: }
+}
diff --git a/clang/test/CIR/IR/func.cir b/clang/test/CIR/IR/func.cir
index 5636194..0e9a92f 100644
--- a/clang/test/CIR/IR/func.cir
+++ b/clang/test/CIR/IR/func.cir
@@ -14,6 +14,14 @@ cir.func @empty() {
 // CHECK:   cir.return
 // CHECK: }
 
+// void empty() { }
+cir.func no_proto @noProto() {
+  cir.return
+}
+// CHECK: cir.func no_proto @noProto() {
+// CHECK:   cir.return
+// CHECK: }
+
 // void voidret() { return; }
 cir.func @voidret() {
   cir.return
diff --git a/clang/test/CIR/Transforms/bit.cir b/clang/test/CIR/Transforms/bit.cir
index fc27adb..11f47aa 100644
--- a/clang/test/CIR/Transforms/bit.cir
+++ b/clang/test/CIR/Transforms/bit.cir
@@ -75,6 +75,27 @@ module {
   // CHECK-NEXT:    cir.return %[[R]] : !u32i
   // CHECK-NEXT:  }
 
+  cir.func @fold_ffs() -> !s32i {
+    // 40 is 0b0010_1000
+    %0 = cir.const #cir.int<40> : !s32i
+    %1 = cir.ffs %0 : !s32i
+    cir.return %1 : !s32i
+  }
+  // CHECK-LABEL: @fold_ffs
+  // CHECK-NEXT:    %[[R:.+]] = cir.const #cir.int<4> : !s32i
+  // CHECK-NEXT:    cir.return %[[R]] : !s32i
+  // CHECK-NEXT:  }
+
+  cir.func @fold_ffs_zero() -> !s32i {
+    %0 = cir.const #cir.int<0> : !s32i
+    %1 = cir.ffs %0 : !s32i
+    cir.return %1 : !s32i
+  }
+  // CHECK-LABEL: @fold_ffs_zero
+  // CHECK-NEXT:    %[[R:.+]] = cir.const #cir.int<0> : !s32i
+  // CHECK-NEXT:    cir.return %[[R]] : !s32i
+  // CHECK-NEXT:  }
+
   cir.func @fold_parity() -> !u32i {
     // 0xdeadbeef is 0b1101_1110_1010_1101_1011_1110_1110_1111
     // 0xdeadbeef contains 24 ones
diff --git a/clang/test/CXX/drs/cwg18xx.cpp b/clang/test/CXX/drs/cwg18xx.cpp
index 9948075..5b4551b 100644
--- a/clang/test/CXX/drs/cwg18xx.cpp
+++ b/clang/test/CXX/drs/cwg18xx.cpp
@@ -564,12 +564,11 @@ struct A {
 namespace ex2 {
 #if __cplusplus >= 201103L
 struct Bar {
-  struct Baz { // #cwg1890-Baz
+  struct Baz {
     int a = 0;
   };
   static_assert(__is_constructible(Baz), "");
   // since-cxx11-error@-1 {{static assertion failed due to requirement '__is_constructible(cwg1890::ex2::Bar::Baz)'}}
-  // since-cxx11-note@#cwg1890-Baz {{'Baz' defined here}}
 };
 #endif
 } // namespace ex2
diff --git a/clang/test/CXX/expr/expr.const/p2-0x.cpp b/clang/test/CXX/expr/expr.const/p2-0x.cpp
index 910c863..8401d30 100644
--- a/clang/test/CXX/expr/expr.const/p2-0x.cpp
+++ b/clang/test/CXX/expr/expr.const/p2-0x.cpp
@@ -438,6 +438,11 @@ namespace ReinterpretCast {
   struct U {
     int m : (long)(S*)6; // expected-warning {{constant expression}} expected-note {{reinterpret_cast}}
   };
+  void f();
+  constexpr void* fp1 = (void*)f; // expected-error {{constant expression}} expected-note {{reinterpret_cast}}
+  constexpr int* fp2 = (int*)f; // expected-error {{constant expression}} expected-note {{reinterpret_cast}}
+  constexpr int (*fp3)() = (int(*)())f; // expected-error {{constant expression}} expected-note {{reinterpret_cast}}
+  constexpr int (&fp4)() = (int(&)())f; // expected-error {{constant expression}} expected-note {{reinterpret_cast}}
 }
 
 // - a pseudo-destructor call (5.2.4);
diff --git a/clang/test/CodeCompletion/skip-explicit-object-parameter.cpp b/clang/test/CodeCompletion/skip-explicit-object-parameter.cpp
index 55c16bb..587d6cb 100644
--- a/clang/test/CodeCompletion/skip-explicit-object-parameter.cpp
+++ b/clang/test/CodeCompletion/skip-explicit-object-parameter.cpp
@@ -1,14 +1,48 @@
 struct A {
-  void foo(this A self, int arg);
+  void foo(this auto&& self, int arg);
+  void bar(this A self, int arg);
 };
 
-int main() {
+int func1() {
   A a {};
   a.
 }
-// RUN: %clang_cc1 -cc1 -fsyntax-only -code-completion-at=%s:%(line-2):5 -std=c++23 %s | FileCheck %s
-// CHECK: COMPLETION: A : A::
-// CHECK-NEXT: COMPLETION: foo : [#void#]foo(<#int arg#>)
-// CHECK-NEXT: COMPLETION: operator= : [#A &#]operator=(<#const A &#>)
-// CHECK-NEXT: COMPLETION: operator= : [#A &#]operator=(<#A &&#>)
-// CHECK-NEXT: COMPLETION: ~A : [#void#]~A()
+// RUN: %clang_cc1 -fsyntax-only -code-completion-at=%s:%(line-2):5 -std=c++23 %s | FileCheck -check-prefix=CHECK-CC1 %s
+// CHECK-CC1: COMPLETION: A : A::
+// CHECK-NEXT-CC1: COMPLETION: bar : [#void#]bar(<#int arg#>)
+// CHECK-NEXT-CC1: COMPLETION: foo : [#void#]foo(<#int arg#>)
+// CHECK-NEXT-CC1: COMPLETION: operator= : [#A &#]operator=(<#const A &#>)
+// CHECK-NEXT-CC1: COMPLETION: operator= : [#A &#]operator=(<#A &&#>)
+// CHECK-NEXT-CC1: COMPLETION: ~A : [#void#]~A()
+
+struct B {
+  template <typename T>
+  void foo(this T&& self, int arg);
+};
+
+int func2() {
+  B b {};
+  b.foo();
+}
+// RUN: %clang_cc1 -fsyntax-only -code-completion-at=%s:%(line-2):9 -std=c++23 %s | FileCheck -check-prefix=CHECK-CC2 %s
+// CHECK-CC2: OVERLOAD: [#void#]foo(int arg)
+
+// TODO: llvm/llvm-project/146649
+// This is incorrect behavior. Correct Result should be a variant of, 
+// CC3: should be something like [#void#]foo(<#A self#>, <#int arg#>)
+// CC4: should be something like [#void#]bar(<#A self#>, <#int arg#>)
+int func3() {
+  (&A::foo)
+  (&A::bar)
+}
+// RUN: %clang_cc1 -fsyntax-only -code-completion-at=%s:%(line-3):10 -std=c++23 %s | FileCheck -check-prefix=CHECK-CC3 %s
+// CHECK-CC3: COMPLETION: foo : [#void#]foo<<#class self:auto#>>(<#int arg#>)
+// RUN: %clang_cc1 -fsyntax-only -code-completion-at=%s:%(line-4):10 -std=c++23 %s | FileCheck -check-prefix=CHECK-CC4 %s
+// CHECK-CC4: COMPLETION: bar : [#void#]bar(<#int arg#>)
+
+int func4() {
+  // TODO (&A::foo)(
+  (&A::bar)(
+}
+// RUN: %clang_cc1 -fsyntax-only -code-completion-at=%s:%(line-2):13 -std=c++23 %s | FileCheck -check-prefix=CHECK-CC5 %s
+// CHECK-CC5: OVERLOAD: [#void#](<#A#>, int)
diff --git a/clang/test/CodeGen/AArch64/neon-scalar-copy.c b/clang/test/CodeGen/AArch64/neon-scalar-copy.c
index 4ad1ce5..bd80068 100644
--- a/clang/test/CodeGen/AArch64/neon-scalar-copy.c
+++ b/clang/test/CodeGen/AArch64/neon-scalar-copy.c
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon \
+// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon -flax-vector-conversions=none\
 // RUN: -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s
 
 // REQUIRES: aarch64-registered-target || arm-registered-target
diff --git a/clang/test/CodeGen/AArch64/neon-vget.c b/clang/test/CodeGen/AArch64/neon-vget.c
index b17a7ab..ebc8c2f 100644
--- a/clang/test/CodeGen/AArch64/neon-vget.c
+++ b/clang/test/CodeGen/AArch64/neon-vget.c
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 -triple arm64-apple-darwin -target-feature +neon \
+// RUN: %clang_cc1 -triple arm64-apple-darwin -target-feature +neon -flax-vector-conversions=none \
 // RUN:   -disable-O0-optnone -emit-llvm -o - %s \
 // RUN: | opt -S -passes=mem2reg | FileCheck %s
 
diff --git a/clang/test/CodeGen/AArch64/poly64.c b/clang/test/CodeGen/AArch64/poly64.c
index 578dd20..00838b6 100644
--- a/clang/test/CodeGen/AArch64/poly64.c
+++ b/clang/test/CodeGen/AArch64/poly64.c
@@ -1,5 +1,5 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
-// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon \
+// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon -flax-vector-conversions=none\
 // RUN:  -ffp-contract=fast -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=mem2reg,sroa \
 // RUN:  | FileCheck %s
 
diff --git a/clang/test/CodeGen/X86/avx10_2_512ni-builtins.c b/clang/test/CodeGen/X86/avx10_2_512ni-builtins.c
index 26e0d12..d143188 100644
--- a/clang/test/CodeGen/X86/avx10_2_512ni-builtins.c
+++ b/clang/test/CodeGen/X86/avx10_2_512ni-builtins.c
@@ -187,12 +187,12 @@ __m512i test_mm512_mask_dpwsud_epi32(__m512i __A, __mmask16 __B, __m512i __C, __
   return _mm512_mask_dpwsud_epi32(__A, __B, __C, __D);
 }
 
-__m512i test_mm512_maskz_dpwsud_epi32(__m512i __A, __mmask16 __B, __m512i __C, __m512i __D) {
+__m512i test_mm512_maskz_dpwsud_epi32(__mmask16 __U, __m512i __A, __m512i __B, __m512i __C) {
 // CHECK-LABEL: @test_mm512_maskz_dpwsud_epi32(
 // CHECK: call <16 x i32> @llvm.x86.avx10.vpdpwsud.512(<16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}})
 // CHECK: zeroinitializer
 // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
-  return _mm512_maskz_dpwsud_epi32(__A, __B, __C, __D);
+  return _mm512_maskz_dpwsud_epi32(__U, __A, __B, __C);
 }
 
 __m512i test_mm512_dpwsuds_epi32(__m512i __A, __m512i __B, __m512i __C) {
@@ -208,12 +208,12 @@ __m512i test_mm512_mask_dpwsuds_epi32(__m512i __A, __mmask16 __B, __m512i __C, _
   return _mm512_mask_dpwsuds_epi32(__A, __B, __C, __D);
 }
 
-__m512i test_mm512_maskz_dpwsuds_epi32(__m512i __A, __mmask16 __B, __m512i __C, __m512i __D) {
+__m512i test_mm512_maskz_dpwsuds_epi32(__mmask16 __U, __m512i __A, __m512i __B, __m512i __C) {
 // CHECK-LABEL: @test_mm512_maskz_dpwsuds_epi32(
 // CHECK: call <16 x i32> @llvm.x86.avx10.vpdpwsuds.512(<16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}})
 // CHECK: zeroinitializer
 // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
-  return _mm512_maskz_dpwsuds_epi32(__A, __B, __C, __D);
+  return _mm512_maskz_dpwsuds_epi32(__U, __A, __B, __C);
 }
 
 __m512i test_mm512_dpwusd_epi32(__m512i __A, __m512i __B, __m512i __C) {
@@ -229,12 +229,12 @@ __m512i test_mm512_mask_dpwusd_epi32(__m512i __A, __mmask16 __B, __m512i __C, __
   return _mm512_mask_dpwusd_epi32(__A, __B, __C, __D);
 }
 
-__m512i test_mm512_maskz_dpwusd_epi32(__m512i __A, __mmask16 __B, __m512i __C, __m512i __D) {
+__m512i test_mm512_maskz_dpwusd_epi32(__mmask16 __U, __m512i __A, __m512i __B, __m512i __C) {
 // CHECK-LABEL: @test_mm512_maskz_dpwusd_epi32(
 // CHECK: call <16 x i32> @llvm.x86.avx10.vpdpwusd.512(<16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}})
 // CHECK: zeroinitializer
 // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
-  return _mm512_maskz_dpwusd_epi32(__A, __B, __C, __D);
+  return _mm512_maskz_dpwusd_epi32(__U, __A, __B, __C);
 }
 
 __m512i test_mm512_dpwusds_epi32(__m512i __A, __m512i __B, __m512i __C) {
@@ -250,12 +250,12 @@ __m512i test_mm512_mask_dpwusds_epi32(__m512i __A, __mmask16 __B, __m512i __C, _
   return _mm512_mask_dpwusds_epi32(__A, __B, __C, __D);
 }
 
-__m512i test_mm512_maskz_dpwusds_epi32(__m512i __A, __mmask16 __B, __m512i __C, __m512i __D) {
+__m512i test_mm512_maskz_dpwusds_epi32(__mmask16 __U, __m512i __A, __m512i __B, __m512i __C) {
 // CHECK-LABEL: @test_mm512_maskz_dpwusds_epi32(
 // CHECK: call <16 x i32> @llvm.x86.avx10.vpdpwusds.512(<16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}})
 // CHECK: zeroinitializer
 // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
-  return _mm512_maskz_dpwusds_epi32(__A, __B, __C, __D);
+  return _mm512_maskz_dpwusds_epi32(__U, __A, __B, __C);
 }
 
 __m512i test_mm512_dpwuud_epi32(__m512i __A, __m512i __B, __m512i __C) {
@@ -271,12 +271,12 @@ __m512i test_mm512_mask_dpwuud_epi32(__m512i __A, __mmask16 __B, __m512i __C, __
   return _mm512_mask_dpwuud_epi32(__A, __B, __C, __D);
 }
 
-__m512i test_mm512_maskz_dpwuud_epi32(__m512i __A, __mmask16 __B, __m512i __C, __m512i __D) {
+__m512i test_mm512_maskz_dpwuud_epi32(__mmask16 __U, __m512i __A, __m512i __B, __m512i __C) {
 // CHECK-LABEL: @test_mm512_maskz_dpwuud_epi32(
 // CHECK: call <16 x i32> @llvm.x86.avx10.vpdpwuud.512(<16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}})
 // CHECK: zeroinitializer
 // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
-  return _mm512_maskz_dpwuud_epi32(__A, __B, __C, __D);
+  return _mm512_maskz_dpwuud_epi32(__U, __A, __B, __C);
 }
 
 __m512i test_mm512_dpwuuds_epi32(__m512i __A, __m512i __B, __m512i __C) {
@@ -292,10 +292,10 @@ __m512i test_mm512_mask_dpwuuds_epi32(__m512i __A, __mmask16 __B, __m512i __C, _
   return _mm512_mask_dpwuuds_epi32(__A, __B, __C, __D);
 }
 
-__m512i test_mm512_maskz_dpwuuds_epi32(__m512i __A, __mmask16 __B, __m512i __C, __m512i __D) {
+__m512i test_mm512_maskz_dpwuuds_epi32(__mmask16 __U, __m512i __A, __m512i __B, __m512i __C) {
 // CHECK-LABEL: @test_mm512_maskz_dpwuuds_epi32(
 // CHECK: call <16 x i32> @llvm.x86.avx10.vpdpwuuds.512(<16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}})
 // CHECK: zeroinitializer
 // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
-  return _mm512_maskz_dpwuuds_epi32(__A, __B, __C, __D);
+  return _mm512_maskz_dpwuuds_epi32(__U, __A, __B, __C);
 }
diff --git a/clang/test/CodeGen/X86/avx10_2ni-builtins.c b/clang/test/CodeGen/X86/avx10_2ni-builtins.c
index 936be27..b4b12c9 100644
--- a/clang/test/CodeGen/X86/avx10_2ni-builtins.c
+++ b/clang/test/CodeGen/X86/avx10_2ni-builtins.c
@@ -264,11 +264,11 @@ __m128i test_mm_mask_dpwsud_epi32(__m128i __A, __mmask8 __B, __m128i __C, __m128
   return _mm_mask_dpwsud_epi32(__A, __B, __C, __D);
 }
 
-__m128i test_mm_maskz_dpwsud_epi32(__m128i __A, __mmask8 __B, __m128i __C, __m128i __D) {
+__m128i test_mm_maskz_dpwsud_epi32(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C) {
 // CHECK-LABEL: @test_mm_maskz_dpwsud_epi32(
 // CHECK: call <4 x i32> @llvm.x86.avx2.vpdpwsud.128(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}})
 // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}
-  return _mm_maskz_dpwsud_epi32(__A, __B, __C, __D);
+  return _mm_maskz_dpwsud_epi32(__U, __A, __B, __C);
 }
 
 __m256i test_mm256_mask_dpwsud_epi32(__m256i __A, __mmask8 __B, __m256i __C, __m256i __D) {
@@ -278,11 +278,11 @@ __m256i test_mm256_mask_dpwsud_epi32(__m256i __A, __mmask8 __B, __m256i __C, __m
   return _mm256_mask_dpwsud_epi32(__A, __B, __C, __D);
 }
 
-__m256i test_mm256_maskz_dpwsud_epi32(__m256i __A, __mmask8 __B, __m256i __C, __m256i __D) {
+__m256i test_mm256_maskz_dpwsud_epi32(__mmask8 __U, __m256i __A, __m256i __B, __m256i __C) {
 // CHECK-LABEL: @test_mm256_maskz_dpwsud_epi32(
 // CHECK: call <8 x i32> @llvm.x86.avx2.vpdpwsud.256(<8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}})
 // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}
-  return _mm256_maskz_dpwsud_epi32(__A, __B, __C, __D);
+  return _mm256_maskz_dpwsud_epi32(__U, __A, __B, __C);
 }
 
 __m128i test_mm_mask_dpwsuds_epi32(__m128i __A, __mmask8 __B, __m128i __C, __m128i __D) {
@@ -292,11 +292,11 @@ __m128i test_mm_mask_dpwsuds_epi32(__m128i __A, __mmask8 __B, __m128i __C, __m12
   return _mm_mask_dpwsuds_epi32(__A, __B, __C, __D);
 }
 
-__m128i test_mm_maskz_dpwsuds_epi32(__m128i __A, __mmask8 __B, __m128i __C, __m128i __D) {
+__m128i test_mm_maskz_dpwsuds_epi32(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C) {
 // CHECK-LABEL: @test_mm_maskz_dpwsuds_epi32(
 // CHECK: call <4 x i32> @llvm.x86.avx2.vpdpwsuds.128(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}})
 // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}
-  return _mm_maskz_dpwsuds_epi32(__A, __B, __C, __D);
+  return _mm_maskz_dpwsuds_epi32(__U, __A, __B, __C);
 }
 
 __m256i test_mm256_mask_dpwsuds_epi32(__m256i __A, __mmask8 __B, __m256i __C, __m256i __D) {
@@ -306,11 +306,11 @@ __m256i test_mm256_mask_dpwsuds_epi32(__m256i __A, __mmask8 __B, __m256i __C, __
   return _mm256_mask_dpwsuds_epi32(__A, __B, __C, __D);
 }
 
-__m256i test_mm256_maskz_dpwsuds_epi32(__m256i __A, __mmask8 __B, __m256i __C, __m256i __D) {
+__m256i test_mm256_maskz_dpwsuds_epi32(__mmask8 __U, __m256i __A, __m256i __B, __m256i __C) {
 // CHECK-LABEL: @test_mm256_maskz_dpwsuds_epi32(
 // CHECK: call <8 x i32> @llvm.x86.avx2.vpdpwsuds.256(<8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}})
 // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}
-  return _mm256_maskz_dpwsuds_epi32(__A, __B, __C, __D);
+  return _mm256_maskz_dpwsuds_epi32(__U, __A, __B, __C);
 }
 
 __m128i test_mm_mask_dpwusd_epi32(__m128i __A, __mmask8 __B, __m128i __C, __m128i __D) {
@@ -320,11 +320,11 @@ __m128i test_mm_mask_dpwusd_epi32(__m128i __A, __mmask8 __B, __m128i __C, __m128
   return _mm_mask_dpwusd_epi32(__A, __B, __C, __D);
 }
 
-__m128i test_mm_maskz_dpwusd_epi32(__m128i __A, __mmask8 __B, __m128i __C, __m128i __D) {
+__m128i test_mm_maskz_dpwusd_epi32(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C) {
 // CHECK-LABEL: @test_mm_maskz_dpwusd_epi32(
 // CHECK: call <4 x i32> @llvm.x86.avx2.vpdpwusd.128(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}})
 // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}
-  return _mm_maskz_dpwusd_epi32(__A, __B, __C, __D);
+  return _mm_maskz_dpwusd_epi32(__U, __A, __B, __C);
 }
 
 __m256i test_mm256_mask_dpwusd_epi32(__m256i __A, __mmask8 __B, __m256i __C, __m256i __D) {
@@ -334,11 +334,11 @@ __m256i test_mm256_mask_dpwusd_epi32(__m256i __A, __mmask8 __B, __m256i __C, __m
   return _mm256_mask_dpwusd_epi32(__A, __B, __C, __D);
 }
 
-__m256i test_mm256_maskz_dpwusd_epi32(__m256i __A, __mmask8 __B, __m256i __C, __m256i __D) {
+__m256i test_mm256_maskz_dpwusd_epi32(__mmask8 __U, __m256i __A, __m256i __B, __m256i __C) {
 // CHECK-LABEL: @test_mm256_maskz_dpwusd_epi32(
 // CHECK: call <8 x i32> @llvm.x86.avx2.vpdpwusd.256(<8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}})
 // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}
-  return _mm256_maskz_dpwusd_epi32(__A, __B, __C, __D);
+  return _mm256_maskz_dpwusd_epi32(__U, __A, __B, __C);
 }
 
 __m128i test_mm_mask_dpwusds_epi32(__m128i __A, __mmask8 __B, __m128i __C, __m128i __D) {
@@ -348,11 +348,11 @@ __m128i test_mm_mask_dpwusds_epi32(__m128i __A, __mmask8 __B, __m128i __C, __m12
   return _mm_mask_dpwusds_epi32(__A, __B, __C, __D);
 }
 
-__m128i test_mm_maskz_dpwusds_epi32(__m128i __A, __mmask8 __B, __m128i __C, __m128i __D) {
+__m128i test_mm_maskz_dpwusds_epi32(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C) {
 // CHECK-LABEL: @test_mm_maskz_dpwusds_epi32(
 // CHECK: call <4 x i32> @llvm.x86.avx2.vpdpwusds.128(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}})
 // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}
-  return _mm_maskz_dpwusds_epi32(__A, __B, __C, __D);
+  return _mm_maskz_dpwusds_epi32(__U, __A, __B, __C);
 }
 
 __m256i test_mm256_mask_dpwusds_epi32(__m256i __A, __mmask8 __B, __m256i __C, __m256i __D) {
@@ -362,11 +362,11 @@ __m256i test_mm256_mask_dpwusds_epi32(__m256i __A, __mmask8 __B, __m256i __C, __
   return _mm256_mask_dpwusds_epi32(__A, __B, __C, __D);
 }
 
-__m256i test_mm256_maskz_dpwusds_epi32(__m256i __A, __mmask8 __B, __m256i __C, __m256i __D) {
+__m256i test_mm256_maskz_dpwusds_epi32(__mmask8 __U, __m256i __A, __m256i __B, __m256i __C) {
 // CHECK-LABEL: @test_mm256_maskz_dpwusds_epi32(
 // CHECK: call <8 x i32> @llvm.x86.avx2.vpdpwusds.256(<8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}})
 // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}
-  return _mm256_maskz_dpwusds_epi32(__A, __B, __C, __D);
+  return _mm256_maskz_dpwusds_epi32(__U, __A, __B, __C);
 }
 
 __m128i test_mm_mask_dpwuud_epi32(__m128i __A, __mmask8 __B, __m128i __C, __m128i __D) {
@@ -376,11 +376,11 @@ __m128i test_mm_mask_dpwuud_epi32(__m128i __A, __mmask8 __B, __m128i __C, __m128
   return _mm_mask_dpwuud_epi32(__A, __B, __C, __D);
 }
 
-__m128i test_mm_maskz_dpwuud_epi32(__m128i __A, __mmask8 __B, __m128i __C, __m128i __D) {
+__m128i test_mm_maskz_dpwuud_epi32(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C) {
 // CHECK-LABEL: @test_mm_maskz_dpwuud_epi32(
 // CHECK: call <4 x i32> @llvm.x86.avx2.vpdpwuud.128(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}})
 // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}
-  return _mm_maskz_dpwuud_epi32(__A, __B, __C, __D);
+  return _mm_maskz_dpwuud_epi32(__U, __A, __B, __C);
 }
 
 __m256i test_mm256_mask_dpwuud_epi32(__m256i __A, __mmask8 __B, __m256i __C, __m256i __D) {
@@ -390,11 +390,11 @@ __m256i test_mm256_mask_dpwuud_epi32(__m256i __A, __mmask8 __B, __m256i __C, __m
   return _mm256_mask_dpwuud_epi32(__A, __B, __C, __D);
 }
 
-__m256i test_mm256_maskz_dpwuud_epi32(__m256i __A, __mmask8 __B, __m256i __C, __m256i __D) {
+__m256i test_mm256_maskz_dpwuud_epi32(__mmask8 __U, __m256i __A, __m256i __B, __m256i __C) {
 // CHECK-LABEL: @test_mm256_maskz_dpwuud_epi32(
 // CHECK: call <8 x i32> @llvm.x86.avx2.vpdpwuud.256(<8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}})
 // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}
-  return _mm256_maskz_dpwuud_epi32(__A, __B, __C, __D);
+  return _mm256_maskz_dpwuud_epi32(__U, __A, __B, __C);
 }
 
 __m128i test_mm_mask_dpwuuds_epi32(__m128i __A, __mmask8 __B, __m128i __C, __m128i __D) {
@@ -404,11 +404,11 @@ __m128i test_mm_mask_dpwuuds_epi32(__m128i __A, __mmask8 __B, __m128i __C, __m12
   return _mm_mask_dpwuuds_epi32(__A, __B, __C, __D);
 }
 
-__m128i test_mm_maskz_dpwuuds_epi32(__m128i __A, __mmask8 __B, __m128i __C, __m128i __D) {
+__m128i test_mm_maskz_dpwuuds_epi32(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C) {
 // CHECK-LABEL: @test_mm_maskz_dpwuuds_epi32(
 // CHECK: call <4 x i32> @llvm.x86.avx2.vpdpwuuds.128(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}})
 // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}
-  return _mm_maskz_dpwuuds_epi32(__A, __B, __C, __D);
+  return _mm_maskz_dpwuuds_epi32(__U, __A, __B, __C);
 }
 
 __m256i test_mm256_mask_dpwuuds_epi32(__m256i __A, __mmask8 __B, __m256i __C, __m256i __D) {
@@ -418,9 +418,9 @@ __m256i test_mm256_mask_dpwuuds_epi32(__m256i __A, __mmask8 __B, __m256i __C, __
   return _mm256_mask_dpwuuds_epi32(__A, __B, __C, __D);
 }
 
-__m256i test_mm256_maskz_dpwuuds_epi32(__m256i __A, __mmask8 __B, __m256i __C, __m256i __D) {
+__m256i test_mm256_maskz_dpwuuds_epi32(__mmask8 __U, __m256i __A, __m256i __B, __m256i __C) {
 // CHECK-LABEL: @test_mm256_maskz_dpwuuds_epi32(
 // CHECK: call <8 x i32> @llvm.x86.avx2.vpdpwuuds.256(<8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}})
 // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}
-  return _mm256_maskz_dpwuuds_epi32(__A, __B, __C, __D);
+  return _mm256_maskz_dpwuuds_epi32(__U, __A, __B, __C);
 }
diff --git a/clang/test/CodeGen/attr-counted-by-for-pointers.c b/clang/test/CodeGen/attr-counted-by-for-pointers.c
index 2407654..e939e49 100644
--- a/clang/test/CodeGen/attr-counted-by-for-pointers.c
+++ b/clang/test/CodeGen/attr-counted-by-for-pointers.c
@@ -471,3 +471,80 @@ size_t test9(struct annotated_sized_ptr *p, int index) {
 size_t test10(struct annotated_sized_ptr *p, int index) {
   return __bdos(&((unsigned int *) p->buf)[index]);
 }
+
+struct pr151236_struct {
+        int *a __counted_by(a_count);
+        short a_count;
+};
+
+// SANITIZE-WITH-ATTR-LABEL: define dso_local range(i64 -262144, 262137) i64 @test11(
+// SANITIZE-WITH-ATTR-SAME: ptr noundef [[P:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// SANITIZE-WITH-ATTR-NEXT:  entry:
+// SANITIZE-WITH-ATTR-NEXT:    [[COUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 8
+// SANITIZE-WITH-ATTR-NEXT:    [[COUNTED_BY_LOAD:%.*]] = load i16, ptr [[COUNTED_BY_GEP]], align 4
+// SANITIZE-WITH-ATTR-NEXT:    [[TMP0:%.*]] = icmp sgt i16 [[COUNTED_BY_LOAD]], -1
+// SANITIZE-WITH-ATTR-NEXT:    [[COUNT:%.*]] = sext i16 [[COUNTED_BY_LOAD]] to i64
+// SANITIZE-WITH-ATTR-NEXT:    [[ARRAY_SIZE:%.*]] = shl nsw i64 [[COUNT]], 3
+// SANITIZE-WITH-ATTR-NEXT:    [[TMP1:%.*]] = select i1 [[TMP0]], i64 [[ARRAY_SIZE]], i64 0
+// SANITIZE-WITH-ATTR-NEXT:    ret i64 [[TMP1]]
+//
+// NO-SANITIZE-WITH-ATTR-LABEL: define dso_local range(i64 -262144, 262137) i64 @test11(
+// NO-SANITIZE-WITH-ATTR-SAME: ptr noundef readonly captures(none) [[P:%.*]]) local_unnamed_addr #[[ATTR1]] {
+// NO-SANITIZE-WITH-ATTR-NEXT:  entry:
+// NO-SANITIZE-WITH-ATTR-NEXT:    [[COUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 8
+// NO-SANITIZE-WITH-ATTR-NEXT:    [[COUNTED_BY_LOAD:%.*]] = load i16, ptr [[COUNTED_BY_GEP]], align 4
+// NO-SANITIZE-WITH-ATTR-NEXT:    [[COUNT:%.*]] = sext i16 [[COUNTED_BY_LOAD]] to i64
+// NO-SANITIZE-WITH-ATTR-NEXT:    [[ARRAY_SIZE:%.*]] = shl nsw i64 [[COUNT]], 3
+// NO-SANITIZE-WITH-ATTR-NEXT:    [[TMP0:%.*]] = icmp sgt i16 [[COUNTED_BY_LOAD]], -1
+// NO-SANITIZE-WITH-ATTR-NEXT:    [[TMP1:%.*]] = select i1 [[TMP0]], i64 [[ARRAY_SIZE]], i64 0
+// NO-SANITIZE-WITH-ATTR-NEXT:    ret i64 [[TMP1]]
+//
+// SANITIZE-WITHOUT-ATTR-LABEL: define dso_local range(i64 0, -1) i64 @test11(
+// SANITIZE-WITHOUT-ATTR-SAME: ptr noundef [[P:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// SANITIZE-WITHOUT-ATTR-NEXT:  entry:
+// SANITIZE-WITHOUT-ATTR-NEXT:    ret i64 -2
+//
+// NO-SANITIZE-WITHOUT-ATTR-LABEL: define dso_local range(i64 0, -1) i64 @test11(
+// NO-SANITIZE-WITHOUT-ATTR-SAME: ptr noundef readonly captures(none) [[P:%.*]]) local_unnamed_addr #[[ATTR1]] {
+// NO-SANITIZE-WITHOUT-ATTR-NEXT:  entry:
+// NO-SANITIZE-WITHOUT-ATTR-NEXT:    ret i64 -2
+//
+size_t test11(struct pr151236_struct *p) {
+  return __bdos(p->a) + __bdos((p->a));
+}
+
+// SANITIZE-WITH-ATTR-LABEL: define dso_local range(i64 -262144, 262137) i64 @test12(
+// SANITIZE-WITH-ATTR-SAME: ptr noundef [[P:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// SANITIZE-WITH-ATTR-NEXT:  entry:
+// SANITIZE-WITH-ATTR-NEXT:    [[COUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 8
+// SANITIZE-WITH-ATTR-NEXT:    [[COUNTED_BY_LOAD:%.*]] = load i16, ptr [[COUNTED_BY_GEP]], align 4
+// SANITIZE-WITH-ATTR-NEXT:    [[TMP0:%.*]] = icmp sgt i16 [[COUNTED_BY_LOAD]], -1
+// SANITIZE-WITH-ATTR-NEXT:    [[COUNT:%.*]] = sext i16 [[COUNTED_BY_LOAD]] to i64
+// SANITIZE-WITH-ATTR-NEXT:    [[ARRAY_SIZE:%.*]] = shl nsw i64 [[COUNT]], 3
+// SANITIZE-WITH-ATTR-NEXT:    [[TMP1:%.*]] = select i1 [[TMP0]], i64 [[ARRAY_SIZE]], i64 0
+// SANITIZE-WITH-ATTR-NEXT:    ret i64 [[TMP1]]
+//
+// NO-SANITIZE-WITH-ATTR-LABEL: define dso_local range(i64 -262144, 262137) i64 @test12(
+// NO-SANITIZE-WITH-ATTR-SAME: ptr noundef readonly captures(none) [[P:%.*]]) local_unnamed_addr #[[ATTR1]] {
+// NO-SANITIZE-WITH-ATTR-NEXT:  entry:
+// NO-SANITIZE-WITH-ATTR-NEXT:    [[COUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 8
+// NO-SANITIZE-WITH-ATTR-NEXT:    [[COUNTED_BY_LOAD:%.*]] = load i16, ptr [[COUNTED_BY_GEP]], align 4
+// NO-SANITIZE-WITH-ATTR-NEXT:    [[COUNT:%.*]] = sext i16 [[COUNTED_BY_LOAD]] to i64
+// NO-SANITIZE-WITH-ATTR-NEXT:    [[ARRAY_SIZE:%.*]] = shl nsw i64 [[COUNT]], 3
+// NO-SANITIZE-WITH-ATTR-NEXT:    [[TMP0:%.*]] = icmp sgt i16 [[COUNTED_BY_LOAD]], -1
+// NO-SANITIZE-WITH-ATTR-NEXT:    [[TMP1:%.*]] = select i1 [[TMP0]], i64 [[ARRAY_SIZE]], i64 0
+// NO-SANITIZE-WITH-ATTR-NEXT:    ret i64 [[TMP1]]
+//
+// SANITIZE-WITHOUT-ATTR-LABEL: define dso_local range(i64 0, -1) i64 @test12(
+// SANITIZE-WITHOUT-ATTR-SAME: ptr noundef [[P:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// SANITIZE-WITHOUT-ATTR-NEXT:  entry:
+// SANITIZE-WITHOUT-ATTR-NEXT:    ret i64 -2
+//
+// NO-SANITIZE-WITHOUT-ATTR-LABEL: define dso_local range(i64 0, -1) i64 @test12(
+// NO-SANITIZE-WITHOUT-ATTR-SAME: ptr noundef readonly captures(none) [[P:%.*]]) local_unnamed_addr #[[ATTR1]] {
+// NO-SANITIZE-WITHOUT-ATTR-NEXT:  entry:
+// NO-SANITIZE-WITHOUT-ATTR-NEXT:    ret i64 -2
+//
+size_t test12(struct pr151236_struct *p) {
+  return __bdos(p->a) + __bdos(((int *)p->a));
+}
diff --git a/clang/test/CodeGen/debug-info-abspath.c b/clang/test/CodeGen/debug-info-abspath.c
index b2047a7..193a72c 100644
--- a/clang/test/CodeGen/debug-info-abspath.c
+++ b/clang/test/CodeGen/debug-info-abspath.c
@@ -2,20 +2,15 @@
 // RUN: cp %s %t/UNIQUEISH_SENTINEL/debug-info-abspath.c
 
 // RUN: %clang_cc1 -debug-info-kind=limited -triple %itanium_abi_triple \
+// RUN:   -fdebug-compilation-dir=%t/UNIQUEISH_SENTINEL/debug-info-abspath.c \
 // RUN:   %t/UNIQUEISH_SENTINEL/debug-info-abspath.c -emit-llvm -o - \
 // RUN:   | FileCheck %s
 
 // RUN: cp %s %t.c
 // RUN: %clang_cc1 -debug-info-kind=limited -triple %itanium_abi_triple \
+// RUN:   -fdebug-compilation-dir=%t \
 // RUN:   %t.c -emit-llvm -o - | FileCheck %s --check-prefix=INTREE
 
-// RUN: cd %t/UNIQUEISH_SENTINEL
-// RUN: %clang_cc1 -debug-info-kind=limited -triple %itanium_abi_triple \
-// RUN:   debug-info-abspath.c -emit-llvm -o - \
-// RUN:   | FileCheck %s --check-prefix=CURDIR
-// RUN: %clang_cc1 -debug-info-kind=limited -triple %itanium_abi_triple \
-// RUN:   %s -emit-llvm -o - | FileCheck %s --check-prefix=CURDIR
-
 void foo(void) {}
 
 // Since %s is an absolute path, directory should be the common
@@ -28,7 +23,3 @@ void foo(void) {}
 
 // INTREE: = distinct !DISubprogram({{.*}}![[SPFILE:[0-9]+]]
 // INTREE: DIFile({{.*}}directory: "{{.+}}CodeGen{{.*}}")
-
-// CURDIR: = distinct !DICompileUnit({{.*}}file: ![[CUFILE:[0-9]+]]
-// CURDIR: ![[CUFILE]] = !DIFile({{.*}}directory: "{{.+}}UNIQUEISH_SENTINEL")
-
diff --git a/clang/test/CodeGen/debug-info-compilation-dir.c b/clang/test/CodeGen/debug-info-compilation-dir.c
index b49a0f5..5f5542c 100644
--- a/clang/test/CodeGen/debug-info-compilation-dir.c
+++ b/clang/test/CodeGen/debug-info-compilation-dir.c
@@ -7,3 +7,10 @@
 // RUN: %clang_cc1 -emit-llvm -debug-info-kind=limited %s -o - | FileCheck -check-prefix=CHECK-DIR %s
 // CHECK-DIR: CodeGen
 
+/// Test path remapping.
+// RUN: %clang_cc1 -fdebug-compilation-dir=%S -main-file-name %s -emit-llvm -debug-info-kind=limited %s -o - | FileCheck -check-prefix=CHECK-ABS %s
+// CHECK-ABS: DIFile(filename: "{{.*}}debug-info-compilation-dir.c", directory: "{{.*}}CodeGen")
+
+// RUN: %clang_cc1 -main-file-name %s -emit-llvm -debug-info-kind=limited %s -o - | FileCheck -check-prefix=CHECK-NOMAP %s
+// CHECK-NOMAP: DIFile(filename: "{{.*}}debug-info-compilation-dir.c", directory: "")
+
diff --git a/clang/test/CodeGen/debug-prefix-map.c b/clang/test/CodeGen/debug-prefix-map.c
index e242180..e58909f 100644
--- a/clang/test/CodeGen/debug-prefix-map.c
+++ b/clang/test/CodeGen/debug-prefix-map.c
@@ -12,6 +12,7 @@
 // RUN: rm -rf %t && mkdir -p %t/a/b && cp %s %t/a/b/c.c
 // RUN: %clang_cc1 -emit-llvm -debug-info-kind=standalone -I%S -fdebug-prefix-map=%t/a/b=y -fdebug-prefix-map=%t/a=x %t/a/b/c.c -o - | FileCheck %s --check-prefix=CHECK-X
 // RUN: %clang_cc1 -emit-llvm -debug-info-kind=standalone -I%S -fdebug-prefix-map=%t/a=x -fdebug-prefix-map=%t/a/b=y %t/a/b/c.c -o - | FileCheck %s --check-prefix=CHECK-Y
+// RUN: %clang_cc1 -emit-llvm -debug-info-kind=standalone -I%S -main-file-name %t/a/b/c.c -fdebug-compilation-dir=%t/a -fdebug-prefix-map=%t/a=x -fdebug-prefix-map=%t/a/b=y %t/a/b/c.c -o - | FileCheck %s --check-prefix=CHECK-REMAP-Y
 
 #include "Inputs/stdio.h"
 
@@ -26,9 +27,9 @@ void test_rewrite_includes(void) {
   vprintf("string", argp);
 }
 
-// CHECK-NO-MAIN-FILE-NAME: !DIFile(filename: "{{/|.:\\\\}}UNLIKELY_PATH{{/|\\\\}}empty{{/|\\\\}}<stdin>",
 // CHECK-NO-MAIN-FILE-NAME: !DIFile(filename: "{{/|.:\\\\}}UNLIKELY_PATH{{/|\\\\}}empty{{/|\\\\}}{{.*}}",
 // CHECK-NO-MAIN-FILE-NAME-SAME:    directory: "")
+// CHECK-NO-MAIN-FILE-NAME: !DIFile(filename: "{{/|.:\\\\}}UNLIKELY_PATH{{/|\\\\}}empty{{/|\\\\}}<stdin>",
 // CHECK-NO-MAIN-FILE-NAME: !DIFile(filename: "{{/|.:\\\\}}UNLIKELY_PATH{{/|\\\\}}empty{{/|\\\\}}Inputs{{/|\\\\}}stdio.h",
 // CHECK-NO-MAIN-FILE-NAME-SAME:    directory: "")
 // CHECK-NO-MAIN-FILE-NAME-NOT: !DIFile(filename:
@@ -54,3 +55,5 @@ void test_rewrite_includes(void) {
 
 // CHECK-X: !DIFile(filename: "x{{/|\\\\}}b{{/|\\\\}}c.c", directory: "")
 // CHECK-Y: !DIFile(filename: "y{{/|\\\\}}c.c", directory: "")
+
+// CHECK-REMAP-Y: !DIFile(filename: "y{{/|\\\\}}c.c", directory: "x")
diff --git a/clang/test/CodeGen/ms_struct-long-double.c b/clang/test/CodeGen/ms_struct-long-double.c
index 9b3ea79..eaab217 100644
--- a/clang/test/CodeGen/ms_struct-long-double.c
+++ b/clang/test/CodeGen/ms_struct-long-double.c
@@ -1,4 +1,5 @@
 // RUN: %clang_cc1 -emit-llvm-only -triple i686-windows-gnu -fdump-record-layouts %s | FileCheck %s
+// RUN: %clang_cc1 -emit-llvm-only -triple i686-windows-cygnus -fdump-record-layouts %s | FileCheck %s
 // RUN: %clang_cc1 -emit-llvm-only -triple i686-linux -fdump-record-layouts -Wno-incompatible-ms-struct %s | FileCheck %s
 // RUN: not %clang_cc1 -emit-llvm-only -triple i686-linux -fdump-record-layouts %s 2>&1 | FileCheck %s -check-prefix=ERROR
 
diff --git a/clang/test/CodeGenCXX/debug-info-function-context.cpp b/clang/test/CodeGenCXX/debug-info-function-context.cpp
index 63fdf87..29c87b6 100644
--- a/clang/test/CodeGenCXX/debug-info-function-context.cpp
+++ b/clang/test/CodeGenCXX/debug-info-function-context.cpp
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 -emit-llvm -debug-info-kind=limited -triple x86_64-pc-linux-gnu %s \
+// RUN: %clang_cc1 -emit-llvm -debug-info-kind=limited -triple x86_64-pc-linux-gnu %s -fdebug-compilation-dir=%S \
 // RUN:     -dwarf-version=5 -main-file-name debug-info-function-context.cpp  -o - | FileCheck %s
 
 struct C {
diff --git a/clang/test/CodeGenCXX/debug-info-gline-tables-only.cpp b/clang/test/CodeGenCXX/debug-info-gline-tables-only.cpp
index 9b86a49..192169b 100644
--- a/clang/test/CodeGenCXX/debug-info-gline-tables-only.cpp
+++ b/clang/test/CodeGenCXX/debug-info-gline-tables-only.cpp
@@ -14,9 +14,11 @@ class E : public C {
   // CHECK-NOT: DW_TAG_reference type
   void x(const D& d);
 };
+// CHECK-NOT: DW_TAG_structure_type
 struct F {
   enum X { };
   void func(X);
+  // CHECK-NOT: DW_TAG_member
   virtual ~F();
 };
 F::~F() {
diff --git a/clang/test/CodeGenCXX/difile_entry.cpp b/clang/test/CodeGenCXX/difile_entry.cpp
index 8bf6dc3..5fcd56e 100644
--- a/clang/test/CodeGenCXX/difile_entry.cpp
+++ b/clang/test/CodeGenCXX/difile_entry.cpp
@@ -3,7 +3,7 @@
 /// Test that we canonicalize the DIFile.
 // RUN: rm -rf %t && mkdir %t && cd %t
 // RUN: cp %s .
-// RUN: %clang_cc1 -triple %itanium_abi_triple -main-file-name difile_entry.cpp -debug-info-kind=limited %t/difile_entry.cpp -std=c++11 -emit-llvm -o - | FileCheck %s
+// RUN: %clang_cc1 -triple %itanium_abi_triple -main-file-name difile_entry.cpp -fdebug-compilation-dir=%t -debug-info-kind=limited %t/difile_entry.cpp -std=c++11 -emit-llvm -o - | FileCheck %s
 int x();
 static int i = x();
 
diff --git a/clang/test/CodeGenCoroutines/coro-await.cpp b/clang/test/CodeGenCoroutines/coro-await.cpp
index 8dd53a7..7b7d141 100644
--- a/clang/test/CodeGenCoroutines/coro-await.cpp
+++ b/clang/test/CodeGenCoroutines/coro-await.cpp
@@ -100,7 +100,7 @@ extern "C" void f0() {
   // CHECK: call i8 @llvm.coro.suspend(token %[[FINALSP_ID]], i1 true)
 
   // Await suspend wrapper
-  // CHECK: define{{.*}} @f0.__await_suspend_wrapper__await(ptr {{[^,]*}} %[[AWAITABLE_ARG:.+]], ptr {{[^,]*}} %[[FRAME_ARG:.+]])
+  // CHECK: define internal {{.*}} @f0.__await_suspend_wrapper__await(ptr {{[^,]*}} %[[AWAITABLE_ARG:.+]], ptr {{[^,]*}} %[[FRAME_ARG:.+]])
   // CHECK: store ptr %[[AWAITABLE_ARG]], ptr %[[AWAITABLE_TMP:.+]],
   // CHECK: store ptr %[[FRAME_ARG]], ptr %[[FRAME_TMP:.+]],
   // CHECK: %[[AWAITABLE:.+]] = load ptr, ptr %[[AWAITABLE_TMP]]
@@ -162,7 +162,7 @@ extern "C" void f1(int) {
   // CHECK:     call void @_ZN13suspend_maybe12await_resumeEv(ptr {{[^,]*}} %[[AWAITABLE]])
 
   // Await suspend wrapper
-  // CHECK: define {{.*}} i1 @f1.__await_suspend_wrapper__yield(ptr {{[^,]*}} %[[AWAITABLE_ARG:.+]], ptr {{[^,]*}} %[[FRAME_ARG:.+]])
+  // CHECK: define internal {{.*}} i1 @f1.__await_suspend_wrapper__yield(ptr {{[^,]*}} %[[AWAITABLE_ARG:.+]], ptr {{[^,]*}} %[[FRAME_ARG:.+]])
   // CHECK: store ptr %[[AWAITABLE_ARG]], ptr %[[AWAITABLE_TMP:.+]],
   // CHECK: store ptr %[[FRAME_ARG]], ptr %[[FRAME_TMP:.+]],
   // CHECK: %[[AWAITABLE:.+]] = load ptr, ptr %[[AWAITABLE_TMP]]
@@ -379,7 +379,7 @@ extern "C" void TestTailcall() {
   // CHECK-NEXT: ]
 
   // Await suspend wrapper
-  // CHECK: define {{.*}} ptr @TestTailcall.__await_suspend_wrapper__await(ptr {{[^,]*}} %[[AWAITABLE_ARG:.+]], ptr {{[^,]*}} %[[FRAME_ARG:.+]])
+  // CHECK: define internal {{.*}} ptr @TestTailcall.__await_suspend_wrapper__await(ptr {{[^,]*}} %[[AWAITABLE_ARG:.+]], ptr {{[^,]*}} %[[FRAME_ARG:.+]])
   // CHECK: store ptr %[[AWAITABLE_ARG]], ptr %[[AWAITABLE_TMP:.+]],
   // CHECK: store ptr %[[FRAME_ARG]], ptr %[[FRAME_TMP:.+]],
   // CHECK: %[[AWAITABLE:.+]] = load ptr, ptr %[[AWAITABLE_TMP]]
diff --git a/clang/test/CodeGenHLSL/builtins/D3DCOLORtoUBYTE4.hlsl b/clang/test/CodeGenHLSL/builtins/D3DCOLORtoUBYTE4.hlsl
index 990f0aa..3c9e35a 100644
--- a/clang/test/CodeGenHLSL/builtins/D3DCOLORtoUBYTE4.hlsl
+++ b/clang/test/CodeGenHLSL/builtins/D3DCOLORtoUBYTE4.hlsl
@@ -5,8 +5,16 @@
 // CHECK-LABEL: D3DCOLORtoUBYTE4
 int4 test_D3DCOLORtoUBYTE4(float4 p1) {
   // CHECK: %[[SCALED:.*]] = fmul [[FMFLAGS:.*]][[FLOAT_TYPE:<4 x float>]] %{{.*}}, splat (float 0x406FE01000000000)
-  // CHECK: %[[CONVERTED:.*]] = fptoui [[FLOAT_TYPE]] %[[SCALED]] to [[INT_TYPE:<4 x i32>]]
+  // CHECK: %[[CONVERTED:.*]] = fptosi [[FLOAT_TYPE]] %[[SCALED]] to [[INT_TYPE:<4 x i32>]]
   // CHECK: %[[SHUFFLED:.*]] = shufflevector [[INT_TYPE]] %[[CONVERTED]], [[INT_TYPE]] poison, <4 x i32> <i32 2, i32 1, i32 0, i32 3>
   // CHECK: ret [[INT_TYPE]] %[[SHUFFLED]]
   return D3DCOLORtoUBYTE4(p1);
 }
+
+// Note this test confirms issue 150673 is fixed 
+// by confirming the negative does not become a poison
+// CHECK-LABEL: test_constant_inputs
+int4 test_constant_inputs() {
+  // CHECK: ret <4 x i32> <i32 -12877, i32 2833, i32 0, i32 25500>
+  return D3DCOLORtoUBYTE4(float4(0, 11.11, -50.5, 100));
+}
diff --git a/clang/test/CodeGenObjC/exceptions.m b/clang/test/CodeGenObjC/exceptions.m
index 1546ed2..832d3a45 100644
--- a/clang/test/CodeGenObjC/exceptions.m
+++ b/clang/test/CodeGenObjC/exceptions.m
@@ -144,18 +144,17 @@ void f4(void) {
   // CHECK-NEXT: br label
   //   -> rethrow
 
-  // finally.call-exit:  Predecessors are the @try and @catch fallthroughs
-  // as well as the no-match case in the catch mechanism.  The i1 is whether
-  // to rethrow and should be true only in the last case.
-  // CHECK:      phi ptr
-  // CHECK-NEXT: phi i1
-  // CHECK-NEXT: call void @objc_exception_try_exit(ptr nonnull [[EXNDATA]])
+  // finally.call-exit:  Predecessor is the no-match case in the catch mechanism
+  // which rethrows.
+  // CHECK:      call void @objc_exception_try_exit(ptr nonnull [[EXNDATA]])
   // CHECK-NEXT: call void @f4_help(i32 noundef 2)
-  // CHECK-NEXT: br i1
-  //   -> ret, rethrow
+  // CHECK-NEXT: br label
+  //   -> rethrow
 
-  // ret:
-  // CHECK:      ret void
+  // finally.end.critedge:  Predecessors are the @try and @catch fallthroughs.
+  // CHECK:      call void @objc_exception_try_exit(ptr nonnull [[EXNDATA]])
+  // CHECK-NEXT: call void @f4_help(i32 noundef 2)
+  // CHECK-NEXT: ret void
 
   // Catch mechanism:
   // CHECK:      call ptr @objc_exception_extract(ptr nonnull [[EXNDATA]])
diff --git a/clang/test/CodeGenOpenCL/amdgpu-features-readonly.cl b/clang/test/CodeGenOpenCL/amdgpu-features-readonly.cl
index d23e6f2..5b76cff 100644
--- a/clang/test/CodeGenOpenCL/amdgpu-features-readonly.cl
+++ b/clang/test/CodeGenOpenCL/amdgpu-features-readonly.cl
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 -triple amdgcn -target-feature +gws -o /dev/null %s 2>&1 \
+// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx942 -target-feature +gws -o /dev/null %s 2>&1 \
 // RUN:   | FileCheck --check-prefix=GWS %s
 
 // GWS: warning: feature flag '+gws' is ignored since the feature is read only [-Winvalid-command-line-argument]
diff --git a/clang/test/CodeGenOpenCL/amdgpu-features.cl b/clang/test/CodeGenOpenCL/amdgpu-features.cl
index e96dd66..efd70a9 100644
--- a/clang/test/CodeGenOpenCL/amdgpu-features.cl
+++ b/clang/test/CodeGenOpenCL/amdgpu-features.cl
@@ -108,7 +108,7 @@
 // GFX1153: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot10-insts,+dot12-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32"
 // GFX1200: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot10-insts,+dot11-insts,+dot12-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+fp8-conversion-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32"
 // GFX1201: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot10-insts,+dot11-insts,+dot12-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+fp8-conversion-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32"
-// GFX1250: "target-features"="+16-bit-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+bf16-trans-insts,+bitop3-insts,+ci-insts,+dl-insts,+dot7-insts,+dot8-insts,+dpp,+fp8-conversion-insts,+fp8e5m3-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx1250-insts,+gfx8-insts,+gfx9-insts,+permlane16-swap,+prng-inst,+setprio-inc-wg-inst,+tanh-insts,+transpose-load-f4f6-insts,+vmem-pref-insts,+wavefrontsize32
+// GFX1250: "target-features"="+16-bit-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+bf16-cvt-insts,+bf16-trans-insts,+bitop3-insts,+ci-insts,+dl-insts,+dot7-insts,+dot8-insts,+dpp,+fp8-conversion-insts,+fp8e5m3-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx1250-insts,+gfx8-insts,+gfx9-insts,+permlane16-swap,+prng-inst,+setprio-inc-wg-inst,+tanh-insts,+transpose-load-f4f6-insts,+vmem-pref-insts,+wavefrontsize32
 
 // GFX1103-W64: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot10-insts,+dot12-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize64"
 
diff --git a/clang/test/CodeGenOpenCL/amdgpu-readonly-features-written-with-no-target.cl b/clang/test/CodeGenOpenCL/amdgpu-readonly-features-written-with-no-target.cl
new file mode 100644
index 0000000..1542efa
--- /dev/null
+++ b/clang/test/CodeGenOpenCL/amdgpu-readonly-features-written-with-no-target.cl
@@ -0,0 +1,16 @@
+// REQUIRES: amdgpu-registered-target
+
+// Check the readonly feature will can be written to the IR
+// if there is no target specified.
+
+// RUN: %clang_cc1 -triple amdgcn -emit-llvm -o - %s | FileCheck --check-prefix=NOCPU %s
+// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx942 -emit-llvm -o - %s | FileCheck --check-prefix=GFX942 %s
+// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx1100 -emit-llvm -o - %s | FileCheck --check-prefix=GFX1100 %s
+// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx1200 -emit-llvm -o - %s | FileCheck --check-prefix=GFX1200 %s
+
+__attribute__((target("gws,image-insts,vmem-to-lds-load-insts"))) void test() {}
+
+// NOCPU: "target-features"="+gws,+image-insts,+vmem-to-lds-load-insts"
+// GFX942: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+fp8-conversion-insts,+fp8-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64,+xf32-insts"
+// GFX1100: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot10-insts,+dot12-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32"
+// GFX1200: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot10-insts,+dot11-insts,+dot12-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+fp8-conversion-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32"
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250-async-load-store-lds.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250-async-load-store-lds.cl
index e3fe31f..ccc05f0 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250-async-load-store-lds.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250-async-load-store-lds.cl
@@ -2,6 +2,89 @@
 // REQUIRES: amdgpu-registered-target
 // RUN: %clang_cc1 -cl-std=CL2.0 -triple amdgcn-unknown-unknown -target-cpu gfx1250 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-GFX1250
 
+typedef int    v2i   __attribute__((ext_vector_type(2)));
+typedef int    v4i   __attribute__((ext_vector_type(4)));
+
+// CHECK-GFX1250-LABEL: @test_amdgcn_global_load_async_to_lds_b8(
+// CHECK-GFX1250-NEXT:  entry:
+// CHECK-GFX1250-NEXT:    tail call void @llvm.amdgcn.global.load.async.to.lds.b8(ptr addrspace(1) [[GADDR:%.*]], ptr addrspace(3) [[LADDR:%.*]], i32 16, i32 0)
+// CHECK-GFX1250-NEXT:    ret void
+//
+void test_amdgcn_global_load_async_to_lds_b8( global char* gaddr, local char* laddr)
+{
+  __builtin_amdgcn_global_load_async_to_lds_b8(gaddr, laddr, 16, 0);
+}
+
+// CHECK-GFX1250-LABEL: @test_amdgcn_global_load_async_to_lds_b32(
+// CHECK-GFX1250-NEXT:  entry:
+// CHECK-GFX1250-NEXT:    tail call void @llvm.amdgcn.global.load.async.to.lds.b32(ptr addrspace(1) [[GADDR:%.*]], ptr addrspace(3) [[LADDR:%.*]], i32 16, i32 0)
+// CHECK-GFX1250-NEXT:    ret void
+//
+void test_amdgcn_global_load_async_to_lds_b32(global int* gaddr, local int* laddr)
+{
+  __builtin_amdgcn_global_load_async_to_lds_b32(gaddr, laddr, 16, 0);
+}
+
+// CHECK-GFX1250-LABEL: @test_amdgcn_global_load_async_to_lds_b64(
+// CHECK-GFX1250-NEXT:  entry:
+// CHECK-GFX1250-NEXT:    tail call void @llvm.amdgcn.global.load.async.to.lds.b64(ptr addrspace(1) [[GADDR:%.*]], ptr addrspace(3) [[LADDR:%.*]], i32 16, i32 0)
+// CHECK-GFX1250-NEXT:    ret void
+//
+void test_amdgcn_global_load_async_to_lds_b64(global v2i* gaddr, local v2i* laddr)
+{
+  __builtin_amdgcn_global_load_async_to_lds_b64(gaddr, laddr, 16, 0);
+}
+
+// CHECK-GFX1250-LABEL: @test_amdgcn_global_load_async_to_lds_b128(
+// CHECK-GFX1250-NEXT:  entry:
+// CHECK-GFX1250-NEXT:    tail call void @llvm.amdgcn.global.load.async.to.lds.b128(ptr addrspace(1) [[GADDR:%.*]], ptr addrspace(3) [[LADDR:%.*]], i32 16, i32 0)
+// CHECK-GFX1250-NEXT:    ret void
+//
+void test_amdgcn_global_load_async_to_lds_b128( global v4i* gaddr, local v4i* laddr)
+{
+  __builtin_amdgcn_global_load_async_to_lds_b128(gaddr, laddr, 16, 0);
+}
+
+// CHECK-GFX1250-LABEL: @test_amdgcn_global_store_async_from_lds_b8(
+// CHECK-GFX1250-NEXT:  entry:
+// CHECK-GFX1250-NEXT:    tail call void @llvm.amdgcn.global.store.async.from.lds.b8(ptr addrspace(1) [[GADDR:%.*]], ptr addrspace(3) [[LADDR:%.*]], i32 16, i32 0)
+// CHECK-GFX1250-NEXT:    ret void
+//
+void test_amdgcn_global_store_async_from_lds_b8(global char* gaddr, local char* laddr)
+{
+  __builtin_amdgcn_global_store_async_from_lds_b8(gaddr, laddr, 16, 0);
+}
+
+// CHECK-GFX1250-LABEL: @test_amdgcn_global_store_async_from_lds_b32(
+// CHECK-GFX1250-NEXT:  entry:
+// CHECK-GFX1250-NEXT:    tail call void @llvm.amdgcn.global.store.async.from.lds.b32(ptr addrspace(1) [[GADDR:%.*]], ptr addrspace(3) [[LADDR:%.*]], i32 16, i32 0)
+// CHECK-GFX1250-NEXT:    ret void
+//
+void test_amdgcn_global_store_async_from_lds_b32(global int* gaddr, local int* laddr)
+{
+  __builtin_amdgcn_global_store_async_from_lds_b32(gaddr, laddr, 16, 0);
+}
+
+// CHECK-GFX1250-LABEL: @test_amdgcn_global_store_async_from_lds_b64(
+// CHECK-GFX1250-NEXT:  entry:
+// CHECK-GFX1250-NEXT:    tail call void @llvm.amdgcn.global.store.async.from.lds.b64(ptr addrspace(1) [[GADDR:%.*]], ptr addrspace(3) [[LADDR:%.*]], i32 16, i32 0)
+// CHECK-GFX1250-NEXT:    ret void
+//
+void test_amdgcn_global_store_async_from_lds_b64(global v2i* gaddr, local v2i* laddr)
+{
+  __builtin_amdgcn_global_store_async_from_lds_b64(gaddr, laddr, 16, 0);
+}
+
+// CHECK-GFX1250-LABEL: @test_amdgcn_global_store_async_from_lds_b128(
+// CHECK-GFX1250-NEXT:  entry:
+// CHECK-GFX1250-NEXT:    tail call void @llvm.amdgcn.global.store.async.from.lds.b128(ptr addrspace(1) [[GADDR:%.*]], ptr addrspace(3) [[LADDR:%.*]], i32 16, i32 0)
+// CHECK-GFX1250-NEXT:    ret void
+//
+void test_amdgcn_global_store_async_from_lds_b128(global v4i* gaddr, local v4i* laddr)
+{
+  __builtin_amdgcn_global_store_async_from_lds_b128(gaddr, laddr, 16, 0);
+}
+
 // CHECK-GFX1250-LABEL: @test_amdgcn_ds_atomic_async_barrier_arrive_b64(
 // CHECK-GFX1250-NEXT:  entry:
 // CHECK-GFX1250-NEXT:    tail call void @llvm.amdgcn.ds.atomic.async.barrier.arrive.b64(ptr addrspace(3) [[ADDR:%.*]])
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl
index 81f39f9..51ab970 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl
@@ -7,7 +7,20 @@
 typedef unsigned int uint;
 typedef unsigned short int ushort;
 typedef unsigned int __attribute__((ext_vector_type(2))) uint2;
+typedef unsigned int __attribute__((ext_vector_type(3))) uint3;
+typedef unsigned int __attribute__((ext_vector_type(4))) uint4;
+typedef __bf16 __attribute__((ext_vector_type(2))) bfloat2;
+typedef __bf16 __attribute__((ext_vector_type(8))) bfloat8;
+typedef __bf16 __attribute__((ext_vector_type(16))) bfloat16;
+typedef __bf16 __attribute__((ext_vector_type(32))) bfloat32;
 typedef half __attribute__((ext_vector_type(2))) half2;
+typedef half __attribute__((ext_vector_type(8))) half8;
+typedef half __attribute__((ext_vector_type(16))) half16;
+typedef half __attribute__((ext_vector_type(32))) half32;
+typedef float __attribute__((ext_vector_type(8))) float8;
+typedef float __attribute__((ext_vector_type(16))) float16;
+typedef float __attribute__((ext_vector_type(32))) float32;
+typedef short __attribute__((ext_vector_type(2))) short2;
 
 // CHECK-LABEL: @test_setprio_inc_wg(
 // CHECK-NEXT:  entry:
@@ -254,6 +267,60 @@ void test_cos_bf16(global __bf16* out, __bf16 a)
   *out = __builtin_amdgcn_cos_bf16(a);
 }
 
+// CHECK-LABEL: @test_cvt_sr_pk_bf16_f32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK-NEXT:    [[B_ADDR:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK-NEXT:    [[SR_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT:    [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr
+// CHECK-NEXT:    [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK-NEXT:    [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// CHECK-NEXT:    [[SR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[SR_ADDR]] to ptr
+// CHECK-NEXT:    store ptr addrspace(1) [[OUT:%.*]], ptr [[OUT_ADDR_ASCAST]], align 8
+// CHECK-NEXT:    store float [[A:%.*]], ptr [[A_ADDR_ASCAST]], align 4
+// CHECK-NEXT:    store float [[B:%.*]], ptr [[B_ADDR_ASCAST]], align 4
+// CHECK-NEXT:    store i32 [[SR:%.*]], ptr [[SR_ADDR_ASCAST]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[A_ADDR_ASCAST]], align 4
+// CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[B_ADDR_ASCAST]], align 4
+// CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[SR_ADDR_ASCAST]], align 4
+// CHECK-NEXT:    [[TMP3:%.*]] = call <2 x bfloat> @llvm.amdgcn.cvt.sr.pk.bf16.f32(float [[TMP0]], float [[TMP1]], i32 [[TMP2]])
+// CHECK-NEXT:    [[TMP4:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8
+// CHECK-NEXT:    store <2 x bfloat> [[TMP3]], ptr addrspace(1) [[TMP4]], align 4
+// CHECK-NEXT:    ret void
+//
+void test_cvt_sr_pk_bf16_f32(global bfloat2* out, float a, float b, uint sr)
+{
+  *out = __builtin_amdgcn_cvt_sr_pk_bf16_f32(a, b, sr);
+}
+
+// CHECK-LABEL: @test_cvt_sr_pk_f16_f32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK-NEXT:    [[B_ADDR:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK-NEXT:    [[SR_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT:    [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr
+// CHECK-NEXT:    [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK-NEXT:    [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// CHECK-NEXT:    [[SR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[SR_ADDR]] to ptr
+// CHECK-NEXT:    store ptr addrspace(1) [[OUT:%.*]], ptr [[OUT_ADDR_ASCAST]], align 8
+// CHECK-NEXT:    store float [[A:%.*]], ptr [[A_ADDR_ASCAST]], align 4
+// CHECK-NEXT:    store float [[B:%.*]], ptr [[B_ADDR_ASCAST]], align 4
+// CHECK-NEXT:    store i32 [[SR:%.*]], ptr [[SR_ADDR_ASCAST]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[A_ADDR_ASCAST]], align 4
+// CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[B_ADDR_ASCAST]], align 4
+// CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[SR_ADDR_ASCAST]], align 4
+// CHECK-NEXT:    [[TMP3:%.*]] = call <2 x half> @llvm.amdgcn.cvt.sr.pk.f16.f32(float [[TMP0]], float [[TMP1]], i32 [[TMP2]])
+// CHECK-NEXT:    [[TMP4:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8
+// CHECK-NEXT:    store <2 x half> [[TMP3]], ptr addrspace(1) [[TMP4]], align 4
+// CHECK-NEXT:    ret void
+//
+void test_cvt_sr_pk_f16_f32(global half2* out, float a, float b, uint sr)
+{
+  *out = __builtin_amdgcn_cvt_sr_pk_f16_f32(a, b, sr);
+}
+
 // CHECK-LABEL: @test_cvt_f16_fp8(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
@@ -370,6 +437,243 @@ void test_cvt_pk_f16_bf8(global half2* out, short a)
   out[0] = __builtin_amdgcn_cvt_pk_f16_bf8(a);
 }
 
+// CHECK-LABEL: @test_cvt_pk_bf8_f16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca <2 x half>, align 4, addrspace(5)
+// CHECK-NEXT:    [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr
+// CHECK-NEXT:    [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK-NEXT:    store ptr addrspace(1) [[OUT:%.*]], ptr [[OUT_ADDR_ASCAST]], align 8
+// CHECK-NEXT:    store <2 x half> [[A:%.*]], ptr [[A_ADDR_ASCAST]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load <2 x half>, ptr [[A_ADDR_ASCAST]], align 4
+// CHECK-NEXT:    [[TMP1:%.*]] = call i16 @llvm.amdgcn.cvt.pk.bf8.f16(<2 x half> [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8
+// CHECK-NEXT:    store i16 [[TMP1]], ptr addrspace(1) [[TMP2]], align 2
+// CHECK-NEXT:    ret void
+//
+void test_cvt_pk_bf8_f16(global short* out, half2 a)
+{
+  *out = __builtin_amdgcn_cvt_pk_bf8_f16(a);
+}
+
+// CHECK-LABEL: @test_cvt_pk_fp8_f16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca <2 x half>, align 4, addrspace(5)
+// CHECK-NEXT:    [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr
+// CHECK-NEXT:    [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK-NEXT:    store ptr addrspace(1) [[OUT:%.*]], ptr [[OUT_ADDR_ASCAST]], align 8
+// CHECK-NEXT:    store <2 x half> [[A:%.*]], ptr [[A_ADDR_ASCAST]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load <2 x half>, ptr [[A_ADDR_ASCAST]], align 4
+// CHECK-NEXT:    [[TMP1:%.*]] = call i16 @llvm.amdgcn.cvt.pk.fp8.f16(<2 x half> [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8
+// CHECK-NEXT:    store i16 [[TMP1]], ptr addrspace(1) [[TMP2]], align 2
+// CHECK-NEXT:    ret void
+//
+void test_cvt_pk_fp8_f16(global short* out, half2 a)
+{
+  *out = __builtin_amdgcn_cvt_pk_fp8_f16(a);
+}
+
+// CHECK-LABEL: @test_cvt_sr_bf8_f16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca half, align 2, addrspace(5)
+// CHECK-NEXT:    [[SR_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT:    [[OLD_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT:    [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr
+// CHECK-NEXT:    [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK-NEXT:    [[SR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[SR_ADDR]] to ptr
+// CHECK-NEXT:    [[OLD_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OLD_ADDR]] to ptr
+// CHECK-NEXT:    store ptr addrspace(1) [[OUT:%.*]], ptr [[OUT_ADDR_ASCAST]], align 8
+// CHECK-NEXT:    store half [[A:%.*]], ptr [[A_ADDR_ASCAST]], align 2
+// CHECK-NEXT:    store i32 [[SR:%.*]], ptr [[SR_ADDR_ASCAST]], align 4
+// CHECK-NEXT:    store i32 [[OLD:%.*]], ptr [[OLD_ADDR_ASCAST]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load half, ptr [[A_ADDR_ASCAST]], align 2
+// CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[SR_ADDR_ASCAST]], align 4
+// CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[OLD_ADDR_ASCAST]], align 4
+// CHECK-NEXT:    [[TMP3:%.*]] = call i32 @llvm.amdgcn.cvt.sr.bf8.f16(half [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], i32 0)
+// CHECK-NEXT:    [[TMP4:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8
+// CHECK-NEXT:    store i32 [[TMP3]], ptr addrspace(1) [[TMP4]], align 4
+// CHECK-NEXT:    [[TMP5:%.*]] = load half, ptr [[A_ADDR_ASCAST]], align 2
+// CHECK-NEXT:    [[TMP6:%.*]] = load i32, ptr [[SR_ADDR_ASCAST]], align 4
+// CHECK-NEXT:    [[TMP7:%.*]] = load i32, ptr [[OLD_ADDR_ASCAST]], align 4
+// CHECK-NEXT:    [[TMP8:%.*]] = call i32 @llvm.amdgcn.cvt.sr.bf8.f16(half [[TMP5]], i32 [[TMP6]], i32 [[TMP7]], i32 1)
+// CHECK-NEXT:    [[TMP9:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8
+// CHECK-NEXT:    store i32 [[TMP8]], ptr addrspace(1) [[TMP9]], align 4
+// CHECK-NEXT:    [[TMP10:%.*]] = load half, ptr [[A_ADDR_ASCAST]], align 2
+// CHECK-NEXT:    [[TMP11:%.*]] = load i32, ptr [[SR_ADDR_ASCAST]], align 4
+// CHECK-NEXT:    [[TMP12:%.*]] = load i32, ptr [[OLD_ADDR_ASCAST]], align 4
+// CHECK-NEXT:    [[TMP13:%.*]] = call i32 @llvm.amdgcn.cvt.sr.bf8.f16(half [[TMP10]], i32 [[TMP11]], i32 [[TMP12]], i32 2)
+// CHECK-NEXT:    [[TMP14:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8
+// CHECK-NEXT:    store i32 [[TMP13]], ptr addrspace(1) [[TMP14]], align 4
+// CHECK-NEXT:    [[TMP15:%.*]] = load half, ptr [[A_ADDR_ASCAST]], align 2
+// CHECK-NEXT:    [[TMP16:%.*]] = load i32, ptr [[SR_ADDR_ASCAST]], align 4
+// CHECK-NEXT:    [[TMP17:%.*]] = load i32, ptr [[OLD_ADDR_ASCAST]], align 4
+// CHECK-NEXT:    [[TMP18:%.*]] = call i32 @llvm.amdgcn.cvt.sr.bf8.f16(half [[TMP15]], i32 [[TMP16]], i32 [[TMP17]], i32 3)
+// CHECK-NEXT:    [[TMP19:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8
+// CHECK-NEXT:    store i32 [[TMP18]], ptr addrspace(1) [[TMP19]], align 4
+// CHECK-NEXT:    ret void
+//
+void test_cvt_sr_bf8_f16(global int* out, half a, uint sr, int old)
+{
+  *out = __builtin_amdgcn_cvt_sr_bf8_f16(a, sr, old, 0);
+  *out = __builtin_amdgcn_cvt_sr_bf8_f16(a, sr, old, 1);
+  *out = __builtin_amdgcn_cvt_sr_bf8_f16(a, sr, old, 2);
+  *out = __builtin_amdgcn_cvt_sr_bf8_f16(a, sr, old, 3);
+}
+
+// CHECK-LABEL: @test_cvt_sr_fp8_f16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca half, align 2, addrspace(5)
+// CHECK-NEXT:    [[SR_ADDR:%.*]] = alloca i16, align 2, addrspace(5)
+// CHECK-NEXT:    [[OLD_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT:    [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr
+// CHECK-NEXT:    [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK-NEXT:    [[SR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[SR_ADDR]] to ptr
+// CHECK-NEXT:    [[OLD_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OLD_ADDR]] to ptr
+// CHECK-NEXT:    store ptr addrspace(1) [[OUT:%.*]], ptr [[OUT_ADDR_ASCAST]], align 8
+// CHECK-NEXT:    store half [[A:%.*]], ptr [[A_ADDR_ASCAST]], align 2
+// CHECK-NEXT:    store i16 [[SR:%.*]], ptr [[SR_ADDR_ASCAST]], align 2
+// CHECK-NEXT:    store i32 [[OLD:%.*]], ptr [[OLD_ADDR_ASCAST]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load half, ptr [[A_ADDR_ASCAST]], align 2
+// CHECK-NEXT:    [[TMP1:%.*]] = load i16, ptr [[SR_ADDR_ASCAST]], align 2
+// CHECK-NEXT:    [[CONV:%.*]] = sext i16 [[TMP1]] to i32
+// CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[OLD_ADDR_ASCAST]], align 4
+// CHECK-NEXT:    [[TMP3:%.*]] = call i32 @llvm.amdgcn.cvt.sr.fp8.f16(half [[TMP0]], i32 [[CONV]], i32 [[TMP2]], i32 0)
+// CHECK-NEXT:    [[TMP4:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8
+// CHECK-NEXT:    store i32 [[TMP3]], ptr addrspace(1) [[TMP4]], align 4
+// CHECK-NEXT:    [[TMP5:%.*]] = load half, ptr [[A_ADDR_ASCAST]], align 2
+// CHECK-NEXT:    [[TMP6:%.*]] = load i16, ptr [[SR_ADDR_ASCAST]], align 2
+// CHECK-NEXT:    [[CONV1:%.*]] = sext i16 [[TMP6]] to i32
+// CHECK-NEXT:    [[TMP7:%.*]] = load i32, ptr [[OLD_ADDR_ASCAST]], align 4
+// CHECK-NEXT:    [[TMP8:%.*]] = call i32 @llvm.amdgcn.cvt.sr.fp8.f16(half [[TMP5]], i32 [[CONV1]], i32 [[TMP7]], i32 1)
+// CHECK-NEXT:    [[TMP9:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8
+// CHECK-NEXT:    store i32 [[TMP8]], ptr addrspace(1) [[TMP9]], align 4
+// CHECK-NEXT:    [[TMP10:%.*]] = load half, ptr [[A_ADDR_ASCAST]], align 2
+// CHECK-NEXT:    [[TMP11:%.*]] = load i16, ptr [[SR_ADDR_ASCAST]], align 2
+// CHECK-NEXT:    [[CONV2:%.*]] = sext i16 [[TMP11]] to i32
+// CHECK-NEXT:    [[TMP12:%.*]] = load i32, ptr [[OLD_ADDR_ASCAST]], align 4
+// CHECK-NEXT:    [[TMP13:%.*]] = call i32 @llvm.amdgcn.cvt.sr.fp8.f16(half [[TMP10]], i32 [[CONV2]], i32 [[TMP12]], i32 2)
+// CHECK-NEXT:    [[TMP14:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8
+// CHECK-NEXT:    store i32 [[TMP13]], ptr addrspace(1) [[TMP14]], align 4
+// CHECK-NEXT:    [[TMP15:%.*]] = load half, ptr [[A_ADDR_ASCAST]], align 2
+// CHECK-NEXT:    [[TMP16:%.*]] = load i16, ptr [[SR_ADDR_ASCAST]], align 2
+// CHECK-NEXT:    [[CONV3:%.*]] = sext i16 [[TMP16]] to i32
+// CHECK-NEXT:    [[TMP17:%.*]] = load i32, ptr [[OLD_ADDR_ASCAST]], align 4
+// CHECK-NEXT:    [[TMP18:%.*]] = call i32 @llvm.amdgcn.cvt.sr.fp8.f16(half [[TMP15]], i32 [[CONV3]], i32 [[TMP17]], i32 3)
+// CHECK-NEXT:    [[TMP19:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8
+// CHECK-NEXT:    store i32 [[TMP18]], ptr addrspace(1) [[TMP19]], align 4
+// CHECK-NEXT:    ret void
+//
+void test_cvt_sr_fp8_f16(global int* out, half a, short sr, int old)
+{
+  *out = __builtin_amdgcn_cvt_sr_fp8_f16(a, sr, old, 0);
+  *out = __builtin_amdgcn_cvt_sr_fp8_f16(a, sr, old, 1);
+  *out = __builtin_amdgcn_cvt_sr_fp8_f16(a, sr, old, 2);
+  *out = __builtin_amdgcn_cvt_sr_fp8_f16(a, sr, old, 3);
+}
+
+// CHECK-LABEL: @test_cvt_scale_pk(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[OUTH8_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
+// CHECK-NEXT:    [[OUTY8_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
+// CHECK-NEXT:    [[SRC2_ADDR:%.*]] = alloca <2 x i32>, align 8, addrspace(5)
+// CHECK-NEXT:    [[OUTF32_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
+// CHECK-NEXT:    [[OUTF8_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
+// CHECK-NEXT:    [[OUTH16_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
+// CHECK-NEXT:    [[OUTY16_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
+// CHECK-NEXT:    [[OUTF16_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
+// CHECK-NEXT:    [[SRC3_ADDR:%.*]] = alloca <3 x i32>, align 16, addrspace(5)
+// CHECK-NEXT:    [[SRC1_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT:    [[SCALE_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT:    [[OUTH8_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUTH8_ADDR]] to ptr
+// CHECK-NEXT:    [[OUTY8_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUTY8_ADDR]] to ptr
+// CHECK-NEXT:    [[SRC2_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[SRC2_ADDR]] to ptr
+// CHECK-NEXT:    [[OUTF32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUTF32_ADDR]] to ptr
+// CHECK-NEXT:    [[OUTF8_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUTF8_ADDR]] to ptr
+// CHECK-NEXT:    [[OUTH16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUTH16_ADDR]] to ptr
+// CHECK-NEXT:    [[OUTY16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUTY16_ADDR]] to ptr
+// CHECK-NEXT:    [[OUTF16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUTF16_ADDR]] to ptr
+// CHECK-NEXT:    [[SRC3_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[SRC3_ADDR]] to ptr
+// CHECK-NEXT:    [[SRC1_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[SRC1_ADDR]] to ptr
+// CHECK-NEXT:    [[SCALE_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[SCALE_ADDR]] to ptr
+// CHECK-NEXT:    store ptr addrspace(1) [[OUTH8:%.*]], ptr [[OUTH8_ADDR_ASCAST]], align 8
+// CHECK-NEXT:    store ptr addrspace(1) [[OUTY8:%.*]], ptr [[OUTY8_ADDR_ASCAST]], align 8
+// CHECK-NEXT:    store <2 x i32> [[SRC2:%.*]], ptr [[SRC2_ADDR_ASCAST]], align 8
+// CHECK-NEXT:    store ptr addrspace(1) [[OUTF32:%.*]], ptr [[OUTF32_ADDR_ASCAST]], align 8
+// CHECK-NEXT:    store ptr addrspace(1) [[OUTF8:%.*]], ptr [[OUTF8_ADDR_ASCAST]], align 8
+// CHECK-NEXT:    store ptr addrspace(1) [[OUTH16:%.*]], ptr [[OUTH16_ADDR_ASCAST]], align 8
+// CHECK-NEXT:    store ptr addrspace(1) [[OUTY16:%.*]], ptr [[OUTY16_ADDR_ASCAST]], align 8
+// CHECK-NEXT:    store ptr addrspace(1) [[OUTF16:%.*]], ptr [[OUTF16_ADDR_ASCAST]], align 8
+// CHECK-NEXT:    store <3 x i32> [[SRC3:%.*]], ptr [[SRC3_ADDR_ASCAST]], align 16
+// CHECK-NEXT:    store i32 [[SRC1:%.*]], ptr [[SRC1_ADDR_ASCAST]], align 4
+// CHECK-NEXT:    store i32 [[SCALE:%.*]], ptr [[SCALE_ADDR_ASCAST]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load <2 x i32>, ptr [[SRC2_ADDR_ASCAST]], align 8
+// CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[SCALE_ADDR_ASCAST]], align 4
+// CHECK-NEXT:    [[TMP2:%.*]] = call <8 x half> @llvm.amdgcn.cvt.scale.pk8.f16.fp8(<2 x i32> [[TMP0]], i32 [[TMP1]], i32 4)
+// CHECK-NEXT:    [[TMP3:%.*]] = load ptr addrspace(1), ptr [[OUTH8_ADDR_ASCAST]], align 8
+// CHECK-NEXT:    store <8 x half> [[TMP2]], ptr addrspace(1) [[TMP3]], align 16
+// CHECK-NEXT:    [[TMP4:%.*]] = load <2 x i32>, ptr [[SRC2_ADDR_ASCAST]], align 8
+// CHECK-NEXT:    [[TMP5:%.*]] = load i32, ptr [[SCALE_ADDR_ASCAST]], align 4
+// CHECK-NEXT:    [[TMP6:%.*]] = call <8 x bfloat> @llvm.amdgcn.cvt.scale.pk8.bf16.fp8(<2 x i32> [[TMP4]], i32 [[TMP5]], i32 5)
+// CHECK-NEXT:    [[TMP7:%.*]] = load ptr addrspace(1), ptr [[OUTY8_ADDR_ASCAST]], align 8
+// CHECK-NEXT:    store <8 x bfloat> [[TMP6]], ptr addrspace(1) [[TMP7]], align 16
+// CHECK-NEXT:    [[TMP8:%.*]] = load <2 x i32>, ptr [[SRC2_ADDR_ASCAST]], align 8
+// CHECK-NEXT:    [[TMP9:%.*]] = load i32, ptr [[SCALE_ADDR_ASCAST]], align 4
+// CHECK-NEXT:    [[TMP10:%.*]] = call <8 x half> @llvm.amdgcn.cvt.scale.pk8.f16.bf8(<2 x i32> [[TMP8]], i32 [[TMP9]], i32 6)
+// CHECK-NEXT:    [[TMP11:%.*]] = load ptr addrspace(1), ptr [[OUTH8_ADDR_ASCAST]], align 8
+// CHECK-NEXT:    store <8 x half> [[TMP10]], ptr addrspace(1) [[TMP11]], align 16
+// CHECK-NEXT:    [[TMP12:%.*]] = load <2 x i32>, ptr [[SRC2_ADDR_ASCAST]], align 8
+// CHECK-NEXT:    [[TMP13:%.*]] = load i32, ptr [[SCALE_ADDR_ASCAST]], align 4
+// CHECK-NEXT:    [[TMP14:%.*]] = call <8 x bfloat> @llvm.amdgcn.cvt.scale.pk8.bf16.bf8(<2 x i32> [[TMP12]], i32 [[TMP13]], i32 7)
+// CHECK-NEXT:    [[TMP15:%.*]] = load ptr addrspace(1), ptr [[OUTY8_ADDR_ASCAST]], align 8
+// CHECK-NEXT:    store <8 x bfloat> [[TMP14]], ptr addrspace(1) [[TMP15]], align 16
+// CHECK-NEXT:    [[TMP16:%.*]] = load i32, ptr [[SRC1_ADDR_ASCAST]], align 4
+// CHECK-NEXT:    [[TMP17:%.*]] = load i32, ptr [[SCALE_ADDR_ASCAST]], align 4
+// CHECK-NEXT:    [[TMP18:%.*]] = call <8 x half> @llvm.amdgcn.cvt.scale.pk8.f16.fp4(i32 [[TMP16]], i32 [[TMP17]], i32 1)
+// CHECK-NEXT:    [[TMP19:%.*]] = load ptr addrspace(1), ptr [[OUTH8_ADDR_ASCAST]], align 8
+// CHECK-NEXT:    store <8 x half> [[TMP18]], ptr addrspace(1) [[TMP19]], align 16
+// CHECK-NEXT:    [[TMP20:%.*]] = load i32, ptr [[SRC1_ADDR_ASCAST]], align 4
+// CHECK-NEXT:    [[TMP21:%.*]] = load i32, ptr [[SCALE_ADDR_ASCAST]], align 4
+// CHECK-NEXT:    [[TMP22:%.*]] = call <8 x bfloat> @llvm.amdgcn.cvt.scale.pk8.bf16.fp4(i32 [[TMP20]], i32 [[TMP21]], i32 2)
+// CHECK-NEXT:    [[TMP23:%.*]] = load ptr addrspace(1), ptr [[OUTY8_ADDR_ASCAST]], align 8
+// CHECK-NEXT:    store <8 x bfloat> [[TMP22]], ptr addrspace(1) [[TMP23]], align 16
+// CHECK-NEXT:    [[TMP24:%.*]] = load <2 x i32>, ptr [[SRC2_ADDR_ASCAST]], align 8
+// CHECK-NEXT:    [[TMP25:%.*]] = load i32, ptr [[SCALE_ADDR_ASCAST]], align 4
+// CHECK-NEXT:    [[TMP26:%.*]] = call <8 x float> @llvm.amdgcn.cvt.scale.pk8.f32.fp8(<2 x i32> [[TMP24]], i32 [[TMP25]], i32 5)
+// CHECK-NEXT:    [[TMP27:%.*]] = load ptr addrspace(1), ptr [[OUTF8_ADDR_ASCAST]], align 8
+// CHECK-NEXT:    store <8 x float> [[TMP26]], ptr addrspace(1) [[TMP27]], align 32
+// CHECK-NEXT:    [[TMP28:%.*]] = load <2 x i32>, ptr [[SRC2_ADDR_ASCAST]], align 8
+// CHECK-NEXT:    [[TMP29:%.*]] = load i32, ptr [[SCALE_ADDR_ASCAST]], align 4
+// CHECK-NEXT:    [[TMP30:%.*]] = call <8 x float> @llvm.amdgcn.cvt.scale.pk8.f32.bf8(<2 x i32> [[TMP28]], i32 [[TMP29]], i32 6)
+// CHECK-NEXT:    [[TMP31:%.*]] = load ptr addrspace(1), ptr [[OUTF8_ADDR_ASCAST]], align 8
+// CHECK-NEXT:    store <8 x float> [[TMP30]], ptr addrspace(1) [[TMP31]], align 32
+// CHECK-NEXT:    [[TMP32:%.*]] = load i32, ptr [[SRC1_ADDR_ASCAST]], align 4
+// CHECK-NEXT:    [[TMP33:%.*]] = load i32, ptr [[SCALE_ADDR_ASCAST]], align 4
+// CHECK-NEXT:    [[TMP34:%.*]] = call <8 x float> @llvm.amdgcn.cvt.scale.pk8.f32.fp4(i32 [[TMP32]], i32 [[TMP33]], i32 7)
+// CHECK-NEXT:    [[TMP35:%.*]] = load ptr addrspace(1), ptr [[OUTF8_ADDR_ASCAST]], align 8
+// CHECK-NEXT:    store <8 x float> [[TMP34]], ptr addrspace(1) [[TMP35]], align 32
+// CHECK-NEXT:    ret void
+//
+void test_cvt_scale_pk(global half8 *outh8, global bfloat8 *outy8, uint2 src2,
+                       global float32 *outf32, global float8 *outf8,
+                       global half16 *outh16, global bfloat16 *outy16,
+                       global float16 *outf16, uint3 src3,
+                       uint src1, uint scale)
+{
+  *outh8 = __builtin_amdgcn_cvt_scale_pk8_f16_fp8(src2, scale, 4);
+  *outy8 = __builtin_amdgcn_cvt_scale_pk8_bf16_fp8(src2, scale, 5);
+  *outh8 = __builtin_amdgcn_cvt_scale_pk8_f16_bf8(src2, scale, 6);
+  *outy8 = __builtin_amdgcn_cvt_scale_pk8_bf16_bf8(src2, scale, 7);
+  *outh8 = __builtin_amdgcn_cvt_scale_pk8_f16_fp4(src1, scale, 1);
+  *outy8 = __builtin_amdgcn_cvt_scale_pk8_bf16_fp4(src1, scale, 2);
+  *outf8 = __builtin_amdgcn_cvt_scale_pk8_f32_fp8(src2, scale, 5);
+  *outf8 = __builtin_amdgcn_cvt_scale_pk8_f32_bf8(src2, scale, 6);
+  *outf8 = __builtin_amdgcn_cvt_scale_pk8_f32_fp4(src1, scale, 7);
+}
+
 // CHECK-LABEL: @test_sat_pk4_i4_i8(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[OUT_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
@@ -459,6 +763,60 @@ void test_prefetch(generic void *fptr, global void *gptr) {
   __builtin_amdgcn_global_prefetch(gptr, 8);
 }
 
+// CHECK-LABEL: @test_cvt_pk_fp8_f32_e5m3(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
+// CHECK-NEXT:    [[OLD_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK-NEXT:    [[B_ADDR:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK-NEXT:    [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr
+// CHECK-NEXT:    [[OLD_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OLD_ADDR]] to ptr
+// CHECK-NEXT:    [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK-NEXT:    [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// CHECK-NEXT:    store ptr addrspace(1) [[OUT:%.*]], ptr [[OUT_ADDR_ASCAST]], align 8
+// CHECK-NEXT:    store i32 [[OLD:%.*]], ptr [[OLD_ADDR_ASCAST]], align 4
+// CHECK-NEXT:    store float [[A:%.*]], ptr [[A_ADDR_ASCAST]], align 4
+// CHECK-NEXT:    store float [[B:%.*]], ptr [[B_ADDR_ASCAST]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[A_ADDR_ASCAST]], align 4
+// CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[B_ADDR_ASCAST]], align 4
+// CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[OLD_ADDR_ASCAST]], align 4
+// CHECK-NEXT:    [[TMP3:%.*]] = call i32 @llvm.amdgcn.cvt.pk.fp8.f32.e5m3(float [[TMP0]], float [[TMP1]], i32 [[TMP2]], i1 true)
+// CHECK-NEXT:    [[TMP4:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8
+// CHECK-NEXT:    store i32 [[TMP3]], ptr addrspace(1) [[TMP4]], align 4
+// CHECK-NEXT:    ret void
+//
+void test_cvt_pk_fp8_f32_e5m3(global int* out, int old, float a, float b)
+{
+  *out = __builtin_amdgcn_cvt_pk_fp8_f32_e5m3(a, b, old, true);
+}
+
+// CHECK-LABEL: @test_cvt_sr_fp8_f32_e5m3(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
+// CHECK-NEXT:    [[OLD_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK-NEXT:    [[B_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT:    [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr
+// CHECK-NEXT:    [[OLD_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OLD_ADDR]] to ptr
+// CHECK-NEXT:    [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK-NEXT:    [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// CHECK-NEXT:    store ptr addrspace(1) [[OUT:%.*]], ptr [[OUT_ADDR_ASCAST]], align 8
+// CHECK-NEXT:    store i32 [[OLD:%.*]], ptr [[OLD_ADDR_ASCAST]], align 4
+// CHECK-NEXT:    store float [[A:%.*]], ptr [[A_ADDR_ASCAST]], align 4
+// CHECK-NEXT:    store i32 [[B:%.*]], ptr [[B_ADDR_ASCAST]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[A_ADDR_ASCAST]], align 4
+// CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[B_ADDR_ASCAST]], align 4
+// CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[OLD_ADDR_ASCAST]], align 4
+// CHECK-NEXT:    [[TMP3:%.*]] = call i32 @llvm.amdgcn.cvt.sr.fp8.f32.e5m3(float [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], i32 3)
+// CHECK-NEXT:    [[TMP4:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8
+// CHECK-NEXT:    store i32 [[TMP3]], ptr addrspace(1) [[TMP4]], align 4
+// CHECK-NEXT:    ret void
+//
+void test_cvt_sr_fp8_f32_e5m3(global int* out, int old, float a, int b)
+{
+  *out = __builtin_amdgcn_cvt_sr_fp8_f32_e5m3(a, b, old, 3);
+}
+
 // CHECK-LABEL: @test_cvt_f32_fp8_e5m3(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
diff --git a/clang/test/Driver/aarch64-toolchain.c b/clang/test/Driver/aarch64-toolchain.c
index cfad4b8..512b5a8 100644
--- a/clang/test/Driver/aarch64-toolchain.c
+++ b/clang/test/Driver/aarch64-toolchain.c
@@ -11,7 +11,7 @@
 // LLD-AARCH64-BAREMETAL: "-isysroot" "{{.*}}Inputs/basic_aarch64_gcc_tree/aarch64-none-elf"
 // LLD-AARCH64-BAREMETAL: "-internal-isystem" "{{.*}}Inputs/basic_aarch64_gcc_tree/aarch64-none-elf/include"
 // LLD-AARCH64-BAREMETAL: "{{.*}}/Inputs/lld/ld.lld"
-// LLD-AARCH64-BAREMETAL: "-Bstatic" "-m" "aarch64linux" "-EL"
+// LLD-AARCH64-BAREMETAL: "-Bstatic" "-m" "aarch64elf" "-EL"
 // LLD-AARCH64-BAREMETAL: "{{.*}}/Inputs/basic_aarch64_gcc_tree/aarch64-none-elf/lib/crt0.o"
 // LLD-AARCH64-BAREMETAL: "{{.*}}/Inputs/basic_aarch64_gcc_tree/lib/gcc/aarch64-none-elf/8.2.1/crtbegin.o"
 // LLD-AARCH64-BAREMETAL: "-L{{.*}}/Inputs/basic_aarch64_gcc_tree/lib/gcc/aarch64-none-elf/8.2.1"
@@ -30,7 +30,7 @@
 // C-AARCH64-BAREMETAL: "-internal-isystem" "{{.*}}Inputs/basic_aarch64_gcc_tree/aarch64-none-elf/include"
 // C-AARCH64-BAREMETAL: "{{.*}}/Inputs/basic_aarch64_gcc_tree/lib/gcc/aarch64-none-elf/8.2.1/../../../../bin/aarch64-none-elf-ld"
 // C-AARCH64-BAREMETAL: "--sysroot={{.*}}/Inputs/basic_aarch64_gcc_tree/aarch64-none-elf"
-// C-AARCH64-BAREMETAL: "-Bstatic" "-m" "aarch64linux" "-EL"
+// C-AARCH64-BAREMETAL: "-Bstatic" "-m" "aarch64elf" "-EL"
 // C-AARCH64-BAREMETAL: "{{.*}}/Inputs/basic_aarch64_gcc_tree/aarch64-none-elf/lib/crt0.o"
 // C-AARCH64-BAREMETAL: "{{.*}}/Inputs/basic_aarch64_gcc_tree/lib/gcc/aarch64-none-elf/8.2.1/crtbegin.o"
 // C-AARCH64-BAREMETAL: "-L{{.*}}/Inputs/basic_aarch64_gcc_tree/lib/gcc/aarch64-none-elf/8.2.1"
@@ -47,7 +47,7 @@
 // C-AARCH64-BAREMETAL-NOSYSROOT: "-cc1" "-triple" "aarch64-unknown-none-elf"
 // C-AARCH64-BAREMETAL-NOSYSROOT: "-internal-isystem" "{{.*}}/Inputs/basic_aarch64_gcc_tree/lib/gcc/aarch64-none-elf/8.2.1/../../../../aarch64-none-elf/include"
 // C-AARCH64-BAREMETAL-NOSYSROOT: "{{.*}}/Inputs/basic_aarch64_gcc_tree/lib/gcc/aarch64-none-elf/8.2.1/../../../../bin/aarch64-none-elf-ld"
-// C-AARCH64-BAREMETAL-NOSYSROOT: "-Bstatic" "-m" "aarch64linux" "-EL"
+// C-AARCH64-BAREMETAL-NOSYSROOT: "-Bstatic" "-m" "aarch64elf" "-EL"
 // C-AARCH64-BAREMETAL-NOSYSROOT: "{{.*}}/Inputs/basic_aarch64_gcc_tree/lib/gcc/aarch64-none-elf/8.2.1/../../../../aarch64-none-elf/lib/crt0.o"
 // C-AARCH64-BAREMETAL-NOSYSROOT: "{{.*}}/Inputs/basic_aarch64_gcc_tree/lib/gcc/aarch64-none-elf/8.2.1/crtbegin.o"
 // C-AARCH64-BAREMETAL-NOSYSROOT: "-L{{.*}}/Inputs/basic_aarch64_gcc_tree/lib/gcc/aarch64-none-elf/8.2.1"
@@ -67,7 +67,7 @@
 // CXX-AARCH64-BAREMETAL: "-internal-isystem" "{{.*}}/Inputs/basic_aarch64_gcc_tree/aarch64-none-elf/include"
 // CXX-AARCH64-BAREMETAL: "{{.*}}/Inputs/basic_aarch64_gcc_tree/lib/gcc/aarch64-none-elf/8.2.1/../../../../bin/aarch64-none-elf-ld"
 // CXX-AARCH64-BAREMETAL: "--sysroot={{.*}}/Inputs/basic_aarch64_gcc_tree/aarch64-none-elf"
-// CXX-AARCH64-BAREMETAL: "-Bstatic" "-m" "aarch64linux" "-EL"
+// CXX-AARCH64-BAREMETAL: "-Bstatic" "-m" "aarch64elf" "-EL"
 // CXX-AARCH64-BAREMETAL: "{{.*}}/Inputs/basic_aarch64_gcc_tree/aarch64-none-elf/lib/crt0.o"
 // CXX-AARCH64-BAREMETAL: "{{.*}}/Inputs/basic_aarch64_gcc_tree/lib/gcc/aarch64-none-elf/8.2.1/crtbegin.o"
 // CXX-AARCH64-BAREMETAL: "-L{{.*}}/Inputs/basic_aarch64_gcc_tree/lib/gcc/aarch64-none-elf/8.2.1"
@@ -86,7 +86,7 @@
 // CXX-AARCH64-BAREMETAL-NOSYSROOT: "-internal-isystem" "{{.*}}/Inputs/basic_aarch64_gcc_tree/lib/gcc/aarch64-none-elf/8.2.1/../../../../aarch64-none-elf/include/c++/8.2.1"
 // CXX-AARCH64-BAREMETAL-NOSYSROOT: "-internal-isystem" "{{.*}}/Inputs/basic_aarch64_gcc_tree/lib/gcc/aarch64-none-elf/8.2.1/../../../../aarch64-none-elf/include"
 // CXX-AARCH64-BAREMETAL-NOSYSROOT: "{{.*}}/Inputs/basic_aarch64_gcc_tree/lib/gcc/aarch64-none-elf/8.2.1/../../../../bin/aarch64-none-elf-ld"
-// CXX-AARCH64-BAREMETAL-NOSYSROOT: "-Bstatic" "-m" "aarch64linux" "-EL"
+// CXX-AARCH64-BAREMETAL-NOSYSROOT: "-Bstatic" "-m" "aarch64elf" "-EL"
 // CXX-AARCH64-BAREMETAL-NOSYSROOT: "{{.*}}/Inputs/basic_aarch64_gcc_tree/lib/gcc/aarch64-none-elf/8.2.1/../../../../aarch64-none-elf/lib/crt0.o"
 // CXX-AARCH64-BAREMETAL-NOSYSROOT: "{{.*}}/Inputs/basic_aarch64_gcc_tree/lib/gcc/aarch64-none-elf/8.2.1/crtbegin.o"
 // CXX-AARCH64-BAREMETAL-NOSYSROOT: "-L{{.*}}/Inputs/basic_aarch64_gcc_tree/lib/gcc/aarch64-none-elf/8.2.1"
@@ -105,7 +105,7 @@
 // CXX-AARCH64-BAREMETAL-LIBCXX: "-internal-isystem" "{{.*}}/Inputs/basic_aarch64_gcc_tree/aarch64-none-elf/include"
 // CXX-AARCH64-BAREMETAL-LIBCXX: "{{.*}}/Inputs/basic_aarch64_gcc_tree/lib/gcc/aarch64-none-elf/8.2.1/../../../../bin/aarch64-none-elf-ld"
 // CXX-AARCH64-BAREMETAL-LIBCXX: "--sysroot={{.*}}/Inputs/basic_aarch64_gcc_tree/aarch64-none-elf"
-// CXX-AARCH64-BAREMETAL-LIBCXX: "-Bstatic" "-m" "aarch64linux" "-EL"
+// CXX-AARCH64-BAREMETAL-LIBCXX: "-Bstatic" "-m" "aarch64elf" "-EL"
 // CXX-AARCH64-BAREMETAL-LIBCXX: "{{.*}}/Inputs/basic_aarch64_gcc_tree/aarch64-none-elf/lib/crt0.o"
 // CXX-AARCH64-BAREMETAL-LIBCXX: "{{.*}}/Inputs/basic_aarch64_gcc_tree/lib/gcc/aarch64-none-elf/8.2.1/crtbegin.o"
 // CXX-AARCH64-BAREMETAL-LIBCXX: "-L{{.*}}/Inputs/basic_aarch64_gcc_tree/lib/gcc/aarch64-none-elf/8.2.1"
@@ -122,7 +122,7 @@
 // CXX-AARCH64-BAREMETAL-NOSYSROOT-LIBCXX: "-internal-isystem" "{{.*}}/Inputs/basic_aarch64_gcc_tree/lib/gcc/aarch64-none-elf/8.2.1/../../../../aarch64-none-elf/include/c++/v1"
 // CXX-AARCH64-BAREMETAL-NOSYSROOT-LIBCXX: "-internal-isystem" "{{.*}}/Inputs/basic_aarch64_gcc_tree/lib/gcc/aarch64-none-elf/8.2.1/../../../../aarch64-none-elf/include"
 // CXX-AARCH64-BAREMETAL-NOSYSROOT-LIBCXX: "{{.*}}/Inputs/basic_aarch64_gcc_tree/lib/gcc/aarch64-none-elf/8.2.1/../../../../bin/aarch64-none-elf-ld"
-// CXX-AARCH64-BAREMETAL-NOSYSROOT-LIBCXX: "-Bstatic" "-m" "aarch64linux" "-EL"
+// CXX-AARCH64-BAREMETAL-NOSYSROOT-LIBCXX: "-Bstatic" "-m" "aarch64elf" "-EL"
 // CXX-AARCH64-BAREMETAL-NOSYSROOT-LIBCXX: "{{.*}}/Inputs/basic_aarch64_gcc_tree/lib/gcc/aarch64-none-elf/8.2.1/../../../../aarch64-none-elf/lib/crt0.o"
 // CXX-AARCH64-BAREMETAL-NOSYSROOT-LIBCXX: "{{.*}}/Inputs/basic_aarch64_gcc_tree/lib/gcc/aarch64-none-elf/8.2.1/crtbegin.o"
 // CXX-AARCH64-BAREMETAL-NOSYSROOT-LIBCXX: "-L{{.*}}/Inputs/basic_aarch64_gcc_tree/lib/gcc/aarch64-none-elf/8.2.1"
diff --git a/clang/test/Driver/arm-aarch64-multilib-invalid-arch.c b/clang/test/Driver/arm-aarch64-multilib-invalid-arch.c
new file mode 100644
index 0000000..2ef27ad
--- /dev/null
+++ b/clang/test/Driver/arm-aarch64-multilib-invalid-arch.c
@@ -0,0 +1,2 @@
+// RUN: not %clang -multi-lib-config=%S/Inputs/multilib/empty.yaml --target=arm-none-eabi -march=invalid
+// RUN: not %clang -multi-lib-config=%S/Inputs/multilib/empty.yaml --target=aarch64-none-elf -march=invalid
diff --git a/clang/test/Driver/arm-toolchain.c b/clang/test/Driver/arm-toolchain.c
index c367594..9005992 100644
--- a/clang/test/Driver/arm-toolchain.c
+++ b/clang/test/Driver/arm-toolchain.c
@@ -10,7 +10,7 @@
 // LLD-ARM-BAREMETAL: "-isysroot" "{{.*}}Inputs/basic_arm_gcc_tree/armv6m-none-eabi"
 // LLD-ARM-BAREMETAL: "-internal-isystem" "{{.*}}Inputs/basic_arm_gcc_tree/armv6m-none-eabi/include"
 // LLD-ARM-BAREMETAL: "{{.*}}/Inputs/lld/ld.lld"
-// LLD-ARM-BAREMETAL: "-Bstatic" "-m" "armelf_linux_eabi" "-EL"
+// LLD-ARM-BAREMETAL: "-Bstatic" "-m" "armelf" "-EL"
 // LLD-ARM-BAREMETAL: "{{.*}}/Inputs/basic_arm_gcc_tree/armv6m-none-eabi/lib/crt0.o"
 // LLD-ARM-BAREMETAL: "{{.*}}/Inputs/basic_arm_gcc_tree/lib/gcc/armv6m-none-eabi/8.2.1/crtbegin.o"
 // LLD-ARM-BAREMETAL: "-L{{.*}}/Inputs/basic_arm_gcc_tree/lib/gcc/armv6m-none-eabi/8.2.1"
@@ -29,7 +29,7 @@
 // C-ARM-BAREMETAL: "-internal-isystem" "{{.*}}Inputs/basic_arm_gcc_tree/armv6m-none-eabi/include"
 // C-ARM-BAREMETAL: "{{.*}}/Inputs/basic_arm_gcc_tree/lib/gcc/armv6m-none-eabi/8.2.1/../../../../bin/armv6m-none-eabi-ld"
 // C-ARM-BAREMETAL: "--sysroot={{.*}}/Inputs/basic_arm_gcc_tree/armv6m-none-eabi"
-// C-ARM-BAREMETAL: "-Bstatic" "-m" "armelf_linux_eabi" "-EL"
+// C-ARM-BAREMETAL: "-Bstatic" "-m" "armelf" "-EL"
 // C-ARM-BAREMETAL: "{{.*}}/Inputs/basic_arm_gcc_tree/armv6m-none-eabi/lib/crt0.o"
 // C-ARM-BAREMETAL: "{{.*}}/Inputs/basic_arm_gcc_tree/lib/gcc/armv6m-none-eabi/8.2.1/crtbegin.o"
 // C-ARM-BAREMETAL: "-L{{.*}}/Inputs/basic_arm_gcc_tree/lib/gcc/armv6m-none-eabi/8.2.1"
@@ -46,7 +46,7 @@
 // C-ARM-BAREMETAL-NOSYSROOT: "-cc1" "-triple" "thumbv6m-unknown-none-eabi"
 // C-ARM-BAREMETAL-NOSYSROOT: "-internal-isystem" "{{.*}}/Inputs/basic_arm_gcc_tree/lib/gcc/armv6m-none-eabi/8.2.1/../../../../armv6m-none-eabi/include"
 // C-ARM-BAREMETAL-NOSYSROOT: "{{.*}}/Inputs/basic_arm_gcc_tree/lib/gcc/armv6m-none-eabi/8.2.1/../../../../bin/armv6m-none-eabi-ld"
-// C-ARM-BAREMETAL-NOSYSROOT: "-Bstatic" "-m" "armelf_linux_eabi" "-EL"
+// C-ARM-BAREMETAL-NOSYSROOT: "-Bstatic" "-m" "armelf" "-EL"
 // C-ARM-BAREMETAL-NOSYSROOT: "{{.*}}/Inputs/basic_arm_gcc_tree/lib/gcc/armv6m-none-eabi/8.2.1/../../../../armv6m-none-eabi/lib/crt0.o"
 // C-ARM-BAREMETAL-NOSYSROOT: "{{.*}}/Inputs/basic_arm_gcc_tree/lib/gcc/armv6m-none-eabi/8.2.1/crtbegin.o"
 // C-ARM-BAREMETAL-NOSYSROOT: "-L{{.*}}/Inputs/basic_arm_gcc_tree/lib/gcc/armv6m-none-eabi/8.2.1"
@@ -67,7 +67,7 @@
 // CXX-ARM-BAREMETAL: "-internal-isystem" "{{.*}}/Inputs/basic_arm_gcc_tree/armv6m-none-eabi/include"
 // CXX-ARM-BAREMETAL: "{{.*}}/Inputs/basic_arm_gcc_tree/lib/gcc/armv6m-none-eabi/8.2.1/../../../../bin/armv6m-none-eabi-ld"
 // CXX-ARM-BAREMETAL: "--sysroot={{.*}}/Inputs/basic_arm_gcc_tree/armv6m-none-eabi"
-// CXX-ARM-BAREMETAL: "-Bstatic" "-m" "armelf_linux_eabi" "-EL"
+// CXX-ARM-BAREMETAL: "-Bstatic" "-m" "armelf" "-EL"
 // CXX-ARM-BAREMETAL: "{{.*}}/Inputs/basic_arm_gcc_tree/armv6m-none-eabi/lib/crt0.o"
 // CXX-ARM-BAREMETAL: "{{.*}}/Inputs/basic_arm_gcc_tree/lib/gcc/armv6m-none-eabi/8.2.1/crtbegin.o"
 // CXX-ARM-BAREMETAL: "-L{{.*}}/Inputs/basic_arm_gcc_tree/lib/gcc/armv6m-none-eabi/8.2.1"
@@ -87,7 +87,7 @@
 // CXX-ARM-BAREMETAL-NOSYSROOT: "-internal-isystem" "{{.*}}/Inputs/basic_arm_gcc_tree/lib/gcc/armv6m-none-eabi/8.2.1/../../../../armv6m-none-eabi/include/c++/8.2.1"
 // CXX-ARM-BAREMETAL-NOSYSROOT: "-internal-isystem" "{{.*}}/Inputs/basic_arm_gcc_tree/lib/gcc/armv6m-none-eabi/8.2.1/../../../../armv6m-none-eabi/include"
 // CXX-ARM-BAREMETAL-NOSYSROOT: "{{.*}}/Inputs/basic_arm_gcc_tree/lib/gcc/armv6m-none-eabi/8.2.1/../../../../bin/armv6m-none-eabi-ld"
-// CXX-ARM-BAREMETAL-NOSYSROOT: "-Bstatic" "-m" "armelf_linux_eabi" "-EL"
+// CXX-ARM-BAREMETAL-NOSYSROOT: "-Bstatic" "-m" "armelf" "-EL"
 // CXX-ARM-BAREMETAL-NOSYSROOT: "{{.*}}/Inputs/basic_arm_gcc_tree/lib/gcc/armv6m-none-eabi/8.2.1/../../../../armv6m-none-eabi/lib/crt0.o"
 // CXX-ARM-BAREMETAL-NOSYSROOT: "{{.*}}/Inputs/basic_arm_gcc_tree/lib/gcc/armv6m-none-eabi/8.2.1/crtbegin.o"
 // CXX-ARM-BAREMETAL-NOSYSROOT: "-L{{.*}}/Inputs/basic_arm_gcc_tree/lib/gcc/armv6m-none-eabi/8.2.1"
@@ -106,7 +106,7 @@
 // CXX-ARM-BAREMETAL-LIBCXX: "-internal-isystem" "{{.*}}/Inputs/basic_arm_gcc_tree/armv6m-none-eabi/include"
 // CXX-ARM-BAREMETAL-LIBCXX: "{{.*}}/Inputs/basic_arm_gcc_tree/lib/gcc/armv6m-none-eabi/8.2.1/../../../../bin/armv6m-none-eabi-ld"
 // CXX-ARM-BAREMETAL-LIBCXX: "--sysroot={{.*}}/Inputs/basic_arm_gcc_tree/armv6m-none-eabi"
-// CXX-ARM-BAREMETAL-LIBCXX: "-Bstatic" "-m" "armelf_linux_eabi" "-EL"
+// CXX-ARM-BAREMETAL-LIBCXX: "-Bstatic" "-m" "armelf" "-EL"
 // CXX-ARM-BAREMETAL-LIBCXX: "{{.*}}/Inputs/basic_arm_gcc_tree/armv6m-none-eabi/lib/crt0.o"
 // CXX-ARM-BAREMETAL-LIBCXX: "{{.*}}/Inputs/basic_arm_gcc_tree/lib/gcc/armv6m-none-eabi/8.2.1/crtbegin.o"
 // CXX-ARM-BAREMETAL-LIBCXX: "-L{{.*}}/Inputs/basic_arm_gcc_tree/lib/gcc/armv6m-none-eabi/8.2.1"
@@ -123,7 +123,7 @@
 // CXX-ARM-BAREMETAL-NOSYSROOT-LIBCXX: "-internal-isystem" "{{.*}}/Inputs/basic_arm_gcc_tree/lib/gcc/armv6m-none-eabi/8.2.1/../../../../armv6m-none-eabi/include/c++/v1"
 // CXX-ARM-BAREMETAL-NOSYSROOT-LIBCXX: "-internal-isystem" "{{.*}}/Inputs/basic_arm_gcc_tree/lib/gcc/armv6m-none-eabi/8.2.1/../../../../armv6m-none-eabi/include"
 // CXX-ARM-BAREMETAL-NOSYSROOT-LIBCXX: "{{.*}}/Inputs/basic_arm_gcc_tree/lib/gcc/armv6m-none-eabi/8.2.1/../../../../bin/armv6m-none-eabi-ld"
-// CXX-ARM-BAREMETAL-NOSYSROOT-LIBCXX: "-Bstatic" "-m" "armelf_linux_eabi" "-EL"
+// CXX-ARM-BAREMETAL-NOSYSROOT-LIBCXX: "-Bstatic" "-m" "armelf" "-EL"
 // CXX-ARM-BAREMETAL-NOSYSROOT-LIBCXX: "{{.*}}/Inputs/basic_arm_gcc_tree/lib/gcc/armv6m-none-eabi/8.2.1/../../../../armv6m-none-eabi/lib/crt0.o"
 // CXX-ARM-BAREMETAL-NOSYSROOT-LIBCXX: "{{.*}}/Inputs/basic_arm_gcc_tree/lib/gcc/armv6m-none-eabi/8.2.1/crtbegin.o"
 // CXX-ARM-BAREMETAL-NOSYSROOT-LIBCXX: "-L{{.*}}/Inputs/basic_arm_gcc_tree/lib/gcc/armv6m-none-eabi/8.2.1"
diff --git a/clang/test/Driver/baremetal.cpp b/clang/test/Driver/baremetal.cpp
index adb59e1..26f030d 100644
--- a/clang/test/Driver/baremetal.cpp
+++ b/clang/test/Driver/baremetal.cpp
@@ -17,7 +17,7 @@
 // CHECK-V6M-C-SAME: "-x" "c++" "{{.*}}baremetal.cpp"
 // CHECK-V6M-C-NEXT: ld{{(.exe)?}}"
 // CHECK-V6M-C-SAME: "--sysroot={{.*}}{{[/\\]+}}Inputs{{[/\\]+}}baremetal_arm"
-// CHECK-V6M-C-SAME: "-Bstatic" "-m" "armelf_linux_eabi" "-EL"
+// CHECK-V6M-C-SAME: "-Bstatic" "-m" "armelf" "-EL"
 // CHECK-V6M-C-SAME: "[[SYSROOT:[^"]+]]{{[/\\]+}}lib{{[/\\]+}}crt0.o"
 // CHECK-V6M-C-SAME: "-T" "semihosted.lds" "-Lsome{{[/\\]+}}directory{{[/\\]+}}user{{[/\\]+}}asked{{[/\\]+}}for"
 // CHECK-V6M-C-SAME: "-L[[SYSROOT:[^"]+]]{{[/\\]+}}lib"
@@ -43,7 +43,7 @@
 // CHECK-V6M-TREE-SAME: "-internal-isystem" "[[INSTALLED_DIR]]{{[/\\]+}}..{{[/\\]+}}include{{[/\\]+}}armv6m-unknown-none-eabi"
 // CHECK-V6M-TREE-SAME: "-x" "c++" "{{.*}}baremetal.cpp"
 // CHECK-V6M-TREE-NEXT: ld{{(.exe)?}}"
-// CHECK-V6M-TREE-SAME: "-Bstatic" "-m" "armelf_linux_eabi" "-EL"
+// CHECK-V6M-TREE-SAME: "-Bstatic" "-m" "armelf" "-EL"
 // CHECK-V6M-TREE-SAME: "[[INSTALLED_DIR]]{{[/\\]+}}..{{[/\\]+}}lib{{[/\\]+}}armv6m-unknown-none-eabi{{[/\\]+}}crt0.o"
 // CHECK-V6M-TREE-SAME: "-L[[INSTALLED_DIR]]{{[/\\]+}}..{{[/\\]+}}lib{{[/\\]+}}armv6m-unknown-none-eabi"
 // CHECK-V6M-TREE-SAME "{{.*}}.o"
@@ -60,7 +60,7 @@
 // CHECK-ARMV7M-PER-TARGET: "-x" "c++" "{{.*}}baremetal.cpp"
 // CHECK-ARMV7M-PER-TARGET: ld{{(.exe)?}}"
 // CHECK-ARMV7M-PER-TARGET: "--sysroot={{.*}}{{[/\\]+}}Inputs{{[/\\]+}}baremetal_arm"
-// CHECK-ARMV7M-PER-TARGET: "-Bstatic" "-m" "armelf_linux_eabi" "-EL"
+// CHECK-ARMV7M-PER-TARGET: "-Bstatic" "-m" "armelf" "-EL"
 // CHECK-ARMV7M-PER_TARGET: "[[SYSROOT:[^"]+]]{{[/\\]+}}lib{{[/\\]+}}crt0.o"
 // CHECK-ARMV7M-PER-TARGET: "-L[[SYSROOT:[^"]+]]{{[/\\]+}}lib"
 // CHECK-ARMV7M-PER-TARGET: "-L[[RESOURCE_DIR:[^"]+]]{{[/\\]+}}lib{{[/\\]+}}armv7m-vendor-none-eabi
@@ -73,7 +73,7 @@
 // CHECK-V6M-DEFAULTCXX: "-resource-dir" "[[RESOURCE_DIR:[^"]+]]"
 // CHECK-V6M-DEFAULTCXX: ld{{(.exe)?}}"
 // CHECK-V6M-DEFAULTCXX: "--sysroot={{.*}}{{[/\\]+}}Inputs{{[/\\]+}}baremetal_arm"
-// CHECK-V6M-DEFAULTCXX: "-Bstatic" "-m" "armelf_linux_eabi" "-EL"
+// CHECK-V6M-DEFAULTCXX: "-Bstatic" "-m" "armelf" "-EL"
 // CHECK-V6M-DEFAULTCXX-SAME: "[[SYSROOT:[^"]+]]{{[/\\]+}}lib{{[/\\]+}}crt0.o"
 // CHECK-V6M-DEFAULTCXX-SAME: "-L{{[^"]*}}{{[/\\]+}}Inputs{{[/\\]+}}baremetal_arm{{[/\\]+}}lib"
 // CHECK-V6M-DEFAULTCXX-SAME: "{{.*}}.o"
@@ -90,7 +90,7 @@
 // CHECK-V6M-LIBCXX-SAME: "-internal-isystem" "{{[^"]+}}{{[/\\]+}}include{{[/\\]+}}c++{{[/\\]+}}v1"
 // CHECK-V6M-LIBCXX: ld{{(.exe)?}}"
 // CHECK-V6M-LIBCXX-SAME: "--sysroot={{.*}}{{[/\\]+}}Inputs{{[/\\]+}}baremetal_arm"
-// CHECK-V6M-LIBCXX-SAME: "-Bstatic" "-m" "armelf_linux_eabi" "-EL"
+// CHECK-V6M-LIBCXX-SAME: "-Bstatic" "-m" "armelf" "-EL"
 // CHECK-V6M-LIBCXX-SAME: "-L{{[^"]*}}{{[/\\]+}}Inputs{{[/\\]+}}baremetal_arm{{[/\\]+}}lib"
 // CHECK-V6M-LIBCXX-SAME: "{{.*}}.o"
 // CHECK-V6M-LIBCXX-SAME: "-lc++"
@@ -108,7 +108,7 @@
 // CHECK-V6M-LIBSTDCXX-SAME: "-internal-isystem" "{{[^"]+}}{{[/\\]+}}include{{[/\\]+}}c++{{[/\\]+}}6.0.0"
 // CHECK-V6M-LIBSTDCXX: ld{{(.exe)?}}"
 // CHECK-V6M-LIBSTDCXX-SAME: "--sysroot={{.*}}{{[/\\]+}}Inputs{{[/\\]+}}baremetal_arm"
-// CHECK-V6M-LIBSTDCXX-SAME: "-Bstatic" "-m" "armelf_linux_eabi" "-EL"
+// CHECK-V6M-LIBSTDCXX-SAME: "-Bstatic" "-m" "armelf" "-EL"
 // CHECK-V6M-LIBSTDCXX-SAME: "-L{{[^"]*}}{{[/\\]+}}Inputs{{[/\\]+}}baremetal_arm{{[/\\]+}}lib"
 // CHECK-V6M-LIBSTDCXX-SAME: "{{.*}}.o"
 // CHECK-V6M-LIBSTDCXX-SAME: "-lstdc++" "-lm"
@@ -123,7 +123,7 @@
 // CHECK-V6M-NDL: "-resource-dir" "[[RESOURCE_DIR:[^"]+]]"
 // CHECK-V6M-NDL: ld{{(.exe)?}}"
 // CHECK-V6M-NDL: "--sysroot={{.*}}{{[/\\]+}}Inputs{{[/\\]+}}baremetal_arm"
-// CHECK-V6M-NDL: "-Bstatic" "-m" "armelf_linux_eabi" "-EL"
+// CHECK-V6M-NDL: "-Bstatic" "-m" "armelf" "-EL"
 // CHECK-V6M-NDL-SAME: "-L{{[^"]*}}{{[/\\]+}}Inputs{{[/\\]+}}baremetal_arm{{[/\\]+}}lib"
 
 // RUN: rm -rf %T/baremetal_cxx_sysroot
@@ -163,6 +163,16 @@
 // RUN:   | FileCheck %s --check-prefix=CHECK-RTLIB-GCC
 // CHECK-RTLIB-GCC: -lgcc
 
+// RUN: %clang -### --target=arm-none-eabi -nolibc -rtlib=compiler-rt %s 2>&1 \
+// RUN:   | FileCheck %s --check-prefix=CHECK-NOLIBC
+// CHECK-NOLIBC-NOT: "-lc"
+// CHECK-NOLIBC: "{{[^"]*}}libclang_rt.builtins.a"
+
+// RUN: %clang -### --target=arm-none-eabi -nostdlib -rtlib=compiler-rt %s 2>&1 \
+// RUN:   | FileCheck %s --check-prefix=CHECK-NOSTDLIB
+// CHECK-NOSTDLIB-NOT: "-lc"
+// CHECK-NOSTDLIB-NOT: "{{[^"]*}}libclang_rt.builtins.a"
+
 // RUN: %clang -### --target=arm-none-eabi -v %s 2>&1 \
 // RUN:   | FileCheck %s --check-prefix=CHECK-SYSROOT-INC
 // CHECK-SYSROOT-INC-NOT: "-internal-isystem" "include"
@@ -171,7 +181,7 @@
 // RUN:   | FileCheck --check-prefix=CHECK-ARMV7EB %s
 // CHECK-ARMV7EB: "{{.*}}ld{{(.exe)?}}"
 // CHECK-ARMV7EB: "--sysroot={{.*}}{{[/\\]+}}Inputs{{[/\\]+}}baremetal_arm"
-// CHECK-ARMV7EB: "-Bstatic" "-m" "armelfb_linux_eabi" "--be8" "-EB"
+// CHECK-ARMV7EB: "-Bstatic" "-m" "armelfb" "--be8" "-EB"
 
 // RUN: %clang -### %s --target=armv7-none-eabi -mbig-endian --sysroot=%S/Inputs/baremetal_arm 2>&1 \
 // RUN:   | FileCheck --check-prefix=CHECK-ARMV7EB %s
@@ -183,7 +193,7 @@
 // RUN:   | FileCheck --check-prefix=CHECK-ARMV7EL %s
 // CHECK-ARMV7EL: "{{.*}}ld{{(.exe)?}}"
 // CHECK-ARMV7EL: "--sysroot={{.*}}{{[/\\]+}}Inputs{{[/\\]+}}baremetal_arm"
-// CHECK-ARMV7EL: "-Bstatic" "-m" "armelf_linux_eabi" "-EL"
+// CHECK-ARMV7EL: "-Bstatic" "-m" "armelf" "-EL"
 // CHECK-ARMV7EL-NOT: "--be8"
 
 // RUN: %clang -### %s --target=armebv7-none-eabi -mlittle-endian --sysroot=%S/Inputs/baremetal_arm 2>&1 \
@@ -196,7 +206,7 @@
 // RUN:   | FileCheck --check-prefix=CHECK-AARCH64BE %s
 // CHECK-AARCH64BE: "{{.*}}ld{{(.exe)?}}"
 // CHECK-AARCH64BE: sysroot={{.*}}{{[/\\]+}}Inputs{{[/\\]+}}baremetal_arm"
-// CHECK-AARCH64BE: "-Bstatic" "-m" "aarch64linuxb" "-EB"
+// CHECK-AARCH64BE: "-Bstatic" "-m" "aarch64elfb" "-EB"
 // CHECK-AARCH64BE-NOT: "--be8"
 
 // RUN: %clang -### %s --target=aarch64-none-elf -mbig-endian --sysroot=%S/Inputs/baremetal_arm 2>&1 \
@@ -209,7 +219,7 @@
 // RUN:   | FileCheck --check-prefix=CHECK-AARCH64LE %s
 // CHECK-AARCH64LE: "{{.*}}ld{{(.exe)?}}"
 // CHECK-AARCH64LE: "--sysroot={{.*}}{{[/\\]+}}Inputs{{[/\\]+}}baremetal_arm"
-// CHECK-AARCH64LE: "-Bstatic" "-m" "aarch64linux" "-EL"
+// CHECK-AARCH64LE: "-Bstatic" "-m" "aarch64elf" "-EL"
 // CHECK-AARCH64LE-NOT: "--be8"
 
 // RUN: %clang -### %s --target=aarch64_be-none-elf -mlittle-endian --sysroot=%S/Inputs/baremetal_arm 2>&1 \
diff --git a/clang/test/Driver/compilation-dir.c b/clang/test/Driver/compilation-dir.c
index dbe801c..70a117b 100644
--- a/clang/test/Driver/compilation-dir.c
+++ b/clang/test/Driver/compilation-dir.c
@@ -8,3 +8,8 @@
 // RUN: %clang -### -integrated-as -ffile-compilation-dir=. -x assembler %s 2>&1 | FileCheck -check-prefixes=CHECK-DEBUG-COMPILATION-DIR %s
 // CHECK-DEBUG-COMPILATION-DIR: "-fdebug-compilation-dir=."
 // CHECK-DEBUG-COMPILATION-DIR-NOT: "-ffile-compilation-dir=."
+
+// RUN: %clang -### -S %s -working-directory %S 2>&1 | FileCheck -check-prefix=CHECK-CWD %s
+// RUN: cd %S
+// RUN: %clang -### -S %s 2>&1 | FileCheck -check-prefix=CHECK-CWD %s
+// CHECK-CWD: -fdebug-compilation-dir={{.*}}Driver
diff --git a/clang/test/Driver/hip-dependent-options.hip b/clang/test/Driver/hip-dependent-options.hip
index b0dc7f2..e65fec2 100644
--- a/clang/test/Driver/hip-dependent-options.hip
+++ b/clang/test/Driver/hip-dependent-options.hip
@@ -4,6 +4,12 @@
 // RUN:   %S/Inputs/hip_multiple_inputs/a.cu \
 // RUN:   %S/Inputs/hip_multiple_inputs/b.hip --gpu-bundle-output \
 // RUN: 2>&1 | FileCheck -check-prefixes=RELOCRDC %s
+// RUN: not %clang -### --target=x86_64-linux-gnu --offload-new-driver \
+// RUN:   -x hip --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 \
+// RUN:   -c -fhip-emit-relocatable -nogpuinc -nogpulib --cuda-device-only -fgpu-rdc \
+// RUN:   %S/Inputs/hip_multiple_inputs/a.cu \
+// RUN:   %S/Inputs/hip_multiple_inputs/b.hip --gpu-bundle-output \
+// RUN: 2>&1 | FileCheck -check-prefixes=RELOCRDC %s
 
 // RELOCRDC: error: option '-fhip-emit-relocatable' cannot be specified with '-fgpu-rdc'
 
@@ -13,5 +19,11 @@
 // RUN:   %S/Inputs/hip_multiple_inputs/a.cu \
 // RUN:   %S/Inputs/hip_multiple_inputs/b.hip --gpu-bundle-output \
 // RUN: 2>&1 | FileCheck -check-prefixes=RELOCHOST %s
+// RUN: not %clang -### --target=x86_64-linux-gnu --offload-new-driver \
+// RUN:   -x hip --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 \
+// RUN:   -c -fhip-emit-relocatable -nogpuinc -nogpulib \
+// RUN:   %S/Inputs/hip_multiple_inputs/a.cu \
+// RUN:   %S/Inputs/hip_multiple_inputs/b.hip --gpu-bundle-output \
+// RUN: 2>&1 | FileCheck -check-prefixes=RELOCHOST %s
 
-// RELOCHOST: error: option '-fhip-emit-relocatable' cannot be specified without '--cuda-device-only'
+// RELOCHOST: error: option '-fhip-emit-relocatable' cannot be specified without '--offload-device-only'
diff --git a/clang/test/Driver/hip-phases.hip b/clang/test/Driver/hip-phases.hip
index 0ad5d76..6bac97a 100644
--- a/clang/test/Driver/hip-phases.hip
+++ b/clang/test/Driver/hip-phases.hip
@@ -275,10 +275,16 @@
 // RUN: %clang -x hip --target=x86_64-unknown-linux-gnu -ccc-print-phases --no-offload-new-driver \
 // RUN: --cuda-gpu-arch=gfx803 %s --cuda-device-only -fhip-emit-relocatable 2>&1 \
 // RUN: | FileCheck -check-prefixes=RELOC %s
+// RUN: %clang -x hip --target=x86_64-unknown-linux-gnu -ccc-print-phases --offload-new-driver \
+// RUN: --cuda-gpu-arch=gfx803 %s --cuda-device-only -fhip-emit-relocatable 2>&1 \
+// RUN: | FileCheck -check-prefixes=RELOC %s
 //
 // RUN: %clang -x hip --target=x86_64-unknown-linux-gnu -ccc-print-phases --no-offload-new-driver \
 // RUN: --cuda-gpu-arch=gfx803 %s --cuda-device-only -fhip-emit-relocatable -Wl,--disable-new-dtags \
 // RUN: 2>&1 | FileCheck -check-prefixes=RELOC %s
+// RUN: %clang -x hip --target=x86_64-unknown-linux-gnu -ccc-print-phases --offload-new-driver \
+// RUN: --cuda-gpu-arch=gfx803 %s --cuda-device-only -fhip-emit-relocatable -Wl,--disable-new-dtags \
+// RUN: 2>&1 | FileCheck -check-prefixes=RELOC %s
 //
 // RELOC-DAG: [[P0:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T:hip]], (device-[[T]], [[ARCH:gfx803]])
 // RELOC-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH]])
@@ -286,7 +292,7 @@
 // RELOC-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, assembler, (device-[[T]], [[ARCH]])
 // RELOC-DAG: [[P4:[0-9]+]]: assembler, {[[P3]]}, object, (device-[[T]], [[ARCH]])
 // RELOC-NOT: linker
-// RELOC-DAG: [[P5:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH]])" {[[P4]]}, object
+// RELOC-DAG: [[P5:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH]])" {[[P4]]}
 // RELOC-NOT: host
 
 //
diff --git a/clang/test/Driver/linker-wrapper-libs.c b/clang/test/Driver/linker-wrapper-libs.c
index cb5c7c1..1404fe3 100644
--- a/clang/test/Driver/linker-wrapper-libs.c
+++ b/clang/test/Driver/linker-wrapper-libs.c
@@ -48,8 +48,8 @@ int bar() { return weak; }
 // RUN:   --linker-path=/usr/bin/ld %t.a %t.o -o a.out 2>&1 \
 // RUN: | FileCheck %s --check-prefix=LIBRARY-RESOLVES
 
-// LIBRARY-RESOLVES: clang{{.*}} -o {{.*}}.img --target=nvptx64-nvidia-cuda -march=sm_70 {{.*}}.o {{.*}}.o
-// LIBRARY-RESOLVES: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx1030 {{.*}}.o {{.*}}.o
+// LIBRARY-RESOLVES: clang{{.*}} -o {{.*}}.img -dumpdir {{.*}}.img. --target=nvptx64-nvidia-cuda -march=sm_70 {{.*}}.o {{.*}}.o
+// LIBRARY-RESOLVES: clang{{.*}} -o {{.*}}.img -dumpdir {{.*}}.img. --target=amdgcn-amd-amdhsa -mcpu=gfx1030 {{.*}}.o {{.*}}.o
 
 //
 // Check that we extract a static library that defines a global visibile to the
@@ -72,8 +72,8 @@ int bar() { return weak; }
 // RUN:   --linker-path=/usr/bin/ld %t.a %t.o -o a.out 2>&1 \
 // RUN: | FileCheck %s --check-prefix=LIBRARY-GLOBAL
 
-// LIBRARY-GLOBAL: clang{{.*}} -o {{.*}}.img --target=nvptx64-nvidia-cuda -march=sm_70 {{.*}}.o {{.*}}.o
-// LIBRARY-GLOBAL: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx1030 {{.*}}.o {{.*}}.o
+// LIBRARY-GLOBAL: clang{{.*}} -o {{.*}}.img -dumpdir {{.*}}.img. --target=nvptx64-nvidia-cuda -march=sm_70 {{.*}}.o {{.*}}.o
+// LIBRARY-GLOBAL: clang{{.*}} -o {{.*}}.img -dumpdir {{.*}}.img. --target=amdgcn-amd-amdhsa -mcpu=gfx1030 {{.*}}.o {{.*}}.o
 
 //
 // Check that we do not extract a global symbol if the source file was not
@@ -95,8 +95,8 @@ int bar() { return weak; }
 // RUN:   --linker-path=/usr/bin/ld %t.o %t.a -o a.out 2>&1 \
 // RUN: | FileCheck %s --check-prefix=LIBRARY-GLOBAL-NONE
 
-// LIBRARY-GLOBAL-NONE-NOT: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx1030 {{.*}}.o {{.*}}.o
-// LIBRARY-GLOBAL-NONE-NOT: clang{{.*}} -o {{.*}}.img --target=nvptx64-nvidia-cuda -march=sm_70 {{.*}}.o {{.*}}.o
+// LIBRARY-GLOBAL-NONE-NOT: clang{{.*}} -o {{.*}}.img -dumpdir {{.*}}.img. --target=amdgcn-amd-amdhsa -mcpu=gfx1030 {{.*}}.o {{.*}}.o
+// LIBRARY-GLOBAL-NONE-NOT: clang{{.*}} -o {{.*}}.img -dumpdir {{.*}}.img. --target=nvptx64-nvidia-cuda -march=sm_70 {{.*}}.o {{.*}}.o
 
 //
 // Check that we do not extract an external weak symbol.
@@ -116,9 +116,9 @@ int bar() { return weak; }
 // RUN:   --linker-path=/usr/bin/ld %t.o %t.a -o a.out 2>&1 \
 // RUN: | FileCheck %s --check-prefix=LIBRARY-WEAK
 
-// LIBRARY-WEAK: clang{{.*}} -o {{.*}}.img --target=nvptx64-nvidia-cuda -march=sm_70
+// LIBRARY-WEAK: clang{{.*}} -o {{.*}}.img -dumpdir {{.*}}.img. --target=nvptx64-nvidia-cuda -march=sm_70
 // LIBRARY-WEAK-NOT: {{.*}}.o {{.*}}.o
-// LIBRARY-WEAK: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx1030
+// LIBRARY-WEAK: clang{{.*}} -o {{.*}}.img -dumpdir {{.*}}.img. --target=amdgcn-amd-amdhsa -mcpu=gfx1030
 
 //
 // Check that we do not extract an unneeded hidden symbol.
@@ -138,9 +138,9 @@ int bar() { return weak; }
 // RUN:   --linker-path=/usr/bin/ld %t.o %t.a -o a.out 2>&1 \
 // RUN: | FileCheck %s --check-prefix=LIBRARY-HIDDEN
 
-// LIBRARY-HIDDEN: clang{{.*}} -o {{.*}}.img --target=nvptx64-nvidia-cuda -march=sm_70
+// LIBRARY-HIDDEN: clang{{.*}} -o {{.*}}.img -dumpdir {{.*}}.img. --target=nvptx64-nvidia-cuda -march=sm_70
 // LIBRARY-HIDDEN-NOT: {{.*}}.o {{.*}}.o
-// LIBRARY-HIDDEN: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx1030
+// LIBRARY-HIDDEN: clang{{.*}} -o {{.*}}.img -dumpdir {{.*}}.img. --target=amdgcn-amd-amdhsa -mcpu=gfx1030
 
 //
 // Check that we do not extract a static library that defines a global visibile
@@ -161,9 +161,9 @@ int bar() { return weak; }
 // RUN:   --linker-path=/usr/bin/ld %t.o %t.a %t.a -o a.out 2>&1 \
 // RUN: | FileCheck %s --check-prefix=LIBRARY-GLOBAL-DEFINED
 
-// LIBRARY-GLOBAL-DEFINED: clang{{.*}} -o {{.*}}.img --target=nvptx64-nvidia-cuda -march=sm_70 {{.*}}.o {{.*}}.o
+// LIBRARY-GLOBAL-DEFINED: clang{{.*}} -o {{.*}}.img -dumpdir {{.*}}.img. --target=nvptx64-nvidia-cuda -march=sm_70 {{.*}}.o {{.*}}.o
 // LIBRARY-GLOBAL-DEFINED-NOT: {{.*}}gfx1030{{.*}}.o
-// LIBRARY-GLOBAL-DEFINED: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx1030 {{.*}}.o {{.*}}.o
+// LIBRARY-GLOBAL-DEFINED: clang{{.*}} -o {{.*}}.img -dumpdir {{.*}}.img. --target=amdgcn-amd-amdhsa -mcpu=gfx1030 {{.*}}.o {{.*}}.o
 
 //
 // Check that we can use --[no-]whole-archive to control extraction.
@@ -185,7 +185,7 @@ int bar() { return weak; }
 // RUN:   --linker-path=/usr/bin/ld %t.o --whole-archive %t.a -o a.out 2>&1 \
 // RUN: | FileCheck %s --check-prefix=LIBRARY-WHOLE-ARCHIVE
 
-// LIBRARY-WHOLE-ARCHIVE: clang{{.*}} -o {{.*}}.img --target=nvptx64-nvidia-cuda -march=sm_70 {{.*}}.o {{.*}}.o
-// LIBRARY-WHOLE-ARCHIVE: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx1030 {{.*}}.o {{.*}}.o
-// LIBRARY-WHOLE-ARCHIVE: clang{{.*}} -o {{.*}}.img --target=nvptx64-nvidia-cuda -march=sm_52 {{.*}}.o
-// LIBRARY-WHOLE-ARCHIVE: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx90a {{.*}}.o
+// LIBRARY-WHOLE-ARCHIVE: clang{{.*}} -o {{.*}}.img -dumpdir {{.*}}.img. --target=nvptx64-nvidia-cuda -march=sm_70 {{.*}}.o {{.*}}.o
+// LIBRARY-WHOLE-ARCHIVE: clang{{.*}} -o {{.*}}.img -dumpdir {{.*}}.img. --target=amdgcn-amd-amdhsa -mcpu=gfx1030 {{.*}}.o {{.*}}.o
+// LIBRARY-WHOLE-ARCHIVE: clang{{.*}} -o {{.*}}.img -dumpdir {{.*}}.img. --target=nvptx64-nvidia-cuda -march=sm_52 {{.*}}.o
+// LIBRARY-WHOLE-ARCHIVE: clang{{.*}} -o {{.*}}.img -dumpdir {{.*}}.img. --target=amdgcn-amd-amdhsa -mcpu=gfx90a {{.*}}.o
diff --git a/clang/test/Driver/linker-wrapper.c b/clang/test/Driver/linker-wrapper.c
index 80b1a57..e73fa5c 100644
--- a/clang/test/Driver/linker-wrapper.c
+++ b/clang/test/Driver/linker-wrapper.c
@@ -22,7 +22,7 @@ __attribute__((visibility("protected"), used)) int x;
 // RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run \
 // RUN:   --linker-path=/usr/bin/ld %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=NVPTX-LINK
 
-// NVPTX-LINK: clang{{.*}} -o {{.*}}.img --target=nvptx64-nvidia-cuda -march=sm_70 {{.*}}.o {{.*}}.o
+// NVPTX-LINK: clang{{.*}} -o {{.*}}.img -dumpdir a.out.nvptx64.sm_70.img. --target=nvptx64-nvidia-cuda -march=sm_70 {{.*}}.o {{.*}}.o
 
 // RUN: clang-offload-packager -o %t.out \
 // RUN:   --image=file=%t.elf.o,kind=openmp,triple=nvptx64-nvidia-cuda,arch=sm_70 \
@@ -40,7 +40,7 @@ __attribute__((visibility("protected"), used)) int x;
 // RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run \
 // RUN:   --linker-path=/usr/bin/ld %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=AMDGPU-LINK
 
-// AMDGPU-LINK: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx908 -flto -Wl,--no-undefined {{.*}}.o {{.*}}.o
+// AMDGPU-LINK: clang{{.*}} -o {{.*}}.img -dumpdir a.out.amdgcn.gfx908.img. --target=amdgcn-amd-amdhsa -mcpu=gfx908 -flto -Wl,--no-undefined {{.*}}.o {{.*}}.o
 
 // RUN: clang-offload-packager -o %t.out \
 // RUN:   --image=file=%t.amdgpu.bc,kind=openmp,triple=amdgcn-amd-amdhsa,arch=gfx1030 \
@@ -57,7 +57,7 @@ __attribute__((visibility("protected"), used)) int x;
 // RUN: not clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run \
 // RUN:   --linker-path=/usr/bin/ld %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=SPIRV-LINK
 
-// SPIRV-LINK: clang{{.*}} -o {{.*}}.img --target=spirv64-unknown-unknown {{.*}}.o --sycl-link -Xlinker -triple=spirv64-unknown-unknown -Xlinker -arch=
+// SPIRV-LINK: clang{{.*}} -o {{.*}}.img -dumpdir a.out.spirv64..img. --target=spirv64-unknown-unknown {{.*}}.o --sycl-link -Xlinker -triple=spirv64-unknown-unknown -Xlinker -arch=
 
 // RUN: clang-offload-packager -o %t.out \
 // RUN:   --image=file=%t.elf.o,kind=openmp,triple=x86_64-unknown-linux-gnu \
@@ -68,7 +68,7 @@ __attribute__((visibility("protected"), used)) int x;
 // RUN:   --linker-path=/usr/bin/ld.lld --whole-archive %t.a --no-whole-archive \
 // RUN:   %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=CPU-LINK
 
-// CPU-LINK: clang{{.*}} -o {{.*}}.img --target=x86_64-unknown-linux-gnu -Wl,--no-undefined {{.*}}.o {{.*}}.o -Wl,-Bsymbolic -shared -Wl,--whole-archive {{.*}}.a -Wl,--no-whole-archive
+// CPU-LINK: clang{{.*}} -o {{.*}}.img -dumpdir a.out.x86_64..img. --target=x86_64-unknown-linux-gnu -Wl,--no-undefined {{.*}}.o {{.*}}.o -Wl,-Bsymbolic -shared -Wl,--whole-archive {{.*}}.a -Wl,--no-whole-archive
 
 // RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -o %t.o
 // RUN: clang-linker-wrapper --dry-run --host-triple=x86_64-unknown-linux-gnu -mllvm -openmp-opt-disable \
@@ -100,8 +100,8 @@ __attribute__((visibility("protected"), used)) int x;
 // RUN: clang-linker-wrapper --dry-run --host-triple=x86_64-unknown-linux-gnu \
 // RUN: --linker-path=/usr/bin/ld %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=CUDA
 
-// CUDA: clang{{.*}} -o [[IMG_SM70:.+]] --target=nvptx64-nvidia-cuda -march=sm_70
-// CUDA: clang{{.*}} -o [[IMG_SM52:.+]] --target=nvptx64-nvidia-cuda -march=sm_52
+// CUDA: clang{{.*}} -o [[IMG_SM70:.+]] -dumpdir a.out.nvptx64.sm_70.img. --target=nvptx64-nvidia-cuda -march=sm_70
+// CUDA: clang{{.*}} -o [[IMG_SM52:.+]] -dumpdir a.out.nvptx64.sm_52.img. --target=nvptx64-nvidia-cuda -march=sm_52
 // CUDA: fatbinary{{.*}}-64 --create {{.*}}.fatbin --image=profile=sm_70,file=[[IMG_SM70]] --image=profile=sm_52,file=[[IMG_SM52]]
 // CUDA: usr/bin/ld{{.*}} {{.*}}.openmp.image.{{.*}}.o {{.*}}.cuda.image.{{.*}}.o
 
@@ -127,8 +127,8 @@ __attribute__((visibility("protected"), used)) int x;
 // RUN:   --compress --compression-level=6 \
 // RUN:   --linker-path=/usr/bin/ld %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=HIP
 
-// HIP: clang{{.*}} -o [[IMG_GFX90A:.+]] --target=amdgcn-amd-amdhsa -mcpu=gfx90a
-// HIP: clang{{.*}} -o [[IMG_GFX908:.+]] --target=amdgcn-amd-amdhsa -mcpu=gfx908
+// HIP: clang{{.*}} -o [[IMG_GFX90A:.+]] -dumpdir a.out.amdgcn.gfx90a.img. --target=amdgcn-amd-amdhsa -mcpu=gfx90a
+// HIP: clang{{.*}} -o [[IMG_GFX908:.+]] -dumpdir a.out.amdgcn.gfx908.img. --target=amdgcn-amd-amdhsa -mcpu=gfx908
 // HIP: clang-offload-bundler{{.*}}-type=o -bundle-align=4096 -compress -compression-level=6 -targets=host-x86_64-unknown-linux-gnu,hip-amdgcn-amd-amdhsa--gfx90a,hip-amdgcn-amd-amdhsa--gfx908 -input={{/dev/null|NUL}} -input=[[IMG_GFX90A]] -input=[[IMG_GFX908]] -output={{.*}}.hipfb
 
 // RUN: clang-offload-packager -o %t.out \
@@ -157,7 +157,7 @@ __attribute__((visibility("protected"), used)) int x;
 // RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run --clang-backend \
 // RUN:   --linker-path=/usr/bin/ld %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=CLANG-BACKEND
 
-// CLANG-BACKEND: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx908 -flto -Wl,--no-undefined {{.*}}.o
+// CLANG-BACKEND: clang{{.*}} -o {{.*}}.img -dumpdir a.out.amdgcn.gfx908.img. --target=amdgcn-amd-amdhsa -mcpu=gfx908 -flto -Wl,--no-undefined {{.*}}.o
 
 // RUN: clang-offload-packager -o %t.out \
 // RUN:   --image=file=%t.elf.o,kind=openmp,triple=nvptx64-nvidia-cuda,arch=sm_70
@@ -180,8 +180,8 @@ __attribute__((visibility("protected"), used)) int x;
 // RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run \
 // RUN:   --linker-path=/usr/bin/ld %t-on.o %t-off.o %t.a -o a.out 2>&1 | FileCheck %s --check-prefix=AMD-TARGET-ID
 
-// AMD-TARGET-ID: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx90a:xnack+ -flto -Wl,--no-undefined {{.*}}.o {{.*}}.o
-// AMD-TARGET-ID: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx90a:xnack- -flto -Wl,--no-undefined {{.*}}.o {{.*}}.o
+// AMD-TARGET-ID: clang{{.*}} -o {{.*}}.img -dumpdir a.out.amdgcn.gfx90a:xnack+.img. --target=amdgcn-amd-amdhsa -mcpu=gfx90a:xnack+ -flto -Wl,--no-undefined {{.*}}.o {{.*}}.o
+// AMD-TARGET-ID: clang{{.*}} -o {{.*}}.img -dumpdir a.out.amdgcn.gfx90a:xnack-.img. --target=amdgcn-amd-amdhsa -mcpu=gfx90a:xnack- -flto -Wl,--no-undefined {{.*}}.o {{.*}}.o
 
 // RUN: clang-offload-packager -o %t-lib.out \
 // RUN:   --image=file=%t.elf.o,kind=openmp,triple=amdgcn-amd-amdhsa,arch=generic
@@ -196,8 +196,8 @@ __attribute__((visibility("protected"), used)) int x;
 // RUN: clang-linker-wrapper --host-triple=x86_64-unknown-linux-gnu --dry-run \
 // RUN:   --linker-path=/usr/bin/ld %t1.o %t2.o %t.a -o a.out 2>&1 | FileCheck %s --check-prefix=ARCH-ALL
 
-// ARCH-ALL: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx90a -flto -Wl,--no-undefined {{.*}}.o {{.*}}.o
-// ARCH-ALL: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa -mcpu=gfx908 -flto -Wl,--no-undefined {{.*}}.o {{.*}}.o
+// ARCH-ALL: clang{{.*}} -o {{.*}}.img -dumpdir a.out.amdgcn.gfx90a.img. --target=amdgcn-amd-amdhsa -mcpu=gfx90a -flto -Wl,--no-undefined {{.*}}.o {{.*}}.o
+// ARCH-ALL: clang{{.*}} -o {{.*}}.img -dumpdir a.out.amdgcn.gfx908.img. --target=amdgcn-amd-amdhsa -mcpu=gfx908 -flto -Wl,--no-undefined {{.*}}.o {{.*}}.o
 
 // RUN: clang-offload-packager -o %t.out \
 // RUN:   --image=file=%t.elf.o,kind=openmp,triple=x86_64-unknown-linux-gnu \
@@ -207,7 +207,7 @@ __attribute__((visibility("protected"), used)) int x;
 // RUN:   --linker-path=/usr/bin/ld.lld -r %t.o \
 // RUN:   %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=RELOCATABLE-LINK
 
-// RELOCATABLE-LINK: clang{{.*}} -o {{.*}}.img --target=x86_64-unknown-linux-gnu
+// RELOCATABLE-LINK: clang{{.*}} -o {{.*}}.img -dumpdir a.out.x86_64..img. --target=x86_64-unknown-linux-gnu
 // RELOCATABLE-LINK: /usr/bin/ld.lld{{.*}}-r
 // RELOCATABLE-LINK: llvm-objcopy{{.*}}a.out --remove-section .llvm.offloading
 
@@ -219,7 +219,7 @@ __attribute__((visibility("protected"), used)) int x;
 // RUN:   --linker-path=/usr/bin/ld.lld -r %t.o \
 // RUN:   %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=RELOCATABLE-LINK-HIP
 
-// RELOCATABLE-LINK-HIP: clang{{.*}} -o {{.*}}.img --target=amdgcn-amd-amdhsa
+// RELOCATABLE-LINK-HIP: clang{{.*}} -o {{.*}}.img -dumpdir a.out.amdgcn.gfx90a.img. --target=amdgcn-amd-amdhsa
 // RELOCATABLE-LINK-HIP: clang-offload-bundler{{.*}} -type=o -bundle-align=4096 -targets=host-x86_64-unknown-linux-gnu,hip-amdgcn-amd-amdhsa--gfx90a -input={{/dev/null|NUL}} -input={{.*}} -output={{.*}}
 // RELOCATABLE-LINK-HIP: /usr/bin/ld.lld{{.*}}-r
 // RELOCATABLE-LINK-HIP: llvm-objcopy{{.*}}a.out --remove-section .llvm.offloading
@@ -233,7 +233,7 @@ __attribute__((visibility("protected"), used)) int x;
 // RUN:   --linker-path=/usr/bin/ld.lld -r %t.o \
 // RUN:   %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=RELOCATABLE-LINK-CUDA
 
-// RELOCATABLE-LINK-CUDA: clang{{.*}} -o {{.*}}.img --target=nvptx64-nvidia-cuda
+// RELOCATABLE-LINK-CUDA: clang{{.*}} -o {{.*}}.img -dumpdir a.out.nvptx64.sm_89.img. --target=nvptx64-nvidia-cuda
 // RELOCATABLE-LINK-CUDA: fatbinary{{.*}} -64 --create {{.*}}.fatbin --image=profile=sm_89,file={{.*}}.img
 // RELOCATABLE-LINK-CUDA: /usr/bin/ld.lld{{.*}}-r
 // RELOCATABLE-LINK-CUDA: llvm-objcopy{{.*}}a.out --remove-section .llvm.offloading
diff --git a/clang/test/Driver/lto-dwo.c b/clang/test/Driver/lto-dwo.c
index 206d4cb..5072765 100644
--- a/clang/test/Driver/lto-dwo.c
+++ b/clang/test/Driver/lto-dwo.c
@@ -1,9 +1,25 @@
 // Confirm that -gsplit-dwarf=DIR is passed to linker
 
-// RUN: %clang --target=x86_64-unknown-linux -### %s -flto=thin -gsplit-dwarf -o a.out 2> %t
+// DEFINE: %{RUN-ELF} = %clang --target=x86_64-unknown-linux -### %s \
+// DEFINE:              -flto=thin -gsplit-dwarf
+
+// RUN: %{RUN-ELF} -o a.out 2> %t
 // RUN: FileCheck -check-prefix=CHECK-LINK-ELF-DWO-DIR-DEFAULT < %t %s
 // RUN: %clang_cl --target=x86_64-unknown-windows-msvc -### -fuse-ld=lld -flto -gsplit-dwarf -o a.out -- %s 2> %t
 // RUN: FileCheck -check-prefix=CHECK-LINK-COFF-DWO-DIR-DEFAULT < %t %s
 //
-// CHECK-LINK-ELF-DWO-DIR-DEFAULT:  "-plugin-opt=dwo_dir=a.out_dwo"
+// CHECK-LINK-ELF-DWO-DIR-DEFAULT:  "-plugin-opt=dwo_dir=a.out-dwo"
 // CHECK-LINK-COFF-DWO-DIR-DEFAULT: "/dwodir:a.out_dwo"
+
+// Check -dumpdir effect on -gsplit-dwarf.
+//
+// DEFINE: %{RUN-DUMPDIR} = %{RUN-ELF} -dumpdir /dir/file.ext
+//
+// RUN: %{RUN-ELF} 2>&1 | FileCheck %s -check-prefix=CHECK-NO-O
+// RUN: %{RUN-ELF} -o FOO 2>&1 | FileCheck %s -check-prefix=CHECK-O
+// RUN: %{RUN-DUMPDIR} 2>&1 | FileCheck %s -check-prefix=CHECK-DUMPDIR
+// RUN: %{RUN-DUMPDIR} -o FOO 2>&1 | FileCheck %s -check-prefix=CHECK-DUMPDIR
+//
+//    CHECK-NO-O: "-plugin-opt=dwo_dir=a-dwo"
+//       CHECK-O: "-plugin-opt=dwo_dir=FOO-dwo"
+// CHECK-DUMPDIR: "-plugin-opt=dwo_dir=/dir/file.extdwo"
diff --git a/clang/test/Driver/mingw-msvcrt.c b/clang/test/Driver/mingw-msvcrt.c
index 340ce1f..e164863 100644
--- a/clang/test/Driver/mingw-msvcrt.c
+++ b/clang/test/Driver/mingw-msvcrt.c
@@ -7,10 +7,10 @@
 // CHECK_DEFAULT: "-lmingwex" "-lmsvcrt" "-ladvapi32"
 // CHECK_DEFAULT-SAME: "-lmsvcrt" "-lkernel32" "{{.*}}crtend.o"
 // CHECK_MSVCR120: "-lmsvcr120"
-// CHECK_MSVCR120-SAME: "-lmingwex" "-ladvapi32"
+// CHECK_MSVCR120-SAME: "-lmingwex" "-lmsvcr120" "-ladvapi32"
 // CHECK_UCRTBASE: "-lucrtbase"
-// CHECK_UCRTBASE-SAME: "-lmingwex" "-ladvapi32"
+// CHECK_UCRTBASE-SAME: "-lmingwex" "-lucrtbase" "-ladvapi32"
 // CHECK_UCRT: "-lucrt"
-// CHECK_UCRT-SAME: "-lmingwex" "-ladvapi32"
+// CHECK_UCRT-SAME: "-lmingwex" "-lucrt" "-ladvapi32"
 // CHECK_CRTDLL: "-lcrtdll"
-// CHECK_CRTDLL-SAME: "-lmingwex" "-ladvapi32"
+// CHECK_CRTDLL-SAME: "-lmingwex" "-lcrtdll" "-ladvapi32"
diff --git a/clang/test/Driver/opt-record.c b/clang/test/Driver/opt-record.c
index 220f5db..86d00d5 100644
--- a/clang/test/Driver/opt-record.c
+++ b/clang/test/Driver/opt-record.c
@@ -58,12 +58,12 @@
 // CHECK-NOPASS-NOT: "-plugin-opt=opt-remarks-format=yaml"
 // CHECK-NOPASS-NOT: "-plugin-opt=opt-remarks-hotness-threshold=100"
 
-// CHECK-PASS-A:      "-plugin-opt=opt-remarks-filename=a.out.opt.ld.yaml"
+// CHECK-PASS-A:      "-plugin-opt=opt-remarks-filename=a-opt.ld.yaml"
 // CHECK-PASS-A-SAME: "-plugin-opt=opt-remarks-passes=inline"
 // CHECK-PASS-A-SAME: "-plugin-opt=opt-remarks-format=yaml"
 // CHECK-PASS-A-SAME: "-plugin-opt=opt-remarks-hotness-threshold=100"
 
-// CHECK-PASS:      "-plugin-opt=opt-remarks-filename=FOO.opt.ld.yaml"
+// CHECK-PASS:      "-plugin-opt=opt-remarks-filename=FOO-opt.ld.yaml"
 // CHECK-PASS-SAME: "-plugin-opt=opt-remarks-passes=inline"
 // CHECK-PASS-SAME: "-plugin-opt=opt-remarks-format=yaml"
 // CHECK-PASS-SAME: "-plugin-opt=opt-remarks-hotness-threshold=100"
@@ -78,3 +78,17 @@
 // CHECK-PASS-RPASS-SAME: "-plugin-opt=opt-remarks-hotness-threshold=100"
 
 // CHECK-PASS-AUTO:   "-plugin-opt=opt-remarks-hotness-threshold=auto"
+
+// Check -dumpdir effect on -foptimization-record-file.
+//
+// DEFINE: %{RUN-DUMPDIR} = \
+// DEFINE:   %clang --target=x86_64-linux -### -fuse-ld=lld -B%S/Inputs/lld \
+// DEFINE:       -flto -fsave-optimization-record -dumpdir /dir/file.ext %s
+//
+// RUN: %{RUN-DUMPDIR} 2>&1 | FileCheck %s -check-prefix=CHECK-DUMPDIR
+// RUN: %{RUN-DUMPDIR} -o FOO 2>&1 | FileCheck %s -check-prefix=CHECK-DUMPDIR
+// RUN: %{RUN-DUMPDIR} -foptimization-record-file=user-file.ext 2>&1 | \
+// RUN:   FileCheck %s -check-prefix=CHECK-DUMPDIR-IGNORE
+//
+//        CHECK-DUMPDIR: "-plugin-opt=opt-remarks-filename=/dir/file.extopt.ld.yaml"
+// CHECK-DUMPDIR-IGNORE: "-plugin-opt=opt-remarks-filename=user-file.ext.opt.ld.yaml"
diff --git a/clang/test/Driver/wasm-features.c b/clang/test/Driver/wasm-features.c
index 746bd7b..f0215ec 100644
--- a/clang/test/Driver/wasm-features.c
+++ b/clang/test/Driver/wasm-features.c
@@ -41,6 +41,12 @@
 // HALF-PRECISION: "-target-feature" "+fp16"
 // NO-HALF-PRECISION: "-target-feature" "-fp16"
 
+// RUN: %clang --target=wasm32-unknown-unknown -### %s -mgc 2>&1 | FileCheck %s -check-prefix=GC
+// RUN: %clang --target=wasm32-unknown-unknown -### %s -mno-gc 2>&1 | FileCheck %s -check-prefix=NO-GC
+
+// GC: "-target-feature" "+gc"
+// NO-GC: "-target-feature" "-gc"
+
 // RUN: %clang --target=wasm32-unknown-unknown -### %s -mmultimemory 2>&1 | FileCheck %s -check-prefix=MULTIMEMORY
 // RUN: %clang --target=wasm32-unknown-unknown -### %s -mno-multimemory 2>&1 | FileCheck %s -check-prefix=NO-MULTIMEMORY
 
diff --git a/clang/test/Frontend/dump-minimization-hints.cpp b/clang/test/Frontend/dump-minimization-hints.cpp
index 4843786..273fd7f 100644
--- a/clang/test/Frontend/dump-minimization-hints.cpp
+++ b/clang/test/Frontend/dump-minimization-hints.cpp
@@ -39,6 +39,26 @@
 // RANGE-NEXT:            "line": 15,
 // RANGE-NEXT:            "column": 2
 // RANGE-NEXT:          }
+// RANGE-NEXT:        },
+// RANGE-NEXT:        {
+// RANGE-NEXT:          "from": {
+// RANGE-NEXT:            "line": 19,
+// RANGE-NEXT:            "column": 1
+// RANGE-NEXT:          },
+// RANGE-NEXT:          "to": {
+// RANGE-NEXT:            "line": 19,
+// RANGE-NEXT:            "column": 41
+// RANGE-NEXT:          }
+// RANGE-NEXT:        },
+// RANGE-NEXT:        {
+// RANGE-NEXT:          "from": {
+// RANGE-NEXT:            "line": 20,
+// RANGE-NEXT:            "column": 1
+// RANGE-NEXT:          },
+// RANGE-NEXT:          "to": {
+// RANGE-NEXT:            "line": 23,
+// RANGE-NEXT:            "column": 2
+// RANGE-NEXT:          }
 // RANGE-NEXT:        }
 // RANGE-NEXT:      ]
 // RANGE-NEXT:    }
@@ -68,6 +88,16 @@ int multiply(int a, int b) {
     return a * b;
 }
 
+inline int unused_by_foo() {} // line 17
+
+inline void recursively_used_by_foo() {} // line 19
+inline int used_by_foo() { // line 20
+  recursively_used_by_foo();
+  return 1;
+}
+
+struct UnusedByFoo {};
+
 //--- foo.cpp
 #include "foo.h"
 int global_value = 5;
@@ -76,4 +106,6 @@ int main() {
   int current_value = data.getValue();
   int doubled_value = multiply(current_value, 2);
   int final_result = doubled_value + global_value;
+
+  return used_by_foo();
 }
diff --git a/clang/test/Headers/__clang_hip_math.hip b/clang/test/Headers/__clang_hip_math.hip
index 11c9cd3..81c5f43 100644
--- a/clang/test/Headers/__clang_hip_math.hip
+++ b/clang/test/Headers/__clang_hip_math.hip
@@ -49,30 +49,27 @@ typedef unsigned long long uint64_t;
 
 // CHECK-LABEL: @test___make_mantissa_base8(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    br label [[WHILE_COND_I:%.*]]
-// CHECK:       while.cond.i:
-// CHECK-NEXT:    [[__TAGP_ADDR_0_I:%.*]] = phi ptr [ [[P:%.*]], [[ENTRY:%.*]] ], [ [[__TAGP_ADDR_1_I:%.*]], [[CLEANUP_I:%.*]] ]
-// CHECK-NEXT:    [[__R_0_I:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[__R_1_I:%.*]], [[CLEANUP_I]] ]
-// CHECK-NEXT:    [[TMP0:%.*]] = load i8, ptr [[__TAGP_ADDR_0_I]], align 1, !tbaa [[TBAA4:![0-9]+]]
-// CHECK-NEXT:    [[CMP_NOT_I:%.*]] = icmp eq i8 [[TMP0]], 0
-// CHECK-NEXT:    br i1 [[CMP_NOT_I]], label [[_ZL21__MAKE_MANTISSA_BASE8PKC_EXIT:%.*]], label [[WHILE_BODY_I:%.*]]
+// CHECK-NEXT:    [[TMP0:%.*]] = load i8, ptr [[P:%.*]], align 1, !tbaa [[TBAA4:![0-9]+]]
+// CHECK-NEXT:    [[CMP_NOT_I1:%.*]] = icmp eq i8 [[TMP0]], 0
+// CHECK-NEXT:    br i1 [[CMP_NOT_I1]], label [[_ZL21__MAKE_MANTISSA_BASE8PKC_EXIT:%.*]], label [[WHILE_BODY_I:%.*]]
 // CHECK:       while.body.i:
-// CHECK-NEXT:    [[TMP1:%.*]] = and i8 [[TMP0]], -8
-// CHECK-NEXT:    [[OR_COND_I:%.*]] = icmp eq i8 [[TMP1]], 48
-// CHECK-NEXT:    br i1 [[OR_COND_I]], label [[IF_THEN_I:%.*]], label [[CLEANUP_I]]
+// CHECK-NEXT:    [[TMP1:%.*]] = phi i8 [ [[TMP3:%.*]], [[IF_THEN_I:%.*]] ], [ [[TMP0]], [[ENTRY:%.*]] ]
+// CHECK-NEXT:    [[__R_0_I3:%.*]] = phi i64 [ [[SUB_I:%.*]], [[IF_THEN_I]] ], [ 0, [[ENTRY]] ]
+// CHECK-NEXT:    [[__TAGP_ADDR_0_I2:%.*]] = phi ptr [ [[INCDEC_PTR_I:%.*]], [[IF_THEN_I]] ], [ [[P]], [[ENTRY]] ]
+// CHECK-NEXT:    [[TMP2:%.*]] = and i8 [[TMP1]], -8
+// CHECK-NEXT:    [[OR_COND_I:%.*]] = icmp eq i8 [[TMP2]], 48
+// CHECK-NEXT:    br i1 [[OR_COND_I]], label [[IF_THEN_I]], label [[_ZL21__MAKE_MANTISSA_BASE8PKC_EXIT]]
 // CHECK:       if.then.i:
-// CHECK-NEXT:    [[MUL_I:%.*]] = shl i64 [[__R_0_I]], 3
-// CHECK-NEXT:    [[CONV5_I:%.*]] = zext nneg i8 [[TMP0]] to i64
+// CHECK-NEXT:    [[MUL_I:%.*]] = shl i64 [[__R_0_I3]], 3
+// CHECK-NEXT:    [[CONV5_I:%.*]] = zext nneg i8 [[TMP1]] to i64
 // CHECK-NEXT:    [[ADD_I:%.*]] = add i64 [[MUL_I]], -48
-// CHECK-NEXT:    [[SUB_I:%.*]] = add i64 [[ADD_I]], [[CONV5_I]]
-// CHECK-NEXT:    [[INCDEC_PTR_I:%.*]] = getelementptr inbounds nuw i8, ptr [[__TAGP_ADDR_0_I]], i64 1
-// CHECK-NEXT:    br label [[CLEANUP_I]]
-// CHECK:       cleanup.i:
-// CHECK-NEXT:    [[__TAGP_ADDR_1_I]] = phi ptr [ [[INCDEC_PTR_I]], [[IF_THEN_I]] ], [ [[__TAGP_ADDR_0_I]], [[WHILE_BODY_I]] ]
-// CHECK-NEXT:    [[__R_1_I]] = phi i64 [ [[SUB_I]], [[IF_THEN_I]] ], [ [[__R_0_I]], [[WHILE_BODY_I]] ]
-// CHECK-NEXT:    br i1 [[OR_COND_I]], label [[WHILE_COND_I]], label [[_ZL21__MAKE_MANTISSA_BASE8PKC_EXIT]], !llvm.loop [[LOOP7:![0-9]+]]
+// CHECK-NEXT:    [[SUB_I]] = add i64 [[ADD_I]], [[CONV5_I]]
+// CHECK-NEXT:    [[INCDEC_PTR_I]] = getelementptr inbounds nuw i8, ptr [[__TAGP_ADDR_0_I2]], i64 1
+// CHECK-NEXT:    [[TMP3]] = load i8, ptr [[INCDEC_PTR_I]], align 1, !tbaa [[TBAA4]]
+// CHECK-NEXT:    [[CMP_NOT_I:%.*]] = icmp eq i8 [[TMP3]], 0
+// CHECK-NEXT:    br i1 [[CMP_NOT_I]], label [[_ZL21__MAKE_MANTISSA_BASE8PKC_EXIT]], label [[WHILE_BODY_I]], !llvm.loop [[LOOP7:![0-9]+]]
 // CHECK:       _ZL21__make_mantissa_base8PKc.exit:
-// CHECK-NEXT:    [[RETVAL_2_I:%.*]] = phi i64 [ 0, [[CLEANUP_I]] ], [ [[__R_0_I]], [[WHILE_COND_I]] ]
+// CHECK-NEXT:    [[RETVAL_2_I:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ 0, [[WHILE_BODY_I]] ], [ [[SUB_I]], [[IF_THEN_I]] ]
 // CHECK-NEXT:    ret i64 [[RETVAL_2_I]]
 //
 // AMDGCNSPIRV-LABEL: @test___make_mantissa_base8(
@@ -105,30 +102,27 @@ extern "C" __device__ uint64_t test___make_mantissa_base8(const char *p) {
 
 // CHECK-LABEL: @test___make_mantissa_base10(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    br label [[WHILE_COND_I:%.*]]
-// CHECK:       while.cond.i:
-// CHECK-NEXT:    [[__TAGP_ADDR_0_I:%.*]] = phi ptr [ [[P:%.*]], [[ENTRY:%.*]] ], [ [[__TAGP_ADDR_1_I:%.*]], [[CLEANUP_I:%.*]] ]
-// CHECK-NEXT:    [[__R_0_I:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[__R_1_I:%.*]], [[CLEANUP_I]] ]
-// CHECK-NEXT:    [[TMP0:%.*]] = load i8, ptr [[__TAGP_ADDR_0_I]], align 1, !tbaa [[TBAA4]]
-// CHECK-NEXT:    [[CMP_NOT_I:%.*]] = icmp eq i8 [[TMP0]], 0
-// CHECK-NEXT:    br i1 [[CMP_NOT_I]], label [[_ZL22__MAKE_MANTISSA_BASE10PKC_EXIT:%.*]], label [[WHILE_BODY_I:%.*]]
+// CHECK-NEXT:    [[TMP0:%.*]] = load i8, ptr [[P:%.*]], align 1, !tbaa [[TBAA4]]
+// CHECK-NEXT:    [[CMP_NOT_I1:%.*]] = icmp eq i8 [[TMP0]], 0
+// CHECK-NEXT:    br i1 [[CMP_NOT_I1]], label [[_ZL22__MAKE_MANTISSA_BASE10PKC_EXIT:%.*]], label [[WHILE_BODY_I:%.*]]
 // CHECK:       while.body.i:
-// CHECK-NEXT:    [[TMP1:%.*]] = add i8 [[TMP0]], -48
-// CHECK-NEXT:    [[OR_COND_I:%.*]] = icmp ult i8 [[TMP1]], 10
-// CHECK-NEXT:    br i1 [[OR_COND_I]], label [[IF_THEN_I:%.*]], label [[CLEANUP_I]]
+// CHECK-NEXT:    [[TMP1:%.*]] = phi i8 [ [[TMP3:%.*]], [[IF_THEN_I:%.*]] ], [ [[TMP0]], [[ENTRY:%.*]] ]
+// CHECK-NEXT:    [[__R_0_I3:%.*]] = phi i64 [ [[SUB_I:%.*]], [[IF_THEN_I]] ], [ 0, [[ENTRY]] ]
+// CHECK-NEXT:    [[__TAGP_ADDR_0_I2:%.*]] = phi ptr [ [[INCDEC_PTR_I:%.*]], [[IF_THEN_I]] ], [ [[P]], [[ENTRY]] ]
+// CHECK-NEXT:    [[TMP2:%.*]] = add i8 [[TMP1]], -48
+// CHECK-NEXT:    [[OR_COND_I:%.*]] = icmp ult i8 [[TMP2]], 10
+// CHECK-NEXT:    br i1 [[OR_COND_I]], label [[IF_THEN_I]], label [[_ZL22__MAKE_MANTISSA_BASE10PKC_EXIT]]
 // CHECK:       if.then.i:
-// CHECK-NEXT:    [[MUL_I:%.*]] = mul i64 [[__R_0_I]], 10
-// CHECK-NEXT:    [[CONV5_I:%.*]] = zext nneg i8 [[TMP0]] to i64
+// CHECK-NEXT:    [[MUL_I:%.*]] = mul i64 [[__R_0_I3]], 10
+// CHECK-NEXT:    [[CONV5_I:%.*]] = zext nneg i8 [[TMP1]] to i64
 // CHECK-NEXT:    [[ADD_I:%.*]] = add i64 [[MUL_I]], -48
-// CHECK-NEXT:    [[SUB_I:%.*]] = add i64 [[ADD_I]], [[CONV5_I]]
-// CHECK-NEXT:    [[INCDEC_PTR_I:%.*]] = getelementptr inbounds nuw i8, ptr [[__TAGP_ADDR_0_I]], i64 1
-// CHECK-NEXT:    br label [[CLEANUP_I]]
-// CHECK:       cleanup.i:
-// CHECK-NEXT:    [[__TAGP_ADDR_1_I]] = phi ptr [ [[INCDEC_PTR_I]], [[IF_THEN_I]] ], [ [[__TAGP_ADDR_0_I]], [[WHILE_BODY_I]] ]
-// CHECK-NEXT:    [[__R_1_I]] = phi i64 [ [[SUB_I]], [[IF_THEN_I]] ], [ [[__R_0_I]], [[WHILE_BODY_I]] ]
-// CHECK-NEXT:    br i1 [[OR_COND_I]], label [[WHILE_COND_I]], label [[_ZL22__MAKE_MANTISSA_BASE10PKC_EXIT]], !llvm.loop [[LOOP10:![0-9]+]]
+// CHECK-NEXT:    [[SUB_I]] = add i64 [[ADD_I]], [[CONV5_I]]
+// CHECK-NEXT:    [[INCDEC_PTR_I]] = getelementptr inbounds nuw i8, ptr [[__TAGP_ADDR_0_I2]], i64 1
+// CHECK-NEXT:    [[TMP3]] = load i8, ptr [[INCDEC_PTR_I]], align 1, !tbaa [[TBAA4]]
+// CHECK-NEXT:    [[CMP_NOT_I:%.*]] = icmp eq i8 [[TMP3]], 0
+// CHECK-NEXT:    br i1 [[CMP_NOT_I]], label [[_ZL22__MAKE_MANTISSA_BASE10PKC_EXIT]], label [[WHILE_BODY_I]], !llvm.loop [[LOOP10:![0-9]+]]
 // CHECK:       _ZL22__make_mantissa_base10PKc.exit:
-// CHECK-NEXT:    [[RETVAL_2_I:%.*]] = phi i64 [ 0, [[CLEANUP_I]] ], [ [[__R_0_I]], [[WHILE_COND_I]] ]
+// CHECK-NEXT:    [[RETVAL_2_I:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ 0, [[WHILE_BODY_I]] ], [ [[SUB_I]], [[IF_THEN_I]] ]
 // CHECK-NEXT:    ret i64 [[RETVAL_2_I]]
 //
 // AMDGCNSPIRV-LABEL: @test___make_mantissa_base10(
@@ -161,78 +155,70 @@ extern "C" __device__ uint64_t test___make_mantissa_base10(const char *p) {
 
 // CHECK-LABEL: @test___make_mantissa_base16(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    br label [[WHILE_COND_I:%.*]]
-// CHECK:       while.cond.i:
-// CHECK-NEXT:    [[__TAGP_ADDR_0_I:%.*]] = phi ptr [ [[P:%.*]], [[ENTRY:%.*]] ], [ [[__TAGP_ADDR_1_I:%.*]], [[CLEANUP_I:%.*]] ]
-// CHECK-NEXT:    [[__R_0_I:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[__R_2_I:%.*]], [[CLEANUP_I]] ]
-// CHECK-NEXT:    [[TMP0:%.*]] = load i8, ptr [[__TAGP_ADDR_0_I]], align 1, !tbaa [[TBAA4]]
-// CHECK-NEXT:    [[CMP_NOT_I:%.*]] = icmp eq i8 [[TMP0]], 0
-// CHECK-NEXT:    br i1 [[CMP_NOT_I]], label [[_ZL22__MAKE_MANTISSA_BASE16PKC_EXIT:%.*]], label [[WHILE_BODY_I:%.*]]
+// CHECK-NEXT:    [[TMP0:%.*]] = load i8, ptr [[P:%.*]], align 1, !tbaa [[TBAA4]]
+// CHECK-NEXT:    [[CMP_NOT_I1:%.*]] = icmp eq i8 [[TMP0]], 0
+// CHECK-NEXT:    br i1 [[CMP_NOT_I1]], label [[_ZL22__MAKE_MANTISSA_BASE16PKC_EXIT:%.*]], label [[WHILE_BODY_I:%.*]]
 // CHECK:       while.body.i:
-// CHECK-NEXT:    [[TMP1:%.*]] = add i8 [[TMP0]], -48
-// CHECK-NEXT:    [[OR_COND_I:%.*]] = icmp ult i8 [[TMP1]], 10
-// CHECK-NEXT:    br i1 [[OR_COND_I]], label [[IF_END31_I:%.*]], label [[IF_ELSE_I:%.*]]
+// CHECK-NEXT:    [[TMP1:%.*]] = phi i8 [ [[TMP5:%.*]], [[IF_END31_I:%.*]] ], [ [[TMP0]], [[ENTRY:%.*]] ]
+// CHECK-NEXT:    [[__R_0_I3:%.*]] = phi i64 [ [[ADD28_I:%.*]], [[IF_END31_I]] ], [ 0, [[ENTRY]] ]
+// CHECK-NEXT:    [[__TAGP_ADDR_0_I2:%.*]] = phi ptr [ [[INCDEC_PTR_I:%.*]], [[IF_END31_I]] ], [ [[P]], [[ENTRY]] ]
+// CHECK-NEXT:    [[TMP2:%.*]] = add i8 [[TMP1]], -48
+// CHECK-NEXT:    [[OR_COND_I:%.*]] = icmp ult i8 [[TMP2]], 10
+// CHECK-NEXT:    br i1 [[OR_COND_I]], label [[IF_END31_I]], label [[IF_ELSE_I:%.*]]
 // CHECK:       if.else.i:
-// CHECK-NEXT:    [[TMP2:%.*]] = add i8 [[TMP0]], -97
-// CHECK-NEXT:    [[OR_COND33_I:%.*]] = icmp ult i8 [[TMP2]], 6
+// CHECK-NEXT:    [[TMP3:%.*]] = add i8 [[TMP1]], -97
+// CHECK-NEXT:    [[OR_COND33_I:%.*]] = icmp ult i8 [[TMP3]], 6
 // CHECK-NEXT:    br i1 [[OR_COND33_I]], label [[IF_END31_I]], label [[IF_ELSE17_I:%.*]]
 // CHECK:       if.else17.i:
-// CHECK-NEXT:    [[TMP3:%.*]] = add i8 [[TMP0]], -65
-// CHECK-NEXT:    [[OR_COND34_I:%.*]] = icmp ult i8 [[TMP3]], 6
-// CHECK-NEXT:    br i1 [[OR_COND34_I]], label [[IF_END31_I]], label [[CLEANUP_I]]
+// CHECK-NEXT:    [[TMP4:%.*]] = add i8 [[TMP1]], -65
+// CHECK-NEXT:    [[OR_COND34_I:%.*]] = icmp ult i8 [[TMP4]], 6
+// CHECK-NEXT:    br i1 [[OR_COND34_I]], label [[IF_END31_I]], label [[_ZL22__MAKE_MANTISSA_BASE16PKC_EXIT]]
 // CHECK:       if.end31.i:
 // CHECK-NEXT:    [[DOTSINK:%.*]] = phi i64 [ -48, [[WHILE_BODY_I]] ], [ -87, [[IF_ELSE_I]] ], [ -55, [[IF_ELSE17_I]] ]
-// CHECK-NEXT:    [[MUL24_I:%.*]] = shl i64 [[__R_0_I]], 4
-// CHECK-NEXT:    [[CONV25_I:%.*]] = zext nneg i8 [[TMP0]] to i64
+// CHECK-NEXT:    [[MUL24_I:%.*]] = shl i64 [[__R_0_I3]], 4
+// CHECK-NEXT:    [[CONV25_I:%.*]] = zext nneg i8 [[TMP1]] to i64
 // CHECK-NEXT:    [[ADD26_I:%.*]] = add i64 [[MUL24_I]], [[DOTSINK]]
-// CHECK-NEXT:    [[ADD28_I:%.*]] = add i64 [[ADD26_I]], [[CONV25_I]]
-// CHECK-NEXT:    [[INCDEC_PTR_I:%.*]] = getelementptr inbounds nuw i8, ptr [[__TAGP_ADDR_0_I]], i64 1
-// CHECK-NEXT:    br label [[CLEANUP_I]]
-// CHECK:       cleanup.i:
-// CHECK-NEXT:    [[__TAGP_ADDR_1_I]] = phi ptr [ [[INCDEC_PTR_I]], [[IF_END31_I]] ], [ [[__TAGP_ADDR_0_I]], [[IF_ELSE17_I]] ]
-// CHECK-NEXT:    [[__R_2_I]] = phi i64 [ [[ADD28_I]], [[IF_END31_I]] ], [ [[__R_0_I]], [[IF_ELSE17_I]] ]
-// CHECK-NEXT:    [[COND_I:%.*]] = phi i1 [ true, [[IF_END31_I]] ], [ false, [[IF_ELSE17_I]] ]
-// CHECK-NEXT:    br i1 [[COND_I]], label [[WHILE_COND_I]], label [[_ZL22__MAKE_MANTISSA_BASE16PKC_EXIT]], !llvm.loop [[LOOP11:![0-9]+]]
+// CHECK-NEXT:    [[ADD28_I]] = add i64 [[ADD26_I]], [[CONV25_I]]
+// CHECK-NEXT:    [[INCDEC_PTR_I]] = getelementptr inbounds nuw i8, ptr [[__TAGP_ADDR_0_I2]], i64 1
+// CHECK-NEXT:    [[TMP5]] = load i8, ptr [[INCDEC_PTR_I]], align 1, !tbaa [[TBAA4]]
+// CHECK-NEXT:    [[CMP_NOT_I:%.*]] = icmp eq i8 [[TMP5]], 0
+// CHECK-NEXT:    br i1 [[CMP_NOT_I]], label [[_ZL22__MAKE_MANTISSA_BASE16PKC_EXIT]], label [[WHILE_BODY_I]], !llvm.loop [[LOOP11:![0-9]+]]
 // CHECK:       _ZL22__make_mantissa_base16PKc.exit:
-// CHECK-NEXT:    [[RETVAL_2_I:%.*]] = phi i64 [ 0, [[CLEANUP_I]] ], [ [[__R_0_I]], [[WHILE_COND_I]] ]
+// CHECK-NEXT:    [[RETVAL_2_I:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ 0, [[IF_ELSE17_I]] ], [ [[ADD28_I]], [[IF_END31_I]] ]
 // CHECK-NEXT:    ret i64 [[RETVAL_2_I]]
 //
 // AMDGCNSPIRV-LABEL: @test___make_mantissa_base16(
 // AMDGCNSPIRV-NEXT:  entry:
-// AMDGCNSPIRV-NEXT:    br label [[WHILE_COND_I:%.*]]
-// AMDGCNSPIRV:       while.cond.i:
-// AMDGCNSPIRV-NEXT:    [[__TAGP_ADDR_0_I:%.*]] = phi ptr addrspace(4) [ [[P:%.*]], [[ENTRY:%.*]] ], [ [[__TAGP_ADDR_1_I:%.*]], [[CLEANUP_I:%.*]] ]
-// AMDGCNSPIRV-NEXT:    [[__R_0_I:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[__R_2_I:%.*]], [[CLEANUP_I]] ]
-// AMDGCNSPIRV-NEXT:    [[TMP0:%.*]] = load i8, ptr addrspace(4) [[__TAGP_ADDR_0_I]], align 1, !tbaa [[TBAA5]]
-// AMDGCNSPIRV-NEXT:    [[CMP_NOT_I:%.*]] = icmp eq i8 [[TMP0]], 0
-// AMDGCNSPIRV-NEXT:    br i1 [[CMP_NOT_I]], label [[_ZL22__MAKE_MANTISSA_BASE16PKC_EXIT:%.*]], label [[WHILE_BODY_I:%.*]]
+// AMDGCNSPIRV-NEXT:    [[TMP0:%.*]] = load i8, ptr addrspace(4) [[P:%.*]], align 1, !tbaa [[TBAA5]]
+// AMDGCNSPIRV-NEXT:    [[CMP_NOT_I1:%.*]] = icmp eq i8 [[TMP0]], 0
+// AMDGCNSPIRV-NEXT:    br i1 [[CMP_NOT_I1]], label [[_ZL22__MAKE_MANTISSA_BASE16PKC_EXIT:%.*]], label [[WHILE_BODY_I:%.*]]
 // AMDGCNSPIRV:       while.body.i:
-// AMDGCNSPIRV-NEXT:    [[TMP1:%.*]] = add i8 [[TMP0]], -48
-// AMDGCNSPIRV-NEXT:    [[OR_COND_I:%.*]] = icmp ult i8 [[TMP1]], 10
-// AMDGCNSPIRV-NEXT:    br i1 [[OR_COND_I]], label [[IF_END31_I:%.*]], label [[IF_ELSE_I:%.*]]
+// AMDGCNSPIRV-NEXT:    [[TMP1:%.*]] = phi i8 [ [[TMP5:%.*]], [[IF_END31_I:%.*]] ], [ [[TMP0]], [[ENTRY:%.*]] ]
+// AMDGCNSPIRV-NEXT:    [[__R_0_I3:%.*]] = phi i64 [ [[ADD28_I:%.*]], [[IF_END31_I]] ], [ 0, [[ENTRY]] ]
+// AMDGCNSPIRV-NEXT:    [[__TAGP_ADDR_0_I2:%.*]] = phi ptr addrspace(4) [ [[INCDEC_PTR_I:%.*]], [[IF_END31_I]] ], [ [[P]], [[ENTRY]] ]
+// AMDGCNSPIRV-NEXT:    [[TMP2:%.*]] = add i8 [[TMP1]], -48
+// AMDGCNSPIRV-NEXT:    [[OR_COND_I:%.*]] = icmp ult i8 [[TMP2]], 10
+// AMDGCNSPIRV-NEXT:    br i1 [[OR_COND_I]], label [[IF_END31_I]], label [[IF_ELSE_I:%.*]]
 // AMDGCNSPIRV:       if.else.i:
-// AMDGCNSPIRV-NEXT:    [[TMP2:%.*]] = add i8 [[TMP0]], -97
-// AMDGCNSPIRV-NEXT:    [[OR_COND33_I:%.*]] = icmp ult i8 [[TMP2]], 6
+// AMDGCNSPIRV-NEXT:    [[TMP3:%.*]] = add i8 [[TMP1]], -97
+// AMDGCNSPIRV-NEXT:    [[OR_COND33_I:%.*]] = icmp ult i8 [[TMP3]], 6
 // AMDGCNSPIRV-NEXT:    br i1 [[OR_COND33_I]], label [[IF_END31_I]], label [[IF_ELSE17_I:%.*]]
 // AMDGCNSPIRV:       if.else17.i:
-// AMDGCNSPIRV-NEXT:    [[TMP3:%.*]] = add i8 [[TMP0]], -65
-// AMDGCNSPIRV-NEXT:    [[OR_COND34_I:%.*]] = icmp ult i8 [[TMP3]], 6
-// AMDGCNSPIRV-NEXT:    br i1 [[OR_COND34_I]], label [[IF_END31_I]], label [[CLEANUP_I]]
+// AMDGCNSPIRV-NEXT:    [[TMP4:%.*]] = add i8 [[TMP1]], -65
+// AMDGCNSPIRV-NEXT:    [[OR_COND34_I:%.*]] = icmp ult i8 [[TMP4]], 6
+// AMDGCNSPIRV-NEXT:    br i1 [[OR_COND34_I]], label [[IF_END31_I]], label [[_ZL22__MAKE_MANTISSA_BASE16PKC_EXIT]]
 // AMDGCNSPIRV:       if.end31.i:
 // AMDGCNSPIRV-NEXT:    [[DOTSINK:%.*]] = phi i64 [ -48, [[WHILE_BODY_I]] ], [ -87, [[IF_ELSE_I]] ], [ -55, [[IF_ELSE17_I]] ]
-// AMDGCNSPIRV-NEXT:    [[MUL24_I:%.*]] = shl i64 [[__R_0_I]], 4
-// AMDGCNSPIRV-NEXT:    [[CONV25_I:%.*]] = zext nneg i8 [[TMP0]] to i64
+// AMDGCNSPIRV-NEXT:    [[MUL24_I:%.*]] = shl i64 [[__R_0_I3]], 4
+// AMDGCNSPIRV-NEXT:    [[CONV25_I:%.*]] = zext nneg i8 [[TMP1]] to i64
 // AMDGCNSPIRV-NEXT:    [[ADD26_I:%.*]] = add i64 [[MUL24_I]], [[DOTSINK]]
-// AMDGCNSPIRV-NEXT:    [[ADD28_I:%.*]] = add i64 [[ADD26_I]], [[CONV25_I]]
-// AMDGCNSPIRV-NEXT:    [[INCDEC_PTR_I:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[__TAGP_ADDR_0_I]], i64 1
-// AMDGCNSPIRV-NEXT:    br label [[CLEANUP_I]]
-// AMDGCNSPIRV:       cleanup.i:
-// AMDGCNSPIRV-NEXT:    [[__TAGP_ADDR_1_I]] = phi ptr addrspace(4) [ [[INCDEC_PTR_I]], [[IF_END31_I]] ], [ [[__TAGP_ADDR_0_I]], [[IF_ELSE17_I]] ]
-// AMDGCNSPIRV-NEXT:    [[__R_2_I]] = phi i64 [ [[ADD28_I]], [[IF_END31_I]] ], [ [[__R_0_I]], [[IF_ELSE17_I]] ]
-// AMDGCNSPIRV-NEXT:    [[COND_I:%.*]] = phi i1 [ true, [[IF_END31_I]] ], [ false, [[IF_ELSE17_I]] ]
-// AMDGCNSPIRV-NEXT:    br i1 [[COND_I]], label [[WHILE_COND_I]], label [[_ZL22__MAKE_MANTISSA_BASE16PKC_EXIT]], !llvm.loop [[LOOP12:![0-9]+]]
+// AMDGCNSPIRV-NEXT:    [[ADD28_I]] = add i64 [[ADD26_I]], [[CONV25_I]]
+// AMDGCNSPIRV-NEXT:    [[INCDEC_PTR_I]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[__TAGP_ADDR_0_I2]], i64 1
+// AMDGCNSPIRV-NEXT:    [[TMP5]] = load i8, ptr addrspace(4) [[INCDEC_PTR_I]], align 1, !tbaa [[TBAA5]]
+// AMDGCNSPIRV-NEXT:    [[CMP_NOT_I:%.*]] = icmp eq i8 [[TMP5]], 0
+// AMDGCNSPIRV-NEXT:    br i1 [[CMP_NOT_I]], label [[_ZL22__MAKE_MANTISSA_BASE16PKC_EXIT]], label [[WHILE_BODY_I]], !llvm.loop [[LOOP12:![0-9]+]]
 // AMDGCNSPIRV:       _ZL22__make_mantissa_base16PKc.exit:
-// AMDGCNSPIRV-NEXT:    [[RETVAL_2_I:%.*]] = phi i64 [ 0, [[CLEANUP_I]] ], [ [[__R_0_I]], [[WHILE_COND_I]] ]
+// AMDGCNSPIRV-NEXT:    [[RETVAL_2_I:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ 0, [[IF_ELSE17_I]] ], [ [[ADD28_I]], [[IF_END31_I]] ]
 // AMDGCNSPIRV-NEXT:    ret i64 [[RETVAL_2_I]]
 //
 extern "C" __device__ uint64_t test___make_mantissa_base16(const char *p) {
@@ -243,91 +229,85 @@ extern "C" __device__ uint64_t test___make_mantissa_base16(const char *p) {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = load i8, ptr [[P:%.*]], align 1, !tbaa [[TBAA4]]
 // CHECK-NEXT:    [[CMP_I:%.*]] = icmp eq i8 [[TMP0]], 48
-// CHECK-NEXT:    br i1 [[CMP_I]], label [[IF_THEN_I:%.*]], label [[WHILE_COND_I14_I:%.*]]
+// CHECK-NEXT:    br i1 [[CMP_I]], label [[IF_THEN_I:%.*]], label [[WHILE_COND_I14_I_PREHEADER:%.*]]
+// CHECK:       while.cond.i14.i.preheader:
+// CHECK-NEXT:    [[TMP1:%.*]] = load i8, ptr [[P]], align 1, !tbaa [[TBAA4]]
+// CHECK-NEXT:    [[CMP_NOT_I17_I5:%.*]] = icmp eq i8 [[TMP1]], 0
+// CHECK-NEXT:    br i1 [[CMP_NOT_I17_I5]], label [[_ZL15__MAKE_MANTISSAPKC_EXIT:%.*]], label [[WHILE_BODY_I18_I:%.*]]
 // CHECK:       if.then.i:
 // CHECK-NEXT:    [[INCDEC_PTR_I:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 1
-// CHECK-NEXT:    [[TMP1:%.*]] = load i8, ptr [[INCDEC_PTR_I]], align 1, !tbaa [[TBAA4]]
-// CHECK-NEXT:    switch i8 [[TMP1]], label [[WHILE_COND_I_I:%.*]] [
-// CHECK-NEXT:      i8 120, label [[WHILE_COND_I30_I_PREHEADER:%.*]]
-// CHECK-NEXT:      i8 88, label [[WHILE_COND_I30_I_PREHEADER]]
+// CHECK-NEXT:    [[TMP2:%.*]] = load i8, ptr [[INCDEC_PTR_I]], align 1, !tbaa [[TBAA4]]
+// CHECK-NEXT:    switch i8 [[TMP2]], label [[WHILE_COND_I_I_PREHEADER:%.*]] [
+// CHECK-NEXT:      i8 120, label [[IF_THEN5_I:%.*]]
+// CHECK-NEXT:      i8 88, label [[IF_THEN5_I]]
 // CHECK-NEXT:    ]
-// CHECK:       while.cond.i30.i.preheader:
-// CHECK-NEXT:    br label [[WHILE_COND_I30_I:%.*]]
-// CHECK:       while.cond.i30.i:
-// CHECK-NEXT:    [[__TAGP_ADDR_0_I31_I:%.*]] = phi ptr [ [[__TAGP_ADDR_1_I37_I:%.*]], [[CLEANUP_I36_I:%.*]] ], [ [[INCDEC_PTR_I]], [[WHILE_COND_I30_I_PREHEADER]] ]
-// CHECK-NEXT:    [[__R_0_I32_I:%.*]] = phi i64 [ [[__R_2_I_I:%.*]], [[CLEANUP_I36_I]] ], [ 0, [[WHILE_COND_I30_I_PREHEADER]] ]
-// CHECK-NEXT:    [[TMP2:%.*]] = load i8, ptr [[__TAGP_ADDR_0_I31_I]], align 1, !tbaa [[TBAA4]]
-// CHECK-NEXT:    [[CMP_NOT_I33_I:%.*]] = icmp eq i8 [[TMP2]], 0
-// CHECK-NEXT:    br i1 [[CMP_NOT_I33_I]], label [[_ZL15__MAKE_MANTISSAPKC_EXIT:%.*]], label [[WHILE_BODY_I34_I:%.*]]
-// CHECK:       while.body.i34.i:
-// CHECK-NEXT:    [[TMP3:%.*]] = add i8 [[TMP2]], -48
-// CHECK-NEXT:    [[OR_COND_I35_I:%.*]] = icmp ult i8 [[TMP3]], 10
-// CHECK-NEXT:    br i1 [[OR_COND_I35_I]], label [[IF_END31_I_I:%.*]], label [[IF_ELSE_I_I:%.*]]
+// CHECK:       while.cond.i.i.preheader:
+// CHECK-NEXT:    [[TMP3:%.*]] = load i8, ptr [[INCDEC_PTR_I]], align 1, !tbaa [[TBAA4]]
+// CHECK-NEXT:    [[CMP_NOT_I_I14:%.*]] = icmp eq i8 [[TMP3]], 0
+// CHECK-NEXT:    br i1 [[CMP_NOT_I_I14]], label [[_ZL15__MAKE_MANTISSAPKC_EXIT]], label [[WHILE_BODY_I_I:%.*]]
+// CHECK:       if.then5.i:
+// CHECK-NEXT:    [[TMP4:%.*]] = load i8, ptr [[INCDEC_PTR_I]], align 1, !tbaa [[TBAA4]]
+// CHECK-NEXT:    [[CMP_NOT_I30_I9:%.*]] = icmp eq i8 [[TMP4]], 0
+// CHECK-NEXT:    br i1 [[CMP_NOT_I30_I9]], label [[_ZL15__MAKE_MANTISSAPKC_EXIT]], label [[WHILE_BODY_I31_I:%.*]]
+// CHECK:       while.body.i31.i:
+// CHECK-NEXT:    [[TMP5:%.*]] = phi i8 [ [[TMP9:%.*]], [[IF_END31_I_I:%.*]] ], [ [[TMP4]], [[IF_THEN5_I]] ]
+// CHECK-NEXT:    [[__R_0_I29_I11:%.*]] = phi i64 [ [[ADD28_I_I:%.*]], [[IF_END31_I_I]] ], [ 0, [[IF_THEN5_I]] ]
+// CHECK-NEXT:    [[__TAGP_ADDR_0_I28_I10:%.*]] = phi ptr [ [[INCDEC_PTR_I34_I:%.*]], [[IF_END31_I_I]] ], [ [[INCDEC_PTR_I]], [[IF_THEN5_I]] ]
+// CHECK-NEXT:    [[TMP6:%.*]] = add i8 [[TMP5]], -48
+// CHECK-NEXT:    [[OR_COND_I32_I:%.*]] = icmp ult i8 [[TMP6]], 10
+// CHECK-NEXT:    br i1 [[OR_COND_I32_I]], label [[IF_END31_I_I]], label [[IF_ELSE_I_I:%.*]]
 // CHECK:       if.else.i.i:
-// CHECK-NEXT:    [[TMP4:%.*]] = add i8 [[TMP2]], -97
-// CHECK-NEXT:    [[OR_COND33_I_I:%.*]] = icmp ult i8 [[TMP4]], 6
+// CHECK-NEXT:    [[TMP7:%.*]] = add i8 [[TMP5]], -97
+// CHECK-NEXT:    [[OR_COND33_I_I:%.*]] = icmp ult i8 [[TMP7]], 6
 // CHECK-NEXT:    br i1 [[OR_COND33_I_I]], label [[IF_END31_I_I]], label [[IF_ELSE17_I_I:%.*]]
 // CHECK:       if.else17.i.i:
-// CHECK-NEXT:    [[TMP5:%.*]] = add i8 [[TMP2]], -65
-// CHECK-NEXT:    [[OR_COND34_I_I:%.*]] = icmp ult i8 [[TMP5]], 6
-// CHECK-NEXT:    br i1 [[OR_COND34_I_I]], label [[IF_END31_I_I]], label [[CLEANUP_I36_I]]
+// CHECK-NEXT:    [[TMP8:%.*]] = add i8 [[TMP5]], -65
+// CHECK-NEXT:    [[OR_COND34_I_I:%.*]] = icmp ult i8 [[TMP8]], 6
+// CHECK-NEXT:    br i1 [[OR_COND34_I_I]], label [[IF_END31_I_I]], label [[_ZL15__MAKE_MANTISSAPKC_EXIT]]
 // CHECK:       if.end31.i.i:
-// CHECK-NEXT:    [[DOTSINK:%.*]] = phi i64 [ -48, [[WHILE_BODY_I34_I]] ], [ -87, [[IF_ELSE_I_I]] ], [ -55, [[IF_ELSE17_I_I]] ]
-// CHECK-NEXT:    [[MUL24_I_I:%.*]] = shl i64 [[__R_0_I32_I]], 4
-// CHECK-NEXT:    [[CONV25_I_I:%.*]] = zext nneg i8 [[TMP2]] to i64
+// CHECK-NEXT:    [[DOTSINK:%.*]] = phi i64 [ -48, [[WHILE_BODY_I31_I]] ], [ -87, [[IF_ELSE_I_I]] ], [ -55, [[IF_ELSE17_I_I]] ]
+// CHECK-NEXT:    [[MUL24_I_I:%.*]] = shl i64 [[__R_0_I29_I11]], 4
+// CHECK-NEXT:    [[CONV25_I_I:%.*]] = zext nneg i8 [[TMP5]] to i64
 // CHECK-NEXT:    [[ADD26_I_I:%.*]] = add i64 [[MUL24_I_I]], [[DOTSINK]]
-// CHECK-NEXT:    [[ADD28_I_I:%.*]] = add i64 [[ADD26_I_I]], [[CONV25_I_I]]
-// CHECK-NEXT:    [[INCDEC_PTR_I40_I:%.*]] = getelementptr inbounds nuw i8, ptr [[__TAGP_ADDR_0_I31_I]], i64 1
-// CHECK-NEXT:    br label [[CLEANUP_I36_I]]
-// CHECK:       cleanup.i36.i:
-// CHECK-NEXT:    [[__TAGP_ADDR_1_I37_I]] = phi ptr [ [[INCDEC_PTR_I40_I]], [[IF_END31_I_I]] ], [ [[__TAGP_ADDR_0_I31_I]], [[IF_ELSE17_I_I]] ]
-// CHECK-NEXT:    [[__R_2_I_I]] = phi i64 [ [[ADD28_I_I]], [[IF_END31_I_I]] ], [ [[__R_0_I32_I]], [[IF_ELSE17_I_I]] ]
-// CHECK-NEXT:    [[COND_I_I:%.*]] = phi i1 [ true, [[IF_END31_I_I]] ], [ false, [[IF_ELSE17_I_I]] ]
-// CHECK-NEXT:    br i1 [[COND_I_I]], label [[WHILE_COND_I30_I]], label [[_ZL15__MAKE_MANTISSAPKC_EXIT]], !llvm.loop [[LOOP11]]
-// CHECK:       while.cond.i.i:
-// CHECK-NEXT:    [[__TAGP_ADDR_0_I_I:%.*]] = phi ptr [ [[__TAGP_ADDR_1_I_I:%.*]], [[CLEANUP_I_I:%.*]] ], [ [[INCDEC_PTR_I]], [[IF_THEN_I]] ]
-// CHECK-NEXT:    [[__R_0_I_I:%.*]] = phi i64 [ [[__R_1_I_I:%.*]], [[CLEANUP_I_I]] ], [ 0, [[IF_THEN_I]] ]
-// CHECK-NEXT:    [[TMP6:%.*]] = load i8, ptr [[__TAGP_ADDR_0_I_I]], align 1, !tbaa [[TBAA4]]
-// CHECK-NEXT:    [[CMP_NOT_I_I:%.*]] = icmp eq i8 [[TMP6]], 0
-// CHECK-NEXT:    br i1 [[CMP_NOT_I_I]], label [[_ZL15__MAKE_MANTISSAPKC_EXIT]], label [[WHILE_BODY_I_I:%.*]]
+// CHECK-NEXT:    [[ADD28_I_I]] = add i64 [[ADD26_I_I]], [[CONV25_I_I]]
+// CHECK-NEXT:    [[INCDEC_PTR_I34_I]] = getelementptr inbounds nuw i8, ptr [[__TAGP_ADDR_0_I28_I10]], i64 1
+// CHECK-NEXT:    [[TMP9]] = load i8, ptr [[INCDEC_PTR_I34_I]], align 1, !tbaa [[TBAA4]]
+// CHECK-NEXT:    [[CMP_NOT_I30_I:%.*]] = icmp eq i8 [[TMP9]], 0
+// CHECK-NEXT:    br i1 [[CMP_NOT_I30_I]], label [[_ZL15__MAKE_MANTISSAPKC_EXIT]], label [[WHILE_BODY_I31_I]], !llvm.loop [[LOOP11]]
 // CHECK:       while.body.i.i:
-// CHECK-NEXT:    [[TMP7:%.*]] = and i8 [[TMP6]], -8
-// CHECK-NEXT:    [[OR_COND_I_I:%.*]] = icmp eq i8 [[TMP7]], 48
-// CHECK-NEXT:    br i1 [[OR_COND_I_I]], label [[IF_THEN_I_I:%.*]], label [[CLEANUP_I_I]]
+// CHECK-NEXT:    [[TMP10:%.*]] = phi i8 [ [[TMP12:%.*]], [[IF_THEN_I_I:%.*]] ], [ [[TMP3]], [[WHILE_COND_I_I_PREHEADER]] ]
+// CHECK-NEXT:    [[__R_0_I_I16:%.*]] = phi i64 [ [[SUB_I_I:%.*]], [[IF_THEN_I_I]] ], [ 0, [[WHILE_COND_I_I_PREHEADER]] ]
+// CHECK-NEXT:    [[__TAGP_ADDR_0_I_I15:%.*]] = phi ptr [ [[INCDEC_PTR_I_I:%.*]], [[IF_THEN_I_I]] ], [ [[INCDEC_PTR_I]], [[WHILE_COND_I_I_PREHEADER]] ]
+// CHECK-NEXT:    [[TMP11:%.*]] = and i8 [[TMP10]], -8
+// CHECK-NEXT:    [[OR_COND_I_I:%.*]] = icmp eq i8 [[TMP11]], 48
+// CHECK-NEXT:    br i1 [[OR_COND_I_I]], label [[IF_THEN_I_I]], label [[_ZL15__MAKE_MANTISSAPKC_EXIT]]
 // CHECK:       if.then.i.i:
-// CHECK-NEXT:    [[MUL_I_I:%.*]] = shl i64 [[__R_0_I_I]], 3
-// CHECK-NEXT:    [[CONV5_I_I:%.*]] = zext nneg i8 [[TMP6]] to i64
+// CHECK-NEXT:    [[MUL_I_I:%.*]] = shl i64 [[__R_0_I_I16]], 3
+// CHECK-NEXT:    [[CONV5_I_I:%.*]] = zext nneg i8 [[TMP10]] to i64
 // CHECK-NEXT:    [[ADD_I_I:%.*]] = add i64 [[MUL_I_I]], -48
-// CHECK-NEXT:    [[SUB_I_I:%.*]] = add i64 [[ADD_I_I]], [[CONV5_I_I]]
-// CHECK-NEXT:    [[INCDEC_PTR_I_I:%.*]] = getelementptr inbounds nuw i8, ptr [[__TAGP_ADDR_0_I_I]], i64 1
-// CHECK-NEXT:    br label [[CLEANUP_I_I]]
-// CHECK:       cleanup.i.i:
-// CHECK-NEXT:    [[__TAGP_ADDR_1_I_I]] = phi ptr [ [[INCDEC_PTR_I_I]], [[IF_THEN_I_I]] ], [ [[__TAGP_ADDR_0_I_I]], [[WHILE_BODY_I_I]] ]
-// CHECK-NEXT:    [[__R_1_I_I]] = phi i64 [ [[SUB_I_I]], [[IF_THEN_I_I]] ], [ [[__R_0_I_I]], [[WHILE_BODY_I_I]] ]
-// CHECK-NEXT:    br i1 [[OR_COND_I_I]], label [[WHILE_COND_I_I]], label [[_ZL15__MAKE_MANTISSAPKC_EXIT]], !llvm.loop [[LOOP7]]
-// CHECK:       while.cond.i14.i:
-// CHECK-NEXT:    [[__TAGP_ADDR_0_I15_I:%.*]] = phi ptr [ [[__TAGP_ADDR_1_I21_I:%.*]], [[CLEANUP_I20_I:%.*]] ], [ [[P]], [[ENTRY:%.*]] ]
-// CHECK-NEXT:    [[__R_0_I16_I:%.*]] = phi i64 [ [[__R_1_I22_I:%.*]], [[CLEANUP_I20_I]] ], [ 0, [[ENTRY]] ]
-// CHECK-NEXT:    [[TMP8:%.*]] = load i8, ptr [[__TAGP_ADDR_0_I15_I]], align 1, !tbaa [[TBAA4]]
-// CHECK-NEXT:    [[CMP_NOT_I17_I:%.*]] = icmp eq i8 [[TMP8]], 0
-// CHECK-NEXT:    br i1 [[CMP_NOT_I17_I]], label [[_ZL15__MAKE_MANTISSAPKC_EXIT]], label [[WHILE_BODY_I18_I:%.*]]
+// CHECK-NEXT:    [[SUB_I_I]] = add i64 [[ADD_I_I]], [[CONV5_I_I]]
+// CHECK-NEXT:    [[INCDEC_PTR_I_I]] = getelementptr inbounds nuw i8, ptr [[__TAGP_ADDR_0_I_I15]], i64 1
+// CHECK-NEXT:    [[TMP12]] = load i8, ptr [[INCDEC_PTR_I_I]], align 1, !tbaa [[TBAA4]]
+// CHECK-NEXT:    [[CMP_NOT_I_I:%.*]] = icmp eq i8 [[TMP12]], 0
+// CHECK-NEXT:    br i1 [[CMP_NOT_I_I]], label [[_ZL15__MAKE_MANTISSAPKC_EXIT]], label [[WHILE_BODY_I_I]], !llvm.loop [[LOOP7]]
 // CHECK:       while.body.i18.i:
-// CHECK-NEXT:    [[TMP9:%.*]] = add i8 [[TMP8]], -48
-// CHECK-NEXT:    [[OR_COND_I19_I:%.*]] = icmp ult i8 [[TMP9]], 10
-// CHECK-NEXT:    br i1 [[OR_COND_I19_I]], label [[IF_THEN_I24_I:%.*]], label [[CLEANUP_I20_I]]
-// CHECK:       if.then.i24.i:
-// CHECK-NEXT:    [[MUL_I25_I:%.*]] = mul i64 [[__R_0_I16_I]], 10
-// CHECK-NEXT:    [[CONV5_I26_I:%.*]] = zext nneg i8 [[TMP8]] to i64
-// CHECK-NEXT:    [[ADD_I27_I:%.*]] = add i64 [[MUL_I25_I]], -48
-// CHECK-NEXT:    [[SUB_I28_I:%.*]] = add i64 [[ADD_I27_I]], [[CONV5_I26_I]]
-// CHECK-NEXT:    [[INCDEC_PTR_I29_I:%.*]] = getelementptr inbounds nuw i8, ptr [[__TAGP_ADDR_0_I15_I]], i64 1
-// CHECK-NEXT:    br label [[CLEANUP_I20_I]]
-// CHECK:       cleanup.i20.i:
-// CHECK-NEXT:    [[__TAGP_ADDR_1_I21_I]] = phi ptr [ [[INCDEC_PTR_I29_I]], [[IF_THEN_I24_I]] ], [ [[__TAGP_ADDR_0_I15_I]], [[WHILE_BODY_I18_I]] ]
-// CHECK-NEXT:    [[__R_1_I22_I]] = phi i64 [ [[SUB_I28_I]], [[IF_THEN_I24_I]] ], [ [[__R_0_I16_I]], [[WHILE_BODY_I18_I]] ]
-// CHECK-NEXT:    br i1 [[OR_COND_I19_I]], label [[WHILE_COND_I14_I]], label [[_ZL15__MAKE_MANTISSAPKC_EXIT]], !llvm.loop [[LOOP10]]
+// CHECK-NEXT:    [[TMP13:%.*]] = phi i8 [ [[TMP15:%.*]], [[IF_THEN_I21_I:%.*]] ], [ [[TMP1]], [[WHILE_COND_I14_I_PREHEADER]] ]
+// CHECK-NEXT:    [[__R_0_I16_I7:%.*]] = phi i64 [ [[SUB_I25_I:%.*]], [[IF_THEN_I21_I]] ], [ 0, [[WHILE_COND_I14_I_PREHEADER]] ]
+// CHECK-NEXT:    [[__TAGP_ADDR_0_I15_I6:%.*]] = phi ptr [ [[INCDEC_PTR_I26_I:%.*]], [[IF_THEN_I21_I]] ], [ [[P]], [[WHILE_COND_I14_I_PREHEADER]] ]
+// CHECK-NEXT:    [[TMP14:%.*]] = add i8 [[TMP13]], -48
+// CHECK-NEXT:    [[OR_COND_I19_I:%.*]] = icmp ult i8 [[TMP14]], 10
+// CHECK-NEXT:    br i1 [[OR_COND_I19_I]], label [[IF_THEN_I21_I]], label [[_ZL15__MAKE_MANTISSAPKC_EXIT]]
+// CHECK:       if.then.i21.i:
+// CHECK-NEXT:    [[MUL_I22_I:%.*]] = mul i64 [[__R_0_I16_I7]], 10
+// CHECK-NEXT:    [[CONV5_I23_I:%.*]] = zext nneg i8 [[TMP13]] to i64
+// CHECK-NEXT:    [[ADD_I24_I:%.*]] = add i64 [[MUL_I22_I]], -48
+// CHECK-NEXT:    [[SUB_I25_I]] = add i64 [[ADD_I24_I]], [[CONV5_I23_I]]
+// CHECK-NEXT:    [[INCDEC_PTR_I26_I]] = getelementptr inbounds nuw i8, ptr [[__TAGP_ADDR_0_I15_I6]], i64 1
+// CHECK-NEXT:    [[TMP15]] = load i8, ptr [[INCDEC_PTR_I26_I]], align 1, !tbaa [[TBAA4]]
+// CHECK-NEXT:    [[CMP_NOT_I17_I:%.*]] = icmp eq i8 [[TMP15]], 0
+// CHECK-NEXT:    br i1 [[CMP_NOT_I17_I]], label [[_ZL15__MAKE_MANTISSAPKC_EXIT]], label [[WHILE_BODY_I18_I]], !llvm.loop [[LOOP10]]
 // CHECK:       _ZL15__make_mantissaPKc.exit:
-// CHECK-NEXT:    [[RETVAL_0_I:%.*]] = phi i64 [ 0, [[CLEANUP_I_I]] ], [ [[__R_0_I_I]], [[WHILE_COND_I_I]] ], [ 0, [[CLEANUP_I36_I]] ], [ [[__R_0_I32_I]], [[WHILE_COND_I30_I]] ], [ 0, [[CLEANUP_I20_I]] ], [ [[__R_0_I16_I]], [[WHILE_COND_I14_I]] ]
+// CHECK-NEXT:    [[RETVAL_0_I:%.*]] = phi i64 [ 0, [[WHILE_COND_I_I_PREHEADER]] ], [ 0, [[IF_THEN5_I]] ], [ 0, [[WHILE_COND_I14_I_PREHEADER]] ], [ [[SUB_I_I]], [[IF_THEN_I_I]] ], [ 0, [[WHILE_BODY_I_I]] ], [ [[ADD28_I_I]], [[IF_END31_I_I]] ], [ 0, [[IF_ELSE17_I_I]] ], [ [[SUB_I25_I]], [[IF_THEN_I21_I]] ], [ 0, [[WHILE_BODY_I18_I]] ]
 // CHECK-NEXT:    ret i64 [[RETVAL_0_I]]
 //
 // AMDGCNSPIRV-LABEL: @test___make_mantissa(
@@ -339,53 +319,49 @@ extern "C" __device__ uint64_t test___make_mantissa_base16(const char *p) {
 // AMDGCNSPIRV-NEXT:    [[INCDEC_PTR_I:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[P]], i64 1
 // AMDGCNSPIRV-NEXT:    [[TMP1:%.*]] = load i8, ptr addrspace(4) [[INCDEC_PTR_I]], align 1, !tbaa [[TBAA5]]
 // AMDGCNSPIRV-NEXT:    switch i8 [[TMP1]], label [[WHILE_COND_I_I:%.*]] [
-// AMDGCNSPIRV-NEXT:      i8 120, label [[WHILE_COND_I28_I_PREHEADER:%.*]]
-// AMDGCNSPIRV-NEXT:      i8 88, label [[WHILE_COND_I28_I_PREHEADER]]
+// AMDGCNSPIRV-NEXT:      i8 120, label [[IF_THEN5_I:%.*]]
+// AMDGCNSPIRV-NEXT:      i8 88, label [[IF_THEN5_I]]
 // AMDGCNSPIRV-NEXT:    ]
-// AMDGCNSPIRV:       while.cond.i28.i.preheader:
-// AMDGCNSPIRV-NEXT:    br label [[WHILE_COND_I28_I:%.*]]
-// AMDGCNSPIRV:       while.cond.i28.i:
-// AMDGCNSPIRV-NEXT:    [[__TAGP_ADDR_0_I29_I:%.*]] = phi ptr addrspace(4) [ [[__TAGP_ADDR_1_I34_I:%.*]], [[CLEANUP_I_I:%.*]] ], [ [[INCDEC_PTR_I]], [[WHILE_COND_I28_I_PREHEADER]] ]
-// AMDGCNSPIRV-NEXT:    [[__R_0_I30_I:%.*]] = phi i64 [ [[__R_2_I_I:%.*]], [[CLEANUP_I_I]] ], [ 0, [[WHILE_COND_I28_I_PREHEADER]] ]
-// AMDGCNSPIRV-NEXT:    [[TMP2:%.*]] = load i8, ptr addrspace(4) [[__TAGP_ADDR_0_I29_I]], align 1, !tbaa [[TBAA5]]
-// AMDGCNSPIRV-NEXT:    [[CMP_NOT_I31_I:%.*]] = icmp eq i8 [[TMP2]], 0
-// AMDGCNSPIRV-NEXT:    br i1 [[CMP_NOT_I31_I]], label [[_ZL15__MAKE_MANTISSAPKC_EXIT:%.*]], label [[WHILE_BODY_I32_I:%.*]]
+// AMDGCNSPIRV:       if.then5.i:
+// AMDGCNSPIRV-NEXT:    [[TMP2:%.*]] = load i8, ptr addrspace(4) [[INCDEC_PTR_I]], align 1, !tbaa [[TBAA5]]
+// AMDGCNSPIRV-NEXT:    [[CMP_NOT_I31_I5:%.*]] = icmp eq i8 [[TMP2]], 0
+// AMDGCNSPIRV-NEXT:    br i1 [[CMP_NOT_I31_I5]], label [[_ZL15__MAKE_MANTISSAPKC_EXIT:%.*]], label [[WHILE_BODY_I32_I:%.*]]
 // AMDGCNSPIRV:       while.body.i32.i:
-// AMDGCNSPIRV-NEXT:    [[TMP3:%.*]] = add i8 [[TMP2]], -48
-// AMDGCNSPIRV-NEXT:    [[OR_COND_I33_I:%.*]] = icmp ult i8 [[TMP3]], 10
-// AMDGCNSPIRV-NEXT:    br i1 [[OR_COND_I33_I]], label [[IF_END31_I_I:%.*]], label [[IF_ELSE_I_I:%.*]]
+// AMDGCNSPIRV-NEXT:    [[TMP3:%.*]] = phi i8 [ [[TMP7:%.*]], [[IF_END31_I_I:%.*]] ], [ [[TMP2]], [[IF_THEN5_I]] ]
+// AMDGCNSPIRV-NEXT:    [[__R_0_I30_I7:%.*]] = phi i64 [ [[ADD28_I_I:%.*]], [[IF_END31_I_I]] ], [ 0, [[IF_THEN5_I]] ]
+// AMDGCNSPIRV-NEXT:    [[__TAGP_ADDR_0_I29_I6:%.*]] = phi ptr addrspace(4) [ [[INCDEC_PTR_I36_I:%.*]], [[IF_END31_I_I]] ], [ [[INCDEC_PTR_I]], [[IF_THEN5_I]] ]
+// AMDGCNSPIRV-NEXT:    [[TMP4:%.*]] = add i8 [[TMP3]], -48
+// AMDGCNSPIRV-NEXT:    [[OR_COND_I33_I:%.*]] = icmp ult i8 [[TMP4]], 10
+// AMDGCNSPIRV-NEXT:    br i1 [[OR_COND_I33_I]], label [[IF_END31_I_I]], label [[IF_ELSE_I_I:%.*]]
 // AMDGCNSPIRV:       if.else.i.i:
-// AMDGCNSPIRV-NEXT:    [[TMP4:%.*]] = add i8 [[TMP2]], -97
-// AMDGCNSPIRV-NEXT:    [[OR_COND33_I_I:%.*]] = icmp ult i8 [[TMP4]], 6
+// AMDGCNSPIRV-NEXT:    [[TMP5:%.*]] = add i8 [[TMP3]], -97
+// AMDGCNSPIRV-NEXT:    [[OR_COND33_I_I:%.*]] = icmp ult i8 [[TMP5]], 6
 // AMDGCNSPIRV-NEXT:    br i1 [[OR_COND33_I_I]], label [[IF_END31_I_I]], label [[IF_ELSE17_I_I:%.*]]
 // AMDGCNSPIRV:       if.else17.i.i:
-// AMDGCNSPIRV-NEXT:    [[TMP5:%.*]] = add i8 [[TMP2]], -65
-// AMDGCNSPIRV-NEXT:    [[OR_COND34_I_I:%.*]] = icmp ult i8 [[TMP5]], 6
-// AMDGCNSPIRV-NEXT:    br i1 [[OR_COND34_I_I]], label [[IF_END31_I_I]], label [[CLEANUP_I_I]]
+// AMDGCNSPIRV-NEXT:    [[TMP6:%.*]] = add i8 [[TMP3]], -65
+// AMDGCNSPIRV-NEXT:    [[OR_COND34_I_I:%.*]] = icmp ult i8 [[TMP6]], 6
+// AMDGCNSPIRV-NEXT:    br i1 [[OR_COND34_I_I]], label [[IF_END31_I_I]], label [[_ZL15__MAKE_MANTISSAPKC_EXIT]]
 // AMDGCNSPIRV:       if.end31.i.i:
 // AMDGCNSPIRV-NEXT:    [[DOTSINK:%.*]] = phi i64 [ -48, [[WHILE_BODY_I32_I]] ], [ -87, [[IF_ELSE_I_I]] ], [ -55, [[IF_ELSE17_I_I]] ]
-// AMDGCNSPIRV-NEXT:    [[MUL24_I_I:%.*]] = shl i64 [[__R_0_I30_I]], 4
-// AMDGCNSPIRV-NEXT:    [[CONV25_I_I:%.*]] = zext nneg i8 [[TMP2]] to i64
+// AMDGCNSPIRV-NEXT:    [[MUL24_I_I:%.*]] = shl i64 [[__R_0_I30_I7]], 4
+// AMDGCNSPIRV-NEXT:    [[CONV25_I_I:%.*]] = zext nneg i8 [[TMP3]] to i64
 // AMDGCNSPIRV-NEXT:    [[ADD26_I_I:%.*]] = add i64 [[MUL24_I_I]], [[DOTSINK]]
-// AMDGCNSPIRV-NEXT:    [[ADD28_I_I:%.*]] = add i64 [[ADD26_I_I]], [[CONV25_I_I]]
-// AMDGCNSPIRV-NEXT:    [[INCDEC_PTR_I37_I:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[__TAGP_ADDR_0_I29_I]], i64 1
-// AMDGCNSPIRV-NEXT:    br label [[CLEANUP_I_I]]
-// AMDGCNSPIRV:       cleanup.i.i:
-// AMDGCNSPIRV-NEXT:    [[__TAGP_ADDR_1_I34_I]] = phi ptr addrspace(4) [ [[INCDEC_PTR_I37_I]], [[IF_END31_I_I]] ], [ [[__TAGP_ADDR_0_I29_I]], [[IF_ELSE17_I_I]] ]
-// AMDGCNSPIRV-NEXT:    [[__R_2_I_I]] = phi i64 [ [[ADD28_I_I]], [[IF_END31_I_I]] ], [ [[__R_0_I30_I]], [[IF_ELSE17_I_I]] ]
-// AMDGCNSPIRV-NEXT:    [[COND_I_I:%.*]] = phi i1 [ true, [[IF_END31_I_I]] ], [ false, [[IF_ELSE17_I_I]] ]
-// AMDGCNSPIRV-NEXT:    br i1 [[COND_I_I]], label [[WHILE_COND_I28_I]], label [[_ZL15__MAKE_MANTISSAPKC_EXIT]], !llvm.loop [[LOOP12]]
+// AMDGCNSPIRV-NEXT:    [[ADD28_I_I]] = add i64 [[ADD26_I_I]], [[CONV25_I_I]]
+// AMDGCNSPIRV-NEXT:    [[INCDEC_PTR_I36_I]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[__TAGP_ADDR_0_I29_I6]], i64 1
+// AMDGCNSPIRV-NEXT:    [[TMP7]] = load i8, ptr addrspace(4) [[INCDEC_PTR_I36_I]], align 1, !tbaa [[TBAA5]]
+// AMDGCNSPIRV-NEXT:    [[CMP_NOT_I31_I:%.*]] = icmp eq i8 [[TMP7]], 0
+// AMDGCNSPIRV-NEXT:    br i1 [[CMP_NOT_I31_I]], label [[_ZL15__MAKE_MANTISSAPKC_EXIT]], label [[WHILE_BODY_I32_I]], !llvm.loop [[LOOP12]]
 // AMDGCNSPIRV:       while.cond.i.i:
 // AMDGCNSPIRV-NEXT:    [[__TAGP_ADDR_0_I_I:%.*]] = phi ptr addrspace(4) [ [[__TAGP_ADDR_1_I_I:%.*]], [[WHILE_BODY_I_I:%.*]] ], [ [[INCDEC_PTR_I]], [[IF_THEN_I]] ]
 // AMDGCNSPIRV-NEXT:    [[__R_0_I_I:%.*]] = phi i64 [ [[__R_1_I_I:%.*]], [[WHILE_BODY_I_I]] ], [ 0, [[IF_THEN_I]] ]
-// AMDGCNSPIRV-NEXT:    [[TMP6:%.*]] = load i8, ptr addrspace(4) [[__TAGP_ADDR_0_I_I]], align 1, !tbaa [[TBAA5]]
-// AMDGCNSPIRV-NEXT:    [[CMP_NOT_I_I:%.*]] = icmp eq i8 [[TMP6]], 0
+// AMDGCNSPIRV-NEXT:    [[TMP8:%.*]] = load i8, ptr addrspace(4) [[__TAGP_ADDR_0_I_I]], align 1, !tbaa [[TBAA5]]
+// AMDGCNSPIRV-NEXT:    [[CMP_NOT_I_I:%.*]] = icmp eq i8 [[TMP8]], 0
 // AMDGCNSPIRV-NEXT:    br i1 [[CMP_NOT_I_I]], label [[_ZL15__MAKE_MANTISSAPKC_EXIT]], label [[WHILE_BODY_I_I]]
 // AMDGCNSPIRV:       while.body.i.i:
-// AMDGCNSPIRV-NEXT:    [[TMP7:%.*]] = and i8 [[TMP6]], -8
-// AMDGCNSPIRV-NEXT:    [[OR_COND_I_I:%.*]] = icmp eq i8 [[TMP7]], 48
+// AMDGCNSPIRV-NEXT:    [[TMP9:%.*]] = and i8 [[TMP8]], -8
+// AMDGCNSPIRV-NEXT:    [[OR_COND_I_I:%.*]] = icmp eq i8 [[TMP9]], 48
 // AMDGCNSPIRV-NEXT:    [[MUL_I_I:%.*]] = shl i64 [[__R_0_I_I]], 3
-// AMDGCNSPIRV-NEXT:    [[CONV5_I_I:%.*]] = zext nneg i8 [[TMP6]] to i64
+// AMDGCNSPIRV-NEXT:    [[CONV5_I_I:%.*]] = zext nneg i8 [[TMP8]] to i64
 // AMDGCNSPIRV-NEXT:    [[ADD_I_I:%.*]] = add i64 [[MUL_I_I]], -48
 // AMDGCNSPIRV-NEXT:    [[SUB_I_I:%.*]] = add i64 [[ADD_I_I]], [[CONV5_I_I]]
 // AMDGCNSPIRV-NEXT:    [[__TAGP_ADDR_1_I_I_IDX:%.*]] = zext i1 [[OR_COND_I_I]] to i64
@@ -395,14 +371,14 @@ extern "C" __device__ uint64_t test___make_mantissa_base16(const char *p) {
 // AMDGCNSPIRV:       while.cond.i14.i:
 // AMDGCNSPIRV-NEXT:    [[__TAGP_ADDR_0_I15_I:%.*]] = phi ptr addrspace(4) [ [[__TAGP_ADDR_1_I25_I:%.*]], [[WHILE_BODY_I18_I:%.*]] ], [ [[P]], [[ENTRY:%.*]] ]
 // AMDGCNSPIRV-NEXT:    [[__R_0_I16_I:%.*]] = phi i64 [ [[__R_1_I26_I:%.*]], [[WHILE_BODY_I18_I]] ], [ 0, [[ENTRY]] ]
-// AMDGCNSPIRV-NEXT:    [[TMP8:%.*]] = load i8, ptr addrspace(4) [[__TAGP_ADDR_0_I15_I]], align 1, !tbaa [[TBAA5]]
-// AMDGCNSPIRV-NEXT:    [[CMP_NOT_I17_I:%.*]] = icmp eq i8 [[TMP8]], 0
+// AMDGCNSPIRV-NEXT:    [[TMP10:%.*]] = load i8, ptr addrspace(4) [[__TAGP_ADDR_0_I15_I]], align 1, !tbaa [[TBAA5]]
+// AMDGCNSPIRV-NEXT:    [[CMP_NOT_I17_I:%.*]] = icmp eq i8 [[TMP10]], 0
 // AMDGCNSPIRV-NEXT:    br i1 [[CMP_NOT_I17_I]], label [[_ZL15__MAKE_MANTISSAPKC_EXIT]], label [[WHILE_BODY_I18_I]]
 // AMDGCNSPIRV:       while.body.i18.i:
-// AMDGCNSPIRV-NEXT:    [[TMP9:%.*]] = add i8 [[TMP8]], -48
-// AMDGCNSPIRV-NEXT:    [[OR_COND_I19_I:%.*]] = icmp ult i8 [[TMP9]], 10
+// AMDGCNSPIRV-NEXT:    [[TMP11:%.*]] = add i8 [[TMP10]], -48
+// AMDGCNSPIRV-NEXT:    [[OR_COND_I19_I:%.*]] = icmp ult i8 [[TMP11]], 10
 // AMDGCNSPIRV-NEXT:    [[MUL_I20_I:%.*]] = mul i64 [[__R_0_I16_I]], 10
-// AMDGCNSPIRV-NEXT:    [[CONV5_I21_I:%.*]] = zext nneg i8 [[TMP8]] to i64
+// AMDGCNSPIRV-NEXT:    [[CONV5_I21_I:%.*]] = zext nneg i8 [[TMP10]] to i64
 // AMDGCNSPIRV-NEXT:    [[ADD_I22_I:%.*]] = add i64 [[MUL_I20_I]], -48
 // AMDGCNSPIRV-NEXT:    [[SUB_I23_I:%.*]] = add i64 [[ADD_I22_I]], [[CONV5_I21_I]]
 // AMDGCNSPIRV-NEXT:    [[__TAGP_ADDR_1_I25_I_IDX:%.*]] = zext i1 [[OR_COND_I19_I]] to i64
@@ -410,7 +386,7 @@ extern "C" __device__ uint64_t test___make_mantissa_base16(const char *p) {
 // AMDGCNSPIRV-NEXT:    [[__R_1_I26_I]] = select i1 [[OR_COND_I19_I]], i64 [[SUB_I23_I]], i64 [[__R_0_I16_I]]
 // AMDGCNSPIRV-NEXT:    br i1 [[OR_COND_I19_I]], label [[WHILE_COND_I14_I]], label [[_ZL15__MAKE_MANTISSAPKC_EXIT]], !llvm.loop [[LOOP11]]
 // AMDGCNSPIRV:       _ZL15__make_mantissaPKc.exit:
-// AMDGCNSPIRV-NEXT:    [[RETVAL_0_I:%.*]] = phi i64 [ 0, [[WHILE_BODY_I_I]] ], [ [[__R_0_I_I]], [[WHILE_COND_I_I]] ], [ 0, [[CLEANUP_I_I]] ], [ [[__R_0_I30_I]], [[WHILE_COND_I28_I]] ], [ 0, [[WHILE_BODY_I18_I]] ], [ [[__R_0_I16_I]], [[WHILE_COND_I14_I]] ]
+// AMDGCNSPIRV-NEXT:    [[RETVAL_0_I:%.*]] = phi i64 [ 0, [[IF_THEN5_I]] ], [ 0, [[WHILE_BODY_I_I]] ], [ [[__R_0_I_I]], [[WHILE_COND_I_I]] ], [ [[ADD28_I_I]], [[IF_END31_I_I]] ], [ 0, [[IF_ELSE17_I_I]] ], [ 0, [[WHILE_BODY_I18_I]] ], [ [[__R_0_I16_I]], [[WHILE_COND_I14_I]] ]
 // AMDGCNSPIRV-NEXT:    ret i64 [[RETVAL_0_I]]
 //
 extern "C" __device__ uint64_t test___make_mantissa(const char *p) {
@@ -461,22 +437,22 @@ extern "C" __device__ long long test_llabs(long x) {
 
 // DEFAULT-LABEL: @test_acosf(
 // DEFAULT-NEXT:  entry:
-// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_acos_f32(float noundef [[X:%.*]]) #[[ATTR12:[0-9]+]]
+// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_acos_f32(float noundef [[X:%.*]]) #[[ATTR14:[0-9]+]]
 // DEFAULT-NEXT:    ret float [[CALL_I]]
 //
 // FINITEONLY-LABEL: @test_acosf(
 // FINITEONLY-NEXT:  entry:
-// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_acos_f32(float noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR12:[0-9]+]]
+// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_acos_f32(float noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR14:[0-9]+]]
 // FINITEONLY-NEXT:    ret float [[CALL_I]]
 //
 // APPROX-LABEL: @test_acosf(
 // APPROX-NEXT:  entry:
-// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_acos_f32(float noundef [[X:%.*]]) #[[ATTR12:[0-9]+]]
+// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_acos_f32(float noundef [[X:%.*]]) #[[ATTR14:[0-9]+]]
 // APPROX-NEXT:    ret float [[CALL_I]]
 //
 // NCRDIV-LABEL: @test_acosf(
 // NCRDIV-NEXT:  entry:
-// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_acos_f32(float noundef [[X:%.*]]) #[[ATTR12:[0-9]+]]
+// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_acos_f32(float noundef [[X:%.*]]) #[[ATTR14:[0-9]+]]
 // NCRDIV-NEXT:    ret float [[CALL_I]]
 //
 // AMDGCNSPIRV-LABEL: @test_acosf(
@@ -490,22 +466,22 @@ extern "C" __device__ float test_acosf(float x) {
 
 // DEFAULT-LABEL: @test_acos(
 // DEFAULT-NEXT:  entry:
-// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_acos_f64(double noundef [[X:%.*]]) #[[ATTR12]]
+// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_acos_f64(double noundef [[X:%.*]]) #[[ATTR14]]
 // DEFAULT-NEXT:    ret double [[CALL_I]]
 //
 // FINITEONLY-LABEL: @test_acos(
 // FINITEONLY-NEXT:  entry:
-// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_acos_f64(double noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR12]]
+// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_acos_f64(double noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR14]]
 // FINITEONLY-NEXT:    ret double [[CALL_I]]
 //
 // APPROX-LABEL: @test_acos(
 // APPROX-NEXT:  entry:
-// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_acos_f64(double noundef [[X:%.*]]) #[[ATTR12]]
+// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_acos_f64(double noundef [[X:%.*]]) #[[ATTR14]]
 // APPROX-NEXT:    ret double [[CALL_I]]
 //
 // NCRDIV-LABEL: @test_acos(
 // NCRDIV-NEXT:  entry:
-// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_acos_f64(double noundef [[X:%.*]]) #[[ATTR12]]
+// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_acos_f64(double noundef [[X:%.*]]) #[[ATTR14]]
 // NCRDIV-NEXT:    ret double [[CALL_I]]
 //
 // AMDGCNSPIRV-LABEL: @test_acos(
@@ -519,22 +495,22 @@ extern "C" __device__ double test_acos(double x) {
 
 // DEFAULT-LABEL: @test_acoshf(
 // DEFAULT-NEXT:  entry:
-// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_acosh_f32(float noundef [[X:%.*]]) #[[ATTR13:[0-9]+]]
+// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_acosh_f32(float noundef [[X:%.*]]) #[[ATTR15:[0-9]+]]
 // DEFAULT-NEXT:    ret float [[CALL_I]]
 //
 // FINITEONLY-LABEL: @test_acoshf(
 // FINITEONLY-NEXT:  entry:
-// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_acosh_f32(float noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR13:[0-9]+]]
+// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_acosh_f32(float noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR15:[0-9]+]]
 // FINITEONLY-NEXT:    ret float [[CALL_I]]
 //
 // APPROX-LABEL: @test_acoshf(
 // APPROX-NEXT:  entry:
-// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_acosh_f32(float noundef [[X:%.*]]) #[[ATTR13:[0-9]+]]
+// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_acosh_f32(float noundef [[X:%.*]]) #[[ATTR15:[0-9]+]]
 // APPROX-NEXT:    ret float [[CALL_I]]
 //
 // NCRDIV-LABEL: @test_acoshf(
 // NCRDIV-NEXT:  entry:
-// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_acosh_f32(float noundef [[X:%.*]]) #[[ATTR13:[0-9]+]]
+// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_acosh_f32(float noundef [[X:%.*]]) #[[ATTR15:[0-9]+]]
 // NCRDIV-NEXT:    ret float [[CALL_I]]
 //
 // AMDGCNSPIRV-LABEL: @test_acoshf(
@@ -548,22 +524,22 @@ extern "C" __device__ float test_acoshf(float x) {
 
 // DEFAULT-LABEL: @test_acosh(
 // DEFAULT-NEXT:  entry:
-// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_acosh_f64(double noundef [[X:%.*]]) #[[ATTR13]]
+// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_acosh_f64(double noundef [[X:%.*]]) #[[ATTR15]]
 // DEFAULT-NEXT:    ret double [[CALL_I]]
 //
 // FINITEONLY-LABEL: @test_acosh(
 // FINITEONLY-NEXT:  entry:
-// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_acosh_f64(double noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR13]]
+// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_acosh_f64(double noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR15]]
 // FINITEONLY-NEXT:    ret double [[CALL_I]]
 //
 // APPROX-LABEL: @test_acosh(
 // APPROX-NEXT:  entry:
-// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_acosh_f64(double noundef [[X:%.*]]) #[[ATTR13]]
+// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_acosh_f64(double noundef [[X:%.*]]) #[[ATTR15]]
 // APPROX-NEXT:    ret double [[CALL_I]]
 //
 // NCRDIV-LABEL: @test_acosh(
 // NCRDIV-NEXT:  entry:
-// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_acosh_f64(double noundef [[X:%.*]]) #[[ATTR13]]
+// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_acosh_f64(double noundef [[X:%.*]]) #[[ATTR15]]
 // NCRDIV-NEXT:    ret double [[CALL_I]]
 //
 // AMDGCNSPIRV-LABEL: @test_acosh(
@@ -577,22 +553,22 @@ extern "C" __device__ double test_acosh(double x) {
 
 // DEFAULT-LABEL: @test_asinf(
 // DEFAULT-NEXT:  entry:
-// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_asin_f32(float noundef [[X:%.*]]) #[[ATTR12]]
+// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_asin_f32(float noundef [[X:%.*]]) #[[ATTR14]]
 // DEFAULT-NEXT:    ret float [[CALL_I]]
 //
 // FINITEONLY-LABEL: @test_asinf(
 // FINITEONLY-NEXT:  entry:
-// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_asin_f32(float noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR12]]
+// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_asin_f32(float noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR14]]
 // FINITEONLY-NEXT:    ret float [[CALL_I]]
 //
 // APPROX-LABEL: @test_asinf(
 // APPROX-NEXT:  entry:
-// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_asin_f32(float noundef [[X:%.*]]) #[[ATTR12]]
+// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_asin_f32(float noundef [[X:%.*]]) #[[ATTR14]]
 // APPROX-NEXT:    ret float [[CALL_I]]
 //
 // NCRDIV-LABEL: @test_asinf(
 // NCRDIV-NEXT:  entry:
-// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_asin_f32(float noundef [[X:%.*]]) #[[ATTR12]]
+// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_asin_f32(float noundef [[X:%.*]]) #[[ATTR14]]
 // NCRDIV-NEXT:    ret float [[CALL_I]]
 //
 // AMDGCNSPIRV-LABEL: @test_asinf(
@@ -606,22 +582,22 @@ extern "C" __device__ float test_asinf(float x) {
 
 // DEFAULT-LABEL: @test_asin(
 // DEFAULT-NEXT:  entry:
-// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_asin_f64(double noundef [[X:%.*]]) #[[ATTR12]]
+// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_asin_f64(double noundef [[X:%.*]]) #[[ATTR14]]
 // DEFAULT-NEXT:    ret double [[CALL_I]]
 //
 // FINITEONLY-LABEL: @test_asin(
 // FINITEONLY-NEXT:  entry:
-// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_asin_f64(double noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR12]]
+// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_asin_f64(double noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR14]]
 // FINITEONLY-NEXT:    ret double [[CALL_I]]
 //
 // APPROX-LABEL: @test_asin(
 // APPROX-NEXT:  entry:
-// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_asin_f64(double noundef [[X:%.*]]) #[[ATTR12]]
+// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_asin_f64(double noundef [[X:%.*]]) #[[ATTR14]]
 // APPROX-NEXT:    ret double [[CALL_I]]
 //
 // NCRDIV-LABEL: @test_asin(
 // NCRDIV-NEXT:  entry:
-// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_asin_f64(double noundef [[X:%.*]]) #[[ATTR12]]
+// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_asin_f64(double noundef [[X:%.*]]) #[[ATTR14]]
 // NCRDIV-NEXT:    ret double [[CALL_I]]
 //
 // AMDGCNSPIRV-LABEL: @test_asin(
@@ -636,22 +612,22 @@ extern "C" __device__ double test_asin(double x) {
 
 // DEFAULT-LABEL: @test_asinhf(
 // DEFAULT-NEXT:  entry:
-// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_asinh_f32(float noundef [[X:%.*]]) #[[ATTR13]]
+// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_asinh_f32(float noundef [[X:%.*]]) #[[ATTR15]]
 // DEFAULT-NEXT:    ret float [[CALL_I]]
 //
 // FINITEONLY-LABEL: @test_asinhf(
 // FINITEONLY-NEXT:  entry:
-// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_asinh_f32(float noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR13]]
+// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_asinh_f32(float noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR15]]
 // FINITEONLY-NEXT:    ret float [[CALL_I]]
 //
 // APPROX-LABEL: @test_asinhf(
 // APPROX-NEXT:  entry:
-// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_asinh_f32(float noundef [[X:%.*]]) #[[ATTR13]]
+// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_asinh_f32(float noundef [[X:%.*]]) #[[ATTR15]]
 // APPROX-NEXT:    ret float [[CALL_I]]
 //
 // NCRDIV-LABEL: @test_asinhf(
 // NCRDIV-NEXT:  entry:
-// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_asinh_f32(float noundef [[X:%.*]]) #[[ATTR13]]
+// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_asinh_f32(float noundef [[X:%.*]]) #[[ATTR15]]
 // NCRDIV-NEXT:    ret float [[CALL_I]]
 //
 // AMDGCNSPIRV-LABEL: @test_asinhf(
@@ -665,22 +641,22 @@ extern "C" __device__ float test_asinhf(float x) {
 
 // DEFAULT-LABEL: @test_asinh(
 // DEFAULT-NEXT:  entry:
-// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_asinh_f64(double noundef [[X:%.*]]) #[[ATTR13]]
+// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_asinh_f64(double noundef [[X:%.*]]) #[[ATTR15]]
 // DEFAULT-NEXT:    ret double [[CALL_I]]
 //
 // FINITEONLY-LABEL: @test_asinh(
 // FINITEONLY-NEXT:  entry:
-// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_asinh_f64(double noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR13]]
+// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_asinh_f64(double noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR15]]
 // FINITEONLY-NEXT:    ret double [[CALL_I]]
 //
 // APPROX-LABEL: @test_asinh(
 // APPROX-NEXT:  entry:
-// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_asinh_f64(double noundef [[X:%.*]]) #[[ATTR13]]
+// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_asinh_f64(double noundef [[X:%.*]]) #[[ATTR15]]
 // APPROX-NEXT:    ret double [[CALL_I]]
 //
 // NCRDIV-LABEL: @test_asinh(
 // NCRDIV-NEXT:  entry:
-// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_asinh_f64(double noundef [[X:%.*]]) #[[ATTR13]]
+// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_asinh_f64(double noundef [[X:%.*]]) #[[ATTR15]]
 // NCRDIV-NEXT:    ret double [[CALL_I]]
 //
 // AMDGCNSPIRV-LABEL: @test_asinh(
@@ -694,22 +670,22 @@ extern "C" __device__ double test_asinh(double x) {
 
 // DEFAULT-LABEL: @test_atan2f(
 // DEFAULT-NEXT:  entry:
-// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_atan2_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]]) #[[ATTR12]]
+// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_atan2_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]]) #[[ATTR14]]
 // DEFAULT-NEXT:    ret float [[CALL_I]]
 //
 // FINITEONLY-LABEL: @test_atan2f(
 // FINITEONLY-NEXT:  entry:
-// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_atan2_f32(float noundef nofpclass(nan inf) [[X:%.*]], float noundef nofpclass(nan inf) [[Y:%.*]]) #[[ATTR12]]
+// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_atan2_f32(float noundef nofpclass(nan inf) [[X:%.*]], float noundef nofpclass(nan inf) [[Y:%.*]]) #[[ATTR14]]
 // FINITEONLY-NEXT:    ret float [[CALL_I]]
 //
 // APPROX-LABEL: @test_atan2f(
 // APPROX-NEXT:  entry:
-// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_atan2_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]]) #[[ATTR12]]
+// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_atan2_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]]) #[[ATTR14]]
 // APPROX-NEXT:    ret float [[CALL_I]]
 //
 // NCRDIV-LABEL: @test_atan2f(
 // NCRDIV-NEXT:  entry:
-// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_atan2_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]]) #[[ATTR12]]
+// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_atan2_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]]) #[[ATTR14]]
 // NCRDIV-NEXT:    ret float [[CALL_I]]
 //
 // AMDGCNSPIRV-LABEL: @test_atan2f(
@@ -723,22 +699,22 @@ extern "C" __device__ float test_atan2f(float x, float y) {
 
 // DEFAULT-LABEL: @test_atan2(
 // DEFAULT-NEXT:  entry:
-// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_atan2_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]]) #[[ATTR12]]
+// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_atan2_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]]) #[[ATTR14]]
 // DEFAULT-NEXT:    ret double [[CALL_I]]
 //
 // FINITEONLY-LABEL: @test_atan2(
 // FINITEONLY-NEXT:  entry:
-// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_atan2_f64(double noundef nofpclass(nan inf) [[X:%.*]], double noundef nofpclass(nan inf) [[Y:%.*]]) #[[ATTR12]]
+// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_atan2_f64(double noundef nofpclass(nan inf) [[X:%.*]], double noundef nofpclass(nan inf) [[Y:%.*]]) #[[ATTR14]]
 // FINITEONLY-NEXT:    ret double [[CALL_I]]
 //
 // APPROX-LABEL: @test_atan2(
 // APPROX-NEXT:  entry:
-// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_atan2_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]]) #[[ATTR12]]
+// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_atan2_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]]) #[[ATTR14]]
 // APPROX-NEXT:    ret double [[CALL_I]]
 //
 // NCRDIV-LABEL: @test_atan2(
 // NCRDIV-NEXT:  entry:
-// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_atan2_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]]) #[[ATTR12]]
+// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_atan2_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]]) #[[ATTR14]]
 // NCRDIV-NEXT:    ret double [[CALL_I]]
 //
 // AMDGCNSPIRV-LABEL: @test_atan2(
@@ -752,22 +728,22 @@ extern "C" __device__ double test_atan2(double x, double y) {
 
 // DEFAULT-LABEL: @test_atanf(
 // DEFAULT-NEXT:  entry:
-// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_atan_f32(float noundef [[X:%.*]]) #[[ATTR12]]
+// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_atan_f32(float noundef [[X:%.*]]) #[[ATTR14]]
 // DEFAULT-NEXT:    ret float [[CALL_I]]
 //
 // FINITEONLY-LABEL: @test_atanf(
 // FINITEONLY-NEXT:  entry:
-// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_atan_f32(float noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR12]]
+// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_atan_f32(float noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR14]]
 // FINITEONLY-NEXT:    ret float [[CALL_I]]
 //
 // APPROX-LABEL: @test_atanf(
 // APPROX-NEXT:  entry:
-// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_atan_f32(float noundef [[X:%.*]]) #[[ATTR12]]
+// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_atan_f32(float noundef [[X:%.*]]) #[[ATTR14]]
 // APPROX-NEXT:    ret float [[CALL_I]]
 //
 // NCRDIV-LABEL: @test_atanf(
 // NCRDIV-NEXT:  entry:
-// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_atan_f32(float noundef [[X:%.*]]) #[[ATTR12]]
+// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_atan_f32(float noundef [[X:%.*]]) #[[ATTR14]]
 // NCRDIV-NEXT:    ret float [[CALL_I]]
 //
 // AMDGCNSPIRV-LABEL: @test_atanf(
@@ -781,22 +757,22 @@ extern "C" __device__ float test_atanf(float x) {
 
 // DEFAULT-LABEL: @test_atan(
 // DEFAULT-NEXT:  entry:
-// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_atan_f64(double noundef [[X:%.*]]) #[[ATTR12]]
+// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_atan_f64(double noundef [[X:%.*]]) #[[ATTR14]]
 // DEFAULT-NEXT:    ret double [[CALL_I]]
 //
 // FINITEONLY-LABEL: @test_atan(
 // FINITEONLY-NEXT:  entry:
-// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_atan_f64(double noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR12]]
+// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_atan_f64(double noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR14]]
 // FINITEONLY-NEXT:    ret double [[CALL_I]]
 //
 // APPROX-LABEL: @test_atan(
 // APPROX-NEXT:  entry:
-// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_atan_f64(double noundef [[X:%.*]]) #[[ATTR12]]
+// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_atan_f64(double noundef [[X:%.*]]) #[[ATTR14]]
 // APPROX-NEXT:    ret double [[CALL_I]]
 //
 // NCRDIV-LABEL: @test_atan(
 // NCRDIV-NEXT:  entry:
-// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_atan_f64(double noundef [[X:%.*]]) #[[ATTR12]]
+// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_atan_f64(double noundef [[X:%.*]]) #[[ATTR14]]
 // NCRDIV-NEXT:    ret double [[CALL_I]]
 //
 // AMDGCNSPIRV-LABEL: @test_atan(
@@ -810,22 +786,22 @@ extern "C" __device__ double test_atan(double x) {
 
 // DEFAULT-LABEL: @test_atanhf(
 // DEFAULT-NEXT:  entry:
-// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_atanh_f32(float noundef [[X:%.*]]) #[[ATTR13]]
+// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_atanh_f32(float noundef [[X:%.*]]) #[[ATTR15]]
 // DEFAULT-NEXT:    ret float [[CALL_I]]
 //
 // FINITEONLY-LABEL: @test_atanhf(
 // FINITEONLY-NEXT:  entry:
-// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_atanh_f32(float noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR13]]
+// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_atanh_f32(float noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR15]]
 // FINITEONLY-NEXT:    ret float [[CALL_I]]
 //
 // APPROX-LABEL: @test_atanhf(
 // APPROX-NEXT:  entry:
-// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_atanh_f32(float noundef [[X:%.*]]) #[[ATTR13]]
+// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_atanh_f32(float noundef [[X:%.*]]) #[[ATTR15]]
 // APPROX-NEXT:    ret float [[CALL_I]]
 //
 // NCRDIV-LABEL: @test_atanhf(
 // NCRDIV-NEXT:  entry:
-// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_atanh_f32(float noundef [[X:%.*]]) #[[ATTR13]]
+// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_atanh_f32(float noundef [[X:%.*]]) #[[ATTR15]]
 // NCRDIV-NEXT:    ret float [[CALL_I]]
 //
 // AMDGCNSPIRV-LABEL: @test_atanhf(
@@ -839,22 +815,22 @@ extern "C" __device__ float test_atanhf(float x) {
 
 // DEFAULT-LABEL: @test_atanh(
 // DEFAULT-NEXT:  entry:
-// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_atanh_f64(double noundef [[X:%.*]]) #[[ATTR13]]
+// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_atanh_f64(double noundef [[X:%.*]]) #[[ATTR15]]
 // DEFAULT-NEXT:    ret double [[CALL_I]]
 //
 // FINITEONLY-LABEL: @test_atanh(
 // FINITEONLY-NEXT:  entry:
-// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_atanh_f64(double noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR13]]
+// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_atanh_f64(double noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR15]]
 // FINITEONLY-NEXT:    ret double [[CALL_I]]
 //
 // APPROX-LABEL: @test_atanh(
 // APPROX-NEXT:  entry:
-// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_atanh_f64(double noundef [[X:%.*]]) #[[ATTR13]]
+// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_atanh_f64(double noundef [[X:%.*]]) #[[ATTR15]]
 // APPROX-NEXT:    ret double [[CALL_I]]
 //
 // NCRDIV-LABEL: @test_atanh(
 // NCRDIV-NEXT:  entry:
-// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_atanh_f64(double noundef [[X:%.*]]) #[[ATTR13]]
+// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_atanh_f64(double noundef [[X:%.*]]) #[[ATTR15]]
 // NCRDIV-NEXT:    ret double [[CALL_I]]
 //
 // AMDGCNSPIRV-LABEL: @test_atanh(
@@ -868,22 +844,22 @@ extern "C" __device__ double test_atanh(double x) {
 
 // DEFAULT-LABEL: @test_cbrtf(
 // DEFAULT-NEXT:  entry:
-// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_cbrt_f32(float noundef [[X:%.*]]) #[[ATTR13]]
+// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_cbrt_f32(float noundef [[X:%.*]]) #[[ATTR15]]
 // DEFAULT-NEXT:    ret float [[CALL_I]]
 //
 // FINITEONLY-LABEL: @test_cbrtf(
 // FINITEONLY-NEXT:  entry:
-// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_cbrt_f32(float noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR13]]
+// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_cbrt_f32(float noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR15]]
 // FINITEONLY-NEXT:    ret float [[CALL_I]]
 //
 // APPROX-LABEL: @test_cbrtf(
 // APPROX-NEXT:  entry:
-// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_cbrt_f32(float noundef [[X:%.*]]) #[[ATTR13]]
+// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_cbrt_f32(float noundef [[X:%.*]]) #[[ATTR15]]
 // APPROX-NEXT:    ret float [[CALL_I]]
 //
 // NCRDIV-LABEL: @test_cbrtf(
 // NCRDIV-NEXT:  entry:
-// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_cbrt_f32(float noundef [[X:%.*]]) #[[ATTR13]]
+// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_cbrt_f32(float noundef [[X:%.*]]) #[[ATTR15]]
 // NCRDIV-NEXT:    ret float [[CALL_I]]
 //
 // AMDGCNSPIRV-LABEL: @test_cbrtf(
@@ -897,22 +873,22 @@ extern "C" __device__ float test_cbrtf(float x) {
 
 // DEFAULT-LABEL: @test_cbrt(
 // DEFAULT-NEXT:  entry:
-// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_cbrt_f64(double noundef [[X:%.*]]) #[[ATTR13]]
+// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_cbrt_f64(double noundef [[X:%.*]]) #[[ATTR15]]
 // DEFAULT-NEXT:    ret double [[CALL_I]]
 //
 // FINITEONLY-LABEL: @test_cbrt(
 // FINITEONLY-NEXT:  entry:
-// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_cbrt_f64(double noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR13]]
+// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_cbrt_f64(double noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR15]]
 // FINITEONLY-NEXT:    ret double [[CALL_I]]
 //
 // APPROX-LABEL: @test_cbrt(
 // APPROX-NEXT:  entry:
-// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_cbrt_f64(double noundef [[X:%.*]]) #[[ATTR13]]
+// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_cbrt_f64(double noundef [[X:%.*]]) #[[ATTR15]]
 // APPROX-NEXT:    ret double [[CALL_I]]
 //
 // NCRDIV-LABEL: @test_cbrt(
 // NCRDIV-NEXT:  entry:
-// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_cbrt_f64(double noundef [[X:%.*]]) #[[ATTR13]]
+// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_cbrt_f64(double noundef [[X:%.*]]) #[[ATTR15]]
 // NCRDIV-NEXT:    ret double [[CALL_I]]
 //
 // AMDGCNSPIRV-LABEL: @test_cbrt(
@@ -1042,22 +1018,22 @@ extern "C" __device__ double test_copysign(double x, double y) {
 
 // DEFAULT-LABEL: @test_cosf(
 // DEFAULT-NEXT:  entry:
-// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_cos_f32(float noundef [[X:%.*]]) #[[ATTR14:[0-9]+]]
+// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_cos_f32(float noundef [[X:%.*]]) #[[ATTR16:[0-9]+]]
 // DEFAULT-NEXT:    ret float [[CALL_I]]
 //
 // FINITEONLY-LABEL: @test_cosf(
 // FINITEONLY-NEXT:  entry:
-// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_cos_f32(float noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR14:[0-9]+]]
+// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_cos_f32(float noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR16:[0-9]+]]
 // FINITEONLY-NEXT:    ret float [[CALL_I]]
 //
 // APPROX-LABEL: @test_cosf(
 // APPROX-NEXT:  entry:
-// APPROX-NEXT:    [[CALL_I1:%.*]] = tail call contract noundef float @__ocml_native_cos_f32(float noundef [[X:%.*]]) #[[ATTR14:[0-9]+]]
+// APPROX-NEXT:    [[CALL_I1:%.*]] = tail call contract noundef float @__ocml_native_cos_f32(float noundef [[X:%.*]]) #[[ATTR16:[0-9]+]]
 // APPROX-NEXT:    ret float [[CALL_I1]]
 //
 // NCRDIV-LABEL: @test_cosf(
 // NCRDIV-NEXT:  entry:
-// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_cos_f32(float noundef [[X:%.*]]) #[[ATTR14:[0-9]+]]
+// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_cos_f32(float noundef [[X:%.*]]) #[[ATTR16:[0-9]+]]
 // NCRDIV-NEXT:    ret float [[CALL_I]]
 //
 // AMDGCNSPIRV-LABEL: @test_cosf(
@@ -1071,22 +1047,22 @@ extern "C" __device__ float test_cosf(float x) {
 
 // DEFAULT-LABEL: @test_cos(
 // DEFAULT-NEXT:  entry:
-// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_cos_f64(double noundef [[X:%.*]]) #[[ATTR14]]
+// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_cos_f64(double noundef [[X:%.*]]) #[[ATTR16]]
 // DEFAULT-NEXT:    ret double [[CALL_I]]
 //
 // FINITEONLY-LABEL: @test_cos(
 // FINITEONLY-NEXT:  entry:
-// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_cos_f64(double noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR14]]
+// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_cos_f64(double noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR16]]
 // FINITEONLY-NEXT:    ret double [[CALL_I]]
 //
 // APPROX-LABEL: @test_cos(
 // APPROX-NEXT:  entry:
-// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_cos_f64(double noundef [[X:%.*]]) #[[ATTR14]]
+// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_cos_f64(double noundef [[X:%.*]]) #[[ATTR16]]
 // APPROX-NEXT:    ret double [[CALL_I]]
 //
 // NCRDIV-LABEL: @test_cos(
 // NCRDIV-NEXT:  entry:
-// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_cos_f64(double noundef [[X:%.*]]) #[[ATTR14]]
+// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_cos_f64(double noundef [[X:%.*]]) #[[ATTR16]]
 // NCRDIV-NEXT:    ret double [[CALL_I]]
 //
 // AMDGCNSPIRV-LABEL: @test_cos(
@@ -1100,22 +1076,22 @@ extern "C" __device__ double test_cos(double x) {
 
 // DEFAULT-LABEL: @test_coshf(
 // DEFAULT-NEXT:  entry:
-// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_cosh_f32(float noundef [[X:%.*]]) #[[ATTR13]]
+// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_cosh_f32(float noundef [[X:%.*]]) #[[ATTR15]]
 // DEFAULT-NEXT:    ret float [[CALL_I]]
 //
 // FINITEONLY-LABEL: @test_coshf(
 // FINITEONLY-NEXT:  entry:
-// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_cosh_f32(float noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR13]]
+// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_cosh_f32(float noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR15]]
 // FINITEONLY-NEXT:    ret float [[CALL_I]]
 //
 // APPROX-LABEL: @test_coshf(
 // APPROX-NEXT:  entry:
-// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_cosh_f32(float noundef [[X:%.*]]) #[[ATTR13]]
+// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_cosh_f32(float noundef [[X:%.*]]) #[[ATTR15]]
 // APPROX-NEXT:    ret float [[CALL_I]]
 //
 // NCRDIV-LABEL: @test_coshf(
 // NCRDIV-NEXT:  entry:
-// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_cosh_f32(float noundef [[X:%.*]]) #[[ATTR13]]
+// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_cosh_f32(float noundef [[X:%.*]]) #[[ATTR15]]
 // NCRDIV-NEXT:    ret float [[CALL_I]]
 //
 // AMDGCNSPIRV-LABEL: @test_coshf(
@@ -1129,22 +1105,22 @@ extern "C" __device__ float test_coshf(float x) {
 
 // DEFAULT-LABEL: @test_cosh(
 // DEFAULT-NEXT:  entry:
-// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_cosh_f64(double noundef [[X:%.*]]) #[[ATTR13]]
+// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_cosh_f64(double noundef [[X:%.*]]) #[[ATTR15]]
 // DEFAULT-NEXT:    ret double [[CALL_I]]
 //
 // FINITEONLY-LABEL: @test_cosh(
 // FINITEONLY-NEXT:  entry:
-// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_cosh_f64(double noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR13]]
+// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_cosh_f64(double noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR15]]
 // FINITEONLY-NEXT:    ret double [[CALL_I]]
 //
 // APPROX-LABEL: @test_cosh(
 // APPROX-NEXT:  entry:
-// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_cosh_f64(double noundef [[X:%.*]]) #[[ATTR13]]
+// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_cosh_f64(double noundef [[X:%.*]]) #[[ATTR15]]
 // APPROX-NEXT:    ret double [[CALL_I]]
 //
 // NCRDIV-LABEL: @test_cosh(
 // NCRDIV-NEXT:  entry:
-// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_cosh_f64(double noundef [[X:%.*]]) #[[ATTR13]]
+// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_cosh_f64(double noundef [[X:%.*]]) #[[ATTR15]]
 // NCRDIV-NEXT:    ret double [[CALL_I]]
 //
 // AMDGCNSPIRV-LABEL: @test_cosh(
@@ -1158,22 +1134,22 @@ extern "C" __device__ double test_cosh(double x) {
 
 // DEFAULT-LABEL: @test_cospif(
 // DEFAULT-NEXT:  entry:
-// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_cospi_f32(float noundef [[X:%.*]]) #[[ATTR14]]
+// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_cospi_f32(float noundef [[X:%.*]]) #[[ATTR16]]
 // DEFAULT-NEXT:    ret float [[CALL_I]]
 //
 // FINITEONLY-LABEL: @test_cospif(
 // FINITEONLY-NEXT:  entry:
-// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_cospi_f32(float noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR14]]
+// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_cospi_f32(float noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR16]]
 // FINITEONLY-NEXT:    ret float [[CALL_I]]
 //
 // APPROX-LABEL: @test_cospif(
 // APPROX-NEXT:  entry:
-// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_cospi_f32(float noundef [[X:%.*]]) #[[ATTR14]]
+// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_cospi_f32(float noundef [[X:%.*]]) #[[ATTR16]]
 // APPROX-NEXT:    ret float [[CALL_I]]
 //
 // NCRDIV-LABEL: @test_cospif(
 // NCRDIV-NEXT:  entry:
-// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_cospi_f32(float noundef [[X:%.*]]) #[[ATTR14]]
+// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_cospi_f32(float noundef [[X:%.*]]) #[[ATTR16]]
 // NCRDIV-NEXT:    ret float [[CALL_I]]
 //
 // AMDGCNSPIRV-LABEL: @test_cospif(
@@ -1187,22 +1163,22 @@ extern "C" __device__ float test_cospif(float x) {
 
 // DEFAULT-LABEL: @test_cospi(
 // DEFAULT-NEXT:  entry:
-// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_cospi_f64(double noundef [[X:%.*]]) #[[ATTR14]]
+// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_cospi_f64(double noundef [[X:%.*]]) #[[ATTR16]]
 // DEFAULT-NEXT:    ret double [[CALL_I]]
 //
 // FINITEONLY-LABEL: @test_cospi(
 // FINITEONLY-NEXT:  entry:
-// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_cospi_f64(double noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR14]]
+// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_cospi_f64(double noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR16]]
 // FINITEONLY-NEXT:    ret double [[CALL_I]]
 //
 // APPROX-LABEL: @test_cospi(
 // APPROX-NEXT:  entry:
-// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_cospi_f64(double noundef [[X:%.*]]) #[[ATTR14]]
+// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_cospi_f64(double noundef [[X:%.*]]) #[[ATTR16]]
 // APPROX-NEXT:    ret double [[CALL_I]]
 //
 // NCRDIV-LABEL: @test_cospi(
 // NCRDIV-NEXT:  entry:
-// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_cospi_f64(double noundef [[X:%.*]]) #[[ATTR14]]
+// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_cospi_f64(double noundef [[X:%.*]]) #[[ATTR16]]
 // NCRDIV-NEXT:    ret double [[CALL_I]]
 //
 // AMDGCNSPIRV-LABEL: @test_cospi(
@@ -1216,22 +1192,22 @@ extern "C" __device__ double test_cospi(double x) {
 
 // DEFAULT-LABEL: @test_cyl_bessel_i0f(
 // DEFAULT-NEXT:  entry:
-// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_i0_f32(float noundef [[X:%.*]]) #[[ATTR14]]
+// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_i0_f32(float noundef [[X:%.*]]) #[[ATTR16]]
 // DEFAULT-NEXT:    ret float [[CALL_I]]
 //
 // FINITEONLY-LABEL: @test_cyl_bessel_i0f(
 // FINITEONLY-NEXT:  entry:
-// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_i0_f32(float noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR14]]
+// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_i0_f32(float noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR16]]
 // FINITEONLY-NEXT:    ret float [[CALL_I]]
 //
 // APPROX-LABEL: @test_cyl_bessel_i0f(
 // APPROX-NEXT:  entry:
-// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_i0_f32(float noundef [[X:%.*]]) #[[ATTR14]]
+// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_i0_f32(float noundef [[X:%.*]]) #[[ATTR16]]
 // APPROX-NEXT:    ret float [[CALL_I]]
 //
 // NCRDIV-LABEL: @test_cyl_bessel_i0f(
 // NCRDIV-NEXT:  entry:
-// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_i0_f32(float noundef [[X:%.*]]) #[[ATTR14]]
+// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_i0_f32(float noundef [[X:%.*]]) #[[ATTR16]]
 // NCRDIV-NEXT:    ret float [[CALL_I]]
 //
 // AMDGCNSPIRV-LABEL: @test_cyl_bessel_i0f(
@@ -1245,22 +1221,22 @@ extern "C" __device__ float test_cyl_bessel_i0f(float x) {
 
 // DEFAULT-LABEL: @test_cyl_bessel_i0(
 // DEFAULT-NEXT:  entry:
-// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_i0_f64(double noundef [[X:%.*]]) #[[ATTR14]]
+// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_i0_f64(double noundef [[X:%.*]]) #[[ATTR16]]
 // DEFAULT-NEXT:    ret double [[CALL_I]]
 //
 // FINITEONLY-LABEL: @test_cyl_bessel_i0(
 // FINITEONLY-NEXT:  entry:
-// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_i0_f64(double noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR14]]
+// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_i0_f64(double noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR16]]
 // FINITEONLY-NEXT:    ret double [[CALL_I]]
 //
 // APPROX-LABEL: @test_cyl_bessel_i0(
 // APPROX-NEXT:  entry:
-// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_i0_f64(double noundef [[X:%.*]]) #[[ATTR14]]
+// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_i0_f64(double noundef [[X:%.*]]) #[[ATTR16]]
 // APPROX-NEXT:    ret double [[CALL_I]]
 //
 // NCRDIV-LABEL: @test_cyl_bessel_i0(
 // NCRDIV-NEXT:  entry:
-// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_i0_f64(double noundef [[X:%.*]]) #[[ATTR14]]
+// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_i0_f64(double noundef [[X:%.*]]) #[[ATTR16]]
 // NCRDIV-NEXT:    ret double [[CALL_I]]
 //
 // AMDGCNSPIRV-LABEL: @test_cyl_bessel_i0(
@@ -1274,22 +1250,22 @@ extern "C" __device__ double test_cyl_bessel_i0(double x) {
 
 // DEFAULT-LABEL: @test_cyl_bessel_i1f(
 // DEFAULT-NEXT:  entry:
-// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_i1_f32(float noundef [[X:%.*]]) #[[ATTR14]]
+// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_i1_f32(float noundef [[X:%.*]]) #[[ATTR16]]
 // DEFAULT-NEXT:    ret float [[CALL_I]]
 //
 // FINITEONLY-LABEL: @test_cyl_bessel_i1f(
 // FINITEONLY-NEXT:  entry:
-// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_i1_f32(float noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR14]]
+// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_i1_f32(float noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR16]]
 // FINITEONLY-NEXT:    ret float [[CALL_I]]
 //
 // APPROX-LABEL: @test_cyl_bessel_i1f(
 // APPROX-NEXT:  entry:
-// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_i1_f32(float noundef [[X:%.*]]) #[[ATTR14]]
+// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_i1_f32(float noundef [[X:%.*]]) #[[ATTR16]]
 // APPROX-NEXT:    ret float [[CALL_I]]
 //
 // NCRDIV-LABEL: @test_cyl_bessel_i1f(
 // NCRDIV-NEXT:  entry:
-// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_i1_f32(float noundef [[X:%.*]]) #[[ATTR14]]
+// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_i1_f32(float noundef [[X:%.*]]) #[[ATTR16]]
 // NCRDIV-NEXT:    ret float [[CALL_I]]
 //
 // AMDGCNSPIRV-LABEL: @test_cyl_bessel_i1f(
@@ -1303,22 +1279,22 @@ extern "C" __device__ float test_cyl_bessel_i1f(float x) {
 
 // DEFAULT-LABEL: @test_cyl_bessel_i1(
 // DEFAULT-NEXT:  entry:
-// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_i1_f64(double noundef [[X:%.*]]) #[[ATTR14]]
+// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_i1_f64(double noundef [[X:%.*]]) #[[ATTR16]]
 // DEFAULT-NEXT:    ret double [[CALL_I]]
 //
 // FINITEONLY-LABEL: @test_cyl_bessel_i1(
 // FINITEONLY-NEXT:  entry:
-// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_i1_f64(double noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR14]]
+// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_i1_f64(double noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR16]]
 // FINITEONLY-NEXT:    ret double [[CALL_I]]
 //
 // APPROX-LABEL: @test_cyl_bessel_i1(
 // APPROX-NEXT:  entry:
-// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_i1_f64(double noundef [[X:%.*]]) #[[ATTR14]]
+// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_i1_f64(double noundef [[X:%.*]]) #[[ATTR16]]
 // APPROX-NEXT:    ret double [[CALL_I]]
 //
 // NCRDIV-LABEL: @test_cyl_bessel_i1(
 // NCRDIV-NEXT:  entry:
-// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_i1_f64(double noundef [[X:%.*]]) #[[ATTR14]]
+// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_i1_f64(double noundef [[X:%.*]]) #[[ATTR16]]
 // NCRDIV-NEXT:    ret double [[CALL_I]]
 //
 // AMDGCNSPIRV-LABEL: @test_cyl_bessel_i1(
@@ -1332,22 +1308,22 @@ extern "C" __device__ double test_cyl_bessel_i1(double x) {
 
 // DEFAULT-LABEL: @test_erfcf(
 // DEFAULT-NEXT:  entry:
-// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_erfc_f32(float noundef [[X:%.*]]) #[[ATTR13]]
+// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_erfc_f32(float noundef [[X:%.*]]) #[[ATTR15]]
 // DEFAULT-NEXT:    ret float [[CALL_I]]
 //
 // FINITEONLY-LABEL: @test_erfcf(
 // FINITEONLY-NEXT:  entry:
-// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_erfc_f32(float noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR13]]
+// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_erfc_f32(float noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR15]]
 // FINITEONLY-NEXT:    ret float [[CALL_I]]
 //
 // APPROX-LABEL: @test_erfcf(
 // APPROX-NEXT:  entry:
-// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_erfc_f32(float noundef [[X:%.*]]) #[[ATTR13]]
+// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_erfc_f32(float noundef [[X:%.*]]) #[[ATTR15]]
 // APPROX-NEXT:    ret float [[CALL_I]]
 //
 // NCRDIV-LABEL: @test_erfcf(
 // NCRDIV-NEXT:  entry:
-// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_erfc_f32(float noundef [[X:%.*]]) #[[ATTR13]]
+// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_erfc_f32(float noundef [[X:%.*]]) #[[ATTR15]]
 // NCRDIV-NEXT:    ret float [[CALL_I]]
 //
 // AMDGCNSPIRV-LABEL: @test_erfcf(
@@ -1361,22 +1337,22 @@ extern "C" __device__ float test_erfcf(float x) {
 
 // DEFAULT-LABEL: @test_erfc(
 // DEFAULT-NEXT:  entry:
-// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_erfc_f64(double noundef [[X:%.*]]) #[[ATTR13]]
+// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_erfc_f64(double noundef [[X:%.*]]) #[[ATTR15]]
 // DEFAULT-NEXT:    ret double [[CALL_I]]
 //
 // FINITEONLY-LABEL: @test_erfc(
 // FINITEONLY-NEXT:  entry:
-// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_erfc_f64(double noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR13]]
+// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_erfc_f64(double noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR15]]
 // FINITEONLY-NEXT:    ret double [[CALL_I]]
 //
 // APPROX-LABEL: @test_erfc(
 // APPROX-NEXT:  entry:
-// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_erfc_f64(double noundef [[X:%.*]]) #[[ATTR13]]
+// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_erfc_f64(double noundef [[X:%.*]]) #[[ATTR15]]
 // APPROX-NEXT:    ret double [[CALL_I]]
 //
 // NCRDIV-LABEL: @test_erfc(
 // NCRDIV-NEXT:  entry:
-// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_erfc_f64(double noundef [[X:%.*]]) #[[ATTR13]]
+// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_erfc_f64(double noundef [[X:%.*]]) #[[ATTR15]]
 // NCRDIV-NEXT:    ret double [[CALL_I]]
 //
 // AMDGCNSPIRV-LABEL: @test_erfc(
@@ -1390,22 +1366,22 @@ extern "C" __device__ double test_erfc(double x) {
 
 // DEFAULT-LABEL: @test_erfinvf(
 // DEFAULT-NEXT:  entry:
-// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_erfinv_f32(float noundef [[X:%.*]]) #[[ATTR13]]
+// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_erfinv_f32(float noundef [[X:%.*]]) #[[ATTR15]]
 // DEFAULT-NEXT:    ret float [[CALL_I]]
 //
 // FINITEONLY-LABEL: @test_erfinvf(
 // FINITEONLY-NEXT:  entry:
-// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_erfinv_f32(float noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR13]]
+// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_erfinv_f32(float noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR15]]
 // FINITEONLY-NEXT:    ret float [[CALL_I]]
 //
 // APPROX-LABEL: @test_erfinvf(
 // APPROX-NEXT:  entry:
-// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_erfinv_f32(float noundef [[X:%.*]]) #[[ATTR13]]
+// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_erfinv_f32(float noundef [[X:%.*]]) #[[ATTR15]]
 // APPROX-NEXT:    ret float [[CALL_I]]
 //
 // NCRDIV-LABEL: @test_erfinvf(
 // NCRDIV-NEXT:  entry:
-// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_erfinv_f32(float noundef [[X:%.*]]) #[[ATTR13]]
+// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_erfinv_f32(float noundef [[X:%.*]]) #[[ATTR15]]
 // NCRDIV-NEXT:    ret float [[CALL_I]]
 //
 // AMDGCNSPIRV-LABEL: @test_erfinvf(
@@ -1419,22 +1395,22 @@ extern "C" __device__ float test_erfinvf(float x) {
 
 // DEFAULT-LABEL: @test_erfinv(
 // DEFAULT-NEXT:  entry:
-// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_erfinv_f64(double noundef [[X:%.*]]) #[[ATTR13]]
+// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_erfinv_f64(double noundef [[X:%.*]]) #[[ATTR15]]
 // DEFAULT-NEXT:    ret double [[CALL_I]]
 //
 // FINITEONLY-LABEL: @test_erfinv(
 // FINITEONLY-NEXT:  entry:
-// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_erfinv_f64(double noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR13]]
+// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_erfinv_f64(double noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR15]]
 // FINITEONLY-NEXT:    ret double [[CALL_I]]
 //
 // APPROX-LABEL: @test_erfinv(
 // APPROX-NEXT:  entry:
-// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_erfinv_f64(double noundef [[X:%.*]]) #[[ATTR13]]
+// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_erfinv_f64(double noundef [[X:%.*]]) #[[ATTR15]]
 // APPROX-NEXT:    ret double [[CALL_I]]
 //
 // NCRDIV-LABEL: @test_erfinv(
 // NCRDIV-NEXT:  entry:
-// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_erfinv_f64(double noundef [[X:%.*]]) #[[ATTR13]]
+// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_erfinv_f64(double noundef [[X:%.*]]) #[[ATTR15]]
 // NCRDIV-NEXT:    ret double [[CALL_I]]
 //
 // AMDGCNSPIRV-LABEL: @test_erfinv(
@@ -1477,22 +1453,22 @@ extern "C" __device__ float test_exp10f(float x) {
 
 // DEFAULT-LABEL: @test_exp10(
 // DEFAULT-NEXT:  entry:
-// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_exp10_f64(double noundef [[X:%.*]]) #[[ATTR13]]
+// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_exp10_f64(double noundef [[X:%.*]]) #[[ATTR15]]
 // DEFAULT-NEXT:    ret double [[CALL_I]]
 //
 // FINITEONLY-LABEL: @test_exp10(
 // FINITEONLY-NEXT:  entry:
-// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_exp10_f64(double noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR13]]
+// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_exp10_f64(double noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR15]]
 // FINITEONLY-NEXT:    ret double [[CALL_I]]
 //
 // APPROX-LABEL: @test_exp10(
 // APPROX-NEXT:  entry:
-// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_exp10_f64(double noundef [[X:%.*]]) #[[ATTR13]]
+// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_exp10_f64(double noundef [[X:%.*]]) #[[ATTR15]]
 // APPROX-NEXT:    ret double [[CALL_I]]
 //
 // NCRDIV-LABEL: @test_exp10(
 // NCRDIV-NEXT:  entry:
-// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_exp10_f64(double noundef [[X:%.*]]) #[[ATTR13]]
+// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_exp10_f64(double noundef [[X:%.*]]) #[[ATTR15]]
 // NCRDIV-NEXT:    ret double [[CALL_I]]
 //
 // AMDGCNSPIRV-LABEL: @test_exp10(
@@ -1535,22 +1511,22 @@ extern "C" __device__ float test_exp2f(float x) {
 
 // DEFAULT-LABEL: @test_exp2(
 // DEFAULT-NEXT:  entry:
-// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_exp2_f64(double noundef [[X:%.*]]) #[[ATTR13]]
+// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_exp2_f64(double noundef [[X:%.*]]) #[[ATTR15]]
 // DEFAULT-NEXT:    ret double [[CALL_I]]
 //
 // FINITEONLY-LABEL: @test_exp2(
 // FINITEONLY-NEXT:  entry:
-// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_exp2_f64(double noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR13]]
+// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_exp2_f64(double noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR15]]
 // FINITEONLY-NEXT:    ret double [[CALL_I]]
 //
 // APPROX-LABEL: @test_exp2(
 // APPROX-NEXT:  entry:
-// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_exp2_f64(double noundef [[X:%.*]]) #[[ATTR13]]
+// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_exp2_f64(double noundef [[X:%.*]]) #[[ATTR15]]
 // APPROX-NEXT:    ret double [[CALL_I]]
 //
 // NCRDIV-LABEL: @test_exp2(
 // NCRDIV-NEXT:  entry:
-// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_exp2_f64(double noundef [[X:%.*]]) #[[ATTR13]]
+// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_exp2_f64(double noundef [[X:%.*]]) #[[ATTR15]]
 // NCRDIV-NEXT:    ret double [[CALL_I]]
 //
 // AMDGCNSPIRV-LABEL: @test_exp2(
@@ -1593,22 +1569,22 @@ extern "C" __device__ float test_expf(float x) {
 
 // DEFAULT-LABEL: @test_exp(
 // DEFAULT-NEXT:  entry:
-// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_exp_f64(double noundef [[X:%.*]]) #[[ATTR13]]
+// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_exp_f64(double noundef [[X:%.*]]) #[[ATTR15]]
 // DEFAULT-NEXT:    ret double [[CALL_I]]
 //
 // FINITEONLY-LABEL: @test_exp(
 // FINITEONLY-NEXT:  entry:
-// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_exp_f64(double noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR13]]
+// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_exp_f64(double noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR15]]
 // FINITEONLY-NEXT:    ret double [[CALL_I]]
 //
 // APPROX-LABEL: @test_exp(
 // APPROX-NEXT:  entry:
-// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_exp_f64(double noundef [[X:%.*]]) #[[ATTR13]]
+// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_exp_f64(double noundef [[X:%.*]]) #[[ATTR15]]
 // APPROX-NEXT:    ret double [[CALL_I]]
 //
 // NCRDIV-LABEL: @test_exp(
 // NCRDIV-NEXT:  entry:
-// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_exp_f64(double noundef [[X:%.*]]) #[[ATTR13]]
+// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_exp_f64(double noundef [[X:%.*]]) #[[ATTR15]]
 // NCRDIV-NEXT:    ret double [[CALL_I]]
 //
 // AMDGCNSPIRV-LABEL: @test_exp(
@@ -1622,22 +1598,22 @@ extern "C" __device__ double test_exp(double x) {
 
 // DEFAULT-LABEL: @test_expm1f(
 // DEFAULT-NEXT:  entry:
-// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_expm1_f32(float noundef [[X:%.*]]) #[[ATTR13]]
+// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_expm1_f32(float noundef [[X:%.*]]) #[[ATTR15]]
 // DEFAULT-NEXT:    ret float [[CALL_I]]
 //
 // FINITEONLY-LABEL: @test_expm1f(
 // FINITEONLY-NEXT:  entry:
-// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_expm1_f32(float noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR13]]
+// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_expm1_f32(float noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR15]]
 // FINITEONLY-NEXT:    ret float [[CALL_I]]
 //
 // APPROX-LABEL: @test_expm1f(
 // APPROX-NEXT:  entry:
-// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_expm1_f32(float noundef [[X:%.*]]) #[[ATTR13]]
+// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_expm1_f32(float noundef [[X:%.*]]) #[[ATTR15]]
 // APPROX-NEXT:    ret float [[CALL_I]]
 //
 // NCRDIV-LABEL: @test_expm1f(
 // NCRDIV-NEXT:  entry:
-// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_expm1_f32(float noundef [[X:%.*]]) #[[ATTR13]]
+// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_expm1_f32(float noundef [[X:%.*]]) #[[ATTR15]]
 // NCRDIV-NEXT:    ret float [[CALL_I]]
 //
 // AMDGCNSPIRV-LABEL: @test_expm1f(
@@ -1651,22 +1627,22 @@ extern "C" __device__ float test_expm1f(float x) {
 
 // DEFAULT-LABEL: @test_expm1(
 // DEFAULT-NEXT:  entry:
-// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_expm1_f64(double noundef [[X:%.*]]) #[[ATTR13]]
+// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_expm1_f64(double noundef [[X:%.*]]) #[[ATTR15]]
 // DEFAULT-NEXT:    ret double [[CALL_I]]
 //
 // FINITEONLY-LABEL: @test_expm1(
 // FINITEONLY-NEXT:  entry:
-// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_expm1_f64(double noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR13]]
+// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_expm1_f64(double noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR15]]
 // FINITEONLY-NEXT:    ret double [[CALL_I]]
 //
 // APPROX-LABEL: @test_expm1(
 // APPROX-NEXT:  entry:
-// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_expm1_f64(double noundef [[X:%.*]]) #[[ATTR13]]
+// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_expm1_f64(double noundef [[X:%.*]]) #[[ATTR15]]
 // APPROX-NEXT:    ret double [[CALL_I]]
 //
 // NCRDIV-LABEL: @test_expm1(
 // NCRDIV-NEXT:  entry:
-// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_expm1_f64(double noundef [[X:%.*]]) #[[ATTR13]]
+// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_expm1_f64(double noundef [[X:%.*]]) #[[ATTR15]]
 // NCRDIV-NEXT:    ret double [[CALL_I]]
 //
 // AMDGCNSPIRV-LABEL: @test_expm1(
@@ -1738,22 +1714,22 @@ extern "C" __device__ double test_fabs(double x) {
 
 // DEFAULT-LABEL: @test_fdimf(
 // DEFAULT-NEXT:  entry:
-// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_fdim_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]]) #[[ATTR12]]
+// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_fdim_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]]) #[[ATTR14]]
 // DEFAULT-NEXT:    ret float [[CALL_I]]
 //
 // FINITEONLY-LABEL: @test_fdimf(
 // FINITEONLY-NEXT:  entry:
-// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_fdim_f32(float noundef nofpclass(nan inf) [[X:%.*]], float noundef nofpclass(nan inf) [[Y:%.*]]) #[[ATTR12]]
+// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_fdim_f32(float noundef nofpclass(nan inf) [[X:%.*]], float noundef nofpclass(nan inf) [[Y:%.*]]) #[[ATTR14]]
 // FINITEONLY-NEXT:    ret float [[CALL_I]]
 //
 // APPROX-LABEL: @test_fdimf(
 // APPROX-NEXT:  entry:
-// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_fdim_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]]) #[[ATTR12]]
+// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_fdim_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]]) #[[ATTR14]]
 // APPROX-NEXT:    ret float [[CALL_I]]
 //
 // NCRDIV-LABEL: @test_fdimf(
 // NCRDIV-NEXT:  entry:
-// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_fdim_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]]) #[[ATTR12]]
+// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_fdim_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]]) #[[ATTR14]]
 // NCRDIV-NEXT:    ret float [[CALL_I]]
 //
 // AMDGCNSPIRV-LABEL: @test_fdimf(
@@ -1767,22 +1743,22 @@ extern "C" __device__ float test_fdimf(float x, float y) {
 
 // DEFAULT-LABEL: @test_fdim(
 // DEFAULT-NEXT:  entry:
-// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_fdim_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]]) #[[ATTR12]]
+// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_fdim_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]]) #[[ATTR14]]
 // DEFAULT-NEXT:    ret double [[CALL_I]]
 //
 // FINITEONLY-LABEL: @test_fdim(
 // FINITEONLY-NEXT:  entry:
-// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_fdim_f64(double noundef nofpclass(nan inf) [[X:%.*]], double noundef nofpclass(nan inf) [[Y:%.*]]) #[[ATTR12]]
+// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_fdim_f64(double noundef nofpclass(nan inf) [[X:%.*]], double noundef nofpclass(nan inf) [[Y:%.*]]) #[[ATTR14]]
 // FINITEONLY-NEXT:    ret double [[CALL_I]]
 //
 // APPROX-LABEL: @test_fdim(
 // APPROX-NEXT:  entry:
-// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_fdim_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]]) #[[ATTR12]]
+// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_fdim_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]]) #[[ATTR14]]
 // APPROX-NEXT:    ret double [[CALL_I]]
 //
 // NCRDIV-LABEL: @test_fdim(
 // NCRDIV-NEXT:  entry:
-// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_fdim_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]]) #[[ATTR12]]
+// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_fdim_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]]) #[[ATTR14]]
 // NCRDIV-NEXT:    ret double [[CALL_I]]
 //
 // AMDGCNSPIRV-LABEL: @test_fdim(
@@ -2086,22 +2062,22 @@ extern "C" __device__ double test_fmin(double x, double y) {
 
 // DEFAULT-LABEL: @test_fmodf(
 // DEFAULT-NEXT:  entry:
-// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_fmod_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]]) #[[ATTR12]]
+// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_fmod_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]]) #[[ATTR14]]
 // DEFAULT-NEXT:    ret float [[CALL_I]]
 //
 // FINITEONLY-LABEL: @test_fmodf(
 // FINITEONLY-NEXT:  entry:
-// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_fmod_f32(float noundef nofpclass(nan inf) [[X:%.*]], float noundef nofpclass(nan inf) [[Y:%.*]]) #[[ATTR12]]
+// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_fmod_f32(float noundef nofpclass(nan inf) [[X:%.*]], float noundef nofpclass(nan inf) [[Y:%.*]]) #[[ATTR14]]
 // FINITEONLY-NEXT:    ret float [[CALL_I]]
 //
 // APPROX-LABEL: @test_fmodf(
 // APPROX-NEXT:  entry:
-// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_fmod_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]]) #[[ATTR12]]
+// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_fmod_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]]) #[[ATTR14]]
 // APPROX-NEXT:    ret float [[CALL_I]]
 //
 // NCRDIV-LABEL: @test_fmodf(
 // NCRDIV-NEXT:  entry:
-// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_fmod_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]]) #[[ATTR12]]
+// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_fmod_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]]) #[[ATTR14]]
 // NCRDIV-NEXT:    ret float [[CALL_I]]
 //
 // AMDGCNSPIRV-LABEL: @test_fmodf(
@@ -2115,22 +2091,22 @@ extern "C" __device__ float test_fmodf(float x, float y) {
 
 // DEFAULT-LABEL: @test_fmod(
 // DEFAULT-NEXT:  entry:
-// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_fmod_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]]) #[[ATTR12]]
+// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_fmod_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]]) #[[ATTR14]]
 // DEFAULT-NEXT:    ret double [[CALL_I]]
 //
 // FINITEONLY-LABEL: @test_fmod(
 // FINITEONLY-NEXT:  entry:
-// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_fmod_f64(double noundef nofpclass(nan inf) [[X:%.*]], double noundef nofpclass(nan inf) [[Y:%.*]]) #[[ATTR12]]
+// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_fmod_f64(double noundef nofpclass(nan inf) [[X:%.*]], double noundef nofpclass(nan inf) [[Y:%.*]]) #[[ATTR14]]
 // FINITEONLY-NEXT:    ret double [[CALL_I]]
 //
 // APPROX-LABEL: @test_fmod(
 // APPROX-NEXT:  entry:
-// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_fmod_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]]) #[[ATTR12]]
+// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_fmod_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]]) #[[ATTR14]]
 // APPROX-NEXT:    ret double [[CALL_I]]
 //
 // NCRDIV-LABEL: @test_fmod(
 // NCRDIV-NEXT:  entry:
-// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_fmod_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]]) #[[ATTR12]]
+// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_fmod_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]]) #[[ATTR14]]
 // NCRDIV-NEXT:    ret double [[CALL_I]]
 //
 // AMDGCNSPIRV-LABEL: @test_fmod(
@@ -2232,22 +2208,22 @@ extern "C" __device__ double test_frexp(double x, int* y) {
 
 // DEFAULT-LABEL: @test_hypotf(
 // DEFAULT-NEXT:  entry:
-// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_hypot_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]]) #[[ATTR12]]
+// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_hypot_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]]) #[[ATTR14]]
 // DEFAULT-NEXT:    ret float [[CALL_I]]
 //
 // FINITEONLY-LABEL: @test_hypotf(
 // FINITEONLY-NEXT:  entry:
-// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_hypot_f32(float noundef nofpclass(nan inf) [[X:%.*]], float noundef nofpclass(nan inf) [[Y:%.*]]) #[[ATTR12]]
+// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_hypot_f32(float noundef nofpclass(nan inf) [[X:%.*]], float noundef nofpclass(nan inf) [[Y:%.*]]) #[[ATTR14]]
 // FINITEONLY-NEXT:    ret float [[CALL_I]]
 //
 // APPROX-LABEL: @test_hypotf(
 // APPROX-NEXT:  entry:
-// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_hypot_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]]) #[[ATTR12]]
+// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_hypot_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]]) #[[ATTR14]]
 // APPROX-NEXT:    ret float [[CALL_I]]
 //
 // NCRDIV-LABEL: @test_hypotf(
 // NCRDIV-NEXT:  entry:
-// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_hypot_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]]) #[[ATTR12]]
+// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_hypot_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]]) #[[ATTR14]]
 // NCRDIV-NEXT:    ret float [[CALL_I]]
 //
 // AMDGCNSPIRV-LABEL: @test_hypotf(
@@ -2261,22 +2237,22 @@ extern "C" __device__ float test_hypotf(float x, float y) {
 
 // DEFAULT-LABEL: @test_hypot(
 // DEFAULT-NEXT:  entry:
-// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_hypot_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]]) #[[ATTR12]]
+// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_hypot_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]]) #[[ATTR14]]
 // DEFAULT-NEXT:    ret double [[CALL_I]]
 //
 // FINITEONLY-LABEL: @test_hypot(
 // FINITEONLY-NEXT:  entry:
-// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_hypot_f64(double noundef nofpclass(nan inf) [[X:%.*]], double noundef nofpclass(nan inf) [[Y:%.*]]) #[[ATTR12]]
+// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_hypot_f64(double noundef nofpclass(nan inf) [[X:%.*]], double noundef nofpclass(nan inf) [[Y:%.*]]) #[[ATTR14]]
 // FINITEONLY-NEXT:    ret double [[CALL_I]]
 //
 // APPROX-LABEL: @test_hypot(
 // APPROX-NEXT:  entry:
-// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_hypot_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]]) #[[ATTR12]]
+// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_hypot_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]]) #[[ATTR14]]
 // APPROX-NEXT:    ret double [[CALL_I]]
 //
 // NCRDIV-LABEL: @test_hypot(
 // NCRDIV-NEXT:  entry:
-// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_hypot_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]]) #[[ATTR12]]
+// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_hypot_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]]) #[[ATTR14]]
 // NCRDIV-NEXT:    ret double [[CALL_I]]
 //
 // AMDGCNSPIRV-LABEL: @test_hypot(
@@ -2290,22 +2266,22 @@ extern "C" __device__ double test_hypot(double x, double y) {
 
 // DEFAULT-LABEL: @test_ilogbf(
 // DEFAULT-NEXT:  entry:
-// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call noundef i32 @__ocml_ilogb_f32(float noundef [[X:%.*]]) #[[ATTR12]]
+// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call noundef i32 @__ocml_ilogb_f32(float noundef [[X:%.*]]) #[[ATTR14]]
 // DEFAULT-NEXT:    ret i32 [[CALL_I]]
 //
 // FINITEONLY-LABEL: @test_ilogbf(
 // FINITEONLY-NEXT:  entry:
-// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call noundef i32 @__ocml_ilogb_f32(float noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR12]]
+// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call noundef i32 @__ocml_ilogb_f32(float noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR14]]
 // FINITEONLY-NEXT:    ret i32 [[CALL_I]]
 //
 // APPROX-LABEL: @test_ilogbf(
 // APPROX-NEXT:  entry:
-// APPROX-NEXT:    [[CALL_I:%.*]] = tail call noundef i32 @__ocml_ilogb_f32(float noundef [[X:%.*]]) #[[ATTR12]]
+// APPROX-NEXT:    [[CALL_I:%.*]] = tail call noundef i32 @__ocml_ilogb_f32(float noundef [[X:%.*]]) #[[ATTR14]]
 // APPROX-NEXT:    ret i32 [[CALL_I]]
 //
 // NCRDIV-LABEL: @test_ilogbf(
 // NCRDIV-NEXT:  entry:
-// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call noundef i32 @__ocml_ilogb_f32(float noundef [[X:%.*]]) #[[ATTR12]]
+// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call noundef i32 @__ocml_ilogb_f32(float noundef [[X:%.*]]) #[[ATTR14]]
 // NCRDIV-NEXT:    ret i32 [[CALL_I]]
 //
 // AMDGCNSPIRV-LABEL: @test_ilogbf(
@@ -2319,22 +2295,22 @@ extern "C" __device__ int test_ilogbf(float x) {
 
 // DEFAULT-LABEL: @test_ilogb(
 // DEFAULT-NEXT:  entry:
-// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call noundef i32 @__ocml_ilogb_f64(double noundef [[X:%.*]]) #[[ATTR12]]
+// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call noundef i32 @__ocml_ilogb_f64(double noundef [[X:%.*]]) #[[ATTR14]]
 // DEFAULT-NEXT:    ret i32 [[CALL_I]]
 //
 // FINITEONLY-LABEL: @test_ilogb(
 // FINITEONLY-NEXT:  entry:
-// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call noundef i32 @__ocml_ilogb_f64(double noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR12]]
+// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call noundef i32 @__ocml_ilogb_f64(double noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR14]]
 // FINITEONLY-NEXT:    ret i32 [[CALL_I]]
 //
 // APPROX-LABEL: @test_ilogb(
 // APPROX-NEXT:  entry:
-// APPROX-NEXT:    [[CALL_I:%.*]] = tail call noundef i32 @__ocml_ilogb_f64(double noundef [[X:%.*]]) #[[ATTR12]]
+// APPROX-NEXT:    [[CALL_I:%.*]] = tail call noundef i32 @__ocml_ilogb_f64(double noundef [[X:%.*]]) #[[ATTR14]]
 // APPROX-NEXT:    ret i32 [[CALL_I]]
 //
 // NCRDIV-LABEL: @test_ilogb(
 // NCRDIV-NEXT:  entry:
-// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call noundef i32 @__ocml_ilogb_f64(double noundef [[X:%.*]]) #[[ATTR12]]
+// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call noundef i32 @__ocml_ilogb_f64(double noundef [[X:%.*]]) #[[ATTR14]]
 // NCRDIV-NEXT:    ret i32 [[CALL_I]]
 //
 // AMDGCNSPIRV-LABEL: @test_ilogb(
@@ -2556,22 +2532,22 @@ extern "C" __device__ BOOL_TYPE test___isnan(double x) {
 
 // DEFAULT-LABEL: @test_j0f(
 // DEFAULT-NEXT:  entry:
-// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_j0_f32(float noundef [[X:%.*]]) #[[ATTR14]]
+// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_j0_f32(float noundef [[X:%.*]]) #[[ATTR16]]
 // DEFAULT-NEXT:    ret float [[CALL_I]]
 //
 // FINITEONLY-LABEL: @test_j0f(
 // FINITEONLY-NEXT:  entry:
-// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_j0_f32(float noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR14]]
+// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_j0_f32(float noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR16]]
 // FINITEONLY-NEXT:    ret float [[CALL_I]]
 //
 // APPROX-LABEL: @test_j0f(
 // APPROX-NEXT:  entry:
-// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_j0_f32(float noundef [[X:%.*]]) #[[ATTR14]]
+// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_j0_f32(float noundef [[X:%.*]]) #[[ATTR16]]
 // APPROX-NEXT:    ret float [[CALL_I]]
 //
 // NCRDIV-LABEL: @test_j0f(
 // NCRDIV-NEXT:  entry:
-// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_j0_f32(float noundef [[X:%.*]]) #[[ATTR14]]
+// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_j0_f32(float noundef [[X:%.*]]) #[[ATTR16]]
 // NCRDIV-NEXT:    ret float [[CALL_I]]
 //
 // AMDGCNSPIRV-LABEL: @test_j0f(
@@ -2585,22 +2561,22 @@ extern "C" __device__ float test_j0f(float x) {
 
 // DEFAULT-LABEL: @test_j0(
 // DEFAULT-NEXT:  entry:
-// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_j0_f64(double noundef [[X:%.*]]) #[[ATTR14]]
+// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_j0_f64(double noundef [[X:%.*]]) #[[ATTR16]]
 // DEFAULT-NEXT:    ret double [[CALL_I]]
 //
 // FINITEONLY-LABEL: @test_j0(
 // FINITEONLY-NEXT:  entry:
-// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_j0_f64(double noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR14]]
+// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_j0_f64(double noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR16]]
 // FINITEONLY-NEXT:    ret double [[CALL_I]]
 //
 // APPROX-LABEL: @test_j0(
 // APPROX-NEXT:  entry:
-// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_j0_f64(double noundef [[X:%.*]]) #[[ATTR14]]
+// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_j0_f64(double noundef [[X:%.*]]) #[[ATTR16]]
 // APPROX-NEXT:    ret double [[CALL_I]]
 //
 // NCRDIV-LABEL: @test_j0(
 // NCRDIV-NEXT:  entry:
-// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_j0_f64(double noundef [[X:%.*]]) #[[ATTR14]]
+// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_j0_f64(double noundef [[X:%.*]]) #[[ATTR16]]
 // NCRDIV-NEXT:    ret double [[CALL_I]]
 //
 // AMDGCNSPIRV-LABEL: @test_j0(
@@ -2614,22 +2590,22 @@ extern "C" __device__ double test_j0(double x) {
 
 // DEFAULT-LABEL: @test_j1f(
 // DEFAULT-NEXT:  entry:
-// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_j1_f32(float noundef [[X:%.*]]) #[[ATTR14]]
+// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_j1_f32(float noundef [[X:%.*]]) #[[ATTR16]]
 // DEFAULT-NEXT:    ret float [[CALL_I]]
 //
 // FINITEONLY-LABEL: @test_j1f(
 // FINITEONLY-NEXT:  entry:
-// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_j1_f32(float noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR14]]
+// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_j1_f32(float noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR16]]
 // FINITEONLY-NEXT:    ret float [[CALL_I]]
 //
 // APPROX-LABEL: @test_j1f(
 // APPROX-NEXT:  entry:
-// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_j1_f32(float noundef [[X:%.*]]) #[[ATTR14]]
+// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_j1_f32(float noundef [[X:%.*]]) #[[ATTR16]]
 // APPROX-NEXT:    ret float [[CALL_I]]
 //
 // NCRDIV-LABEL: @test_j1f(
 // NCRDIV-NEXT:  entry:
-// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_j1_f32(float noundef [[X:%.*]]) #[[ATTR14]]
+// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_j1_f32(float noundef [[X:%.*]]) #[[ATTR16]]
 // NCRDIV-NEXT:    ret float [[CALL_I]]
 //
 // AMDGCNSPIRV-LABEL: @test_j1f(
@@ -2643,22 +2619,22 @@ extern "C" __device__ float test_j1f(float x) {
 
 // DEFAULT-LABEL: @test_j1(
 // DEFAULT-NEXT:  entry:
-// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_j1_f64(double noundef [[X:%.*]]) #[[ATTR14]]
+// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_j1_f64(double noundef [[X:%.*]]) #[[ATTR16]]
 // DEFAULT-NEXT:    ret double [[CALL_I]]
 //
 // FINITEONLY-LABEL: @test_j1(
 // FINITEONLY-NEXT:  entry:
-// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_j1_f64(double noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR14]]
+// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_j1_f64(double noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR16]]
 // FINITEONLY-NEXT:    ret double [[CALL_I]]
 //
 // APPROX-LABEL: @test_j1(
 // APPROX-NEXT:  entry:
-// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_j1_f64(double noundef [[X:%.*]]) #[[ATTR14]]
+// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_j1_f64(double noundef [[X:%.*]]) #[[ATTR16]]
 // APPROX-NEXT:    ret double [[CALL_I]]
 //
 // NCRDIV-LABEL: @test_j1(
 // NCRDIV-NEXT:  entry:
-// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_j1_f64(double noundef [[X:%.*]]) #[[ATTR14]]
+// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_j1_f64(double noundef [[X:%.*]]) #[[ATTR16]]
 // NCRDIV-NEXT:    ret double [[CALL_I]]
 //
 // AMDGCNSPIRV-LABEL: @test_j1(
@@ -2677,14 +2653,14 @@ extern "C" __device__ double test_j1(double x) {
 // DEFAULT-NEXT:      i32 1, label [[IF_THEN2_I:%.*]]
 // DEFAULT-NEXT:    ]
 // DEFAULT:       if.then.i:
-// DEFAULT-NEXT:    [[CALL_I20_I:%.*]] = tail call contract noundef float @__ocml_j0_f32(float noundef [[Y:%.*]]) #[[ATTR14]]
+// DEFAULT-NEXT:    [[CALL_I20_I:%.*]] = tail call contract noundef float @__ocml_j0_f32(float noundef [[Y:%.*]]) #[[ATTR16]]
 // DEFAULT-NEXT:    br label [[_ZL3JNFIF_EXIT:%.*]]
 // DEFAULT:       if.then2.i:
-// DEFAULT-NEXT:    [[CALL_I22_I:%.*]] = tail call contract noundef float @__ocml_j1_f32(float noundef [[Y]]) #[[ATTR14]]
+// DEFAULT-NEXT:    [[CALL_I22_I:%.*]] = tail call contract noundef float @__ocml_j1_f32(float noundef [[Y]]) #[[ATTR16]]
 // DEFAULT-NEXT:    br label [[_ZL3JNFIF_EXIT]]
 // DEFAULT:       if.end4.i:
-// DEFAULT-NEXT:    [[CALL_I_I:%.*]] = tail call contract noundef float @__ocml_j0_f32(float noundef [[Y]]) #[[ATTR14]]
-// DEFAULT-NEXT:    [[CALL_I21_I:%.*]] = tail call contract noundef float @__ocml_j1_f32(float noundef [[Y]]) #[[ATTR14]]
+// DEFAULT-NEXT:    [[CALL_I_I:%.*]] = tail call contract noundef float @__ocml_j0_f32(float noundef [[Y]]) #[[ATTR16]]
+// DEFAULT-NEXT:    [[CALL_I21_I:%.*]] = tail call contract noundef float @__ocml_j1_f32(float noundef [[Y]]) #[[ATTR16]]
 // DEFAULT-NEXT:    [[CMP7_I1:%.*]] = icmp sgt i32 [[X]], 1
 // DEFAULT-NEXT:    br i1 [[CMP7_I1]], label [[FOR_BODY_I:%.*]], label [[_ZL3JNFIF_EXIT]]
 // DEFAULT:       for.body.i:
@@ -2710,14 +2686,14 @@ extern "C" __device__ double test_j1(double x) {
 // FINITEONLY-NEXT:      i32 1, label [[IF_THEN2_I:%.*]]
 // FINITEONLY-NEXT:    ]
 // FINITEONLY:       if.then.i:
-// FINITEONLY-NEXT:    [[CALL_I20_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_j0_f32(float noundef nofpclass(nan inf) [[Y:%.*]]) #[[ATTR14]]
+// FINITEONLY-NEXT:    [[CALL_I20_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_j0_f32(float noundef nofpclass(nan inf) [[Y:%.*]]) #[[ATTR16]]
 // FINITEONLY-NEXT:    br label [[_ZL3JNFIF_EXIT:%.*]]
 // FINITEONLY:       if.then2.i:
-// FINITEONLY-NEXT:    [[CALL_I22_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_j1_f32(float noundef nofpclass(nan inf) [[Y]]) #[[ATTR14]]
+// FINITEONLY-NEXT:    [[CALL_I22_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_j1_f32(float noundef nofpclass(nan inf) [[Y]]) #[[ATTR16]]
 // FINITEONLY-NEXT:    br label [[_ZL3JNFIF_EXIT]]
 // FINITEONLY:       if.end4.i:
-// FINITEONLY-NEXT:    [[CALL_I_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_j0_f32(float noundef nofpclass(nan inf) [[Y]]) #[[ATTR14]]
-// FINITEONLY-NEXT:    [[CALL_I21_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_j1_f32(float noundef nofpclass(nan inf) [[Y]]) #[[ATTR14]]
+// FINITEONLY-NEXT:    [[CALL_I_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_j0_f32(float noundef nofpclass(nan inf) [[Y]]) #[[ATTR16]]
+// FINITEONLY-NEXT:    [[CALL_I21_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_j1_f32(float noundef nofpclass(nan inf) [[Y]]) #[[ATTR16]]
 // FINITEONLY-NEXT:    [[CMP7_I1:%.*]] = icmp sgt i32 [[X]], 1
 // FINITEONLY-NEXT:    br i1 [[CMP7_I1]], label [[FOR_BODY_I:%.*]], label [[_ZL3JNFIF_EXIT]]
 // FINITEONLY:       for.body.i:
@@ -2743,14 +2719,14 @@ extern "C" __device__ double test_j1(double x) {
 // APPROX-NEXT:      i32 1, label [[IF_THEN2_I:%.*]]
 // APPROX-NEXT:    ]
 // APPROX:       if.then.i:
-// APPROX-NEXT:    [[CALL_I20_I:%.*]] = tail call contract noundef float @__ocml_j0_f32(float noundef [[Y:%.*]]) #[[ATTR14]]
+// APPROX-NEXT:    [[CALL_I20_I:%.*]] = tail call contract noundef float @__ocml_j0_f32(float noundef [[Y:%.*]]) #[[ATTR16]]
 // APPROX-NEXT:    br label [[_ZL3JNFIF_EXIT:%.*]]
 // APPROX:       if.then2.i:
-// APPROX-NEXT:    [[CALL_I22_I:%.*]] = tail call contract noundef float @__ocml_j1_f32(float noundef [[Y]]) #[[ATTR14]]
+// APPROX-NEXT:    [[CALL_I22_I:%.*]] = tail call contract noundef float @__ocml_j1_f32(float noundef [[Y]]) #[[ATTR16]]
 // APPROX-NEXT:    br label [[_ZL3JNFIF_EXIT]]
 // APPROX:       if.end4.i:
-// APPROX-NEXT:    [[CALL_I_I:%.*]] = tail call contract noundef float @__ocml_j0_f32(float noundef [[Y]]) #[[ATTR14]]
-// APPROX-NEXT:    [[CALL_I21_I:%.*]] = tail call contract noundef float @__ocml_j1_f32(float noundef [[Y]]) #[[ATTR14]]
+// APPROX-NEXT:    [[CALL_I_I:%.*]] = tail call contract noundef float @__ocml_j0_f32(float noundef [[Y]]) #[[ATTR16]]
+// APPROX-NEXT:    [[CALL_I21_I:%.*]] = tail call contract noundef float @__ocml_j1_f32(float noundef [[Y]]) #[[ATTR16]]
 // APPROX-NEXT:    [[CMP7_I1:%.*]] = icmp sgt i32 [[X]], 1
 // APPROX-NEXT:    br i1 [[CMP7_I1]], label [[FOR_BODY_I:%.*]], label [[_ZL3JNFIF_EXIT]]
 // APPROX:       for.body.i:
@@ -2776,14 +2752,14 @@ extern "C" __device__ double test_j1(double x) {
 // NCRDIV-NEXT:      i32 1, label [[IF_THEN2_I:%.*]]
 // NCRDIV-NEXT:    ]
 // NCRDIV:       if.then.i:
-// NCRDIV-NEXT:    [[CALL_I20_I:%.*]] = tail call contract noundef float @__ocml_j0_f32(float noundef [[Y:%.*]]) #[[ATTR14]]
+// NCRDIV-NEXT:    [[CALL_I20_I:%.*]] = tail call contract noundef float @__ocml_j0_f32(float noundef [[Y:%.*]]) #[[ATTR16]]
 // NCRDIV-NEXT:    br label [[_ZL3JNFIF_EXIT:%.*]]
 // NCRDIV:       if.then2.i:
-// NCRDIV-NEXT:    [[CALL_I22_I:%.*]] = tail call contract noundef float @__ocml_j1_f32(float noundef [[Y]]) #[[ATTR14]]
+// NCRDIV-NEXT:    [[CALL_I22_I:%.*]] = tail call contract noundef float @__ocml_j1_f32(float noundef [[Y]]) #[[ATTR16]]
 // NCRDIV-NEXT:    br label [[_ZL3JNFIF_EXIT]]
 // NCRDIV:       if.end4.i:
-// NCRDIV-NEXT:    [[CALL_I_I:%.*]] = tail call contract noundef float @__ocml_j0_f32(float noundef [[Y]]) #[[ATTR14]]
-// NCRDIV-NEXT:    [[CALL_I21_I:%.*]] = tail call contract noundef float @__ocml_j1_f32(float noundef [[Y]]) #[[ATTR14]]
+// NCRDIV-NEXT:    [[CALL_I_I:%.*]] = tail call contract noundef float @__ocml_j0_f32(float noundef [[Y]]) #[[ATTR16]]
+// NCRDIV-NEXT:    [[CALL_I21_I:%.*]] = tail call contract noundef float @__ocml_j1_f32(float noundef [[Y]]) #[[ATTR16]]
 // NCRDIV-NEXT:    [[CMP7_I1:%.*]] = icmp sgt i32 [[X]], 1
 // NCRDIV-NEXT:    br i1 [[CMP7_I1]], label [[FOR_BODY_I:%.*]], label [[_ZL3JNFIF_EXIT]]
 // NCRDIV:       for.body.i:
@@ -2846,14 +2822,14 @@ extern "C" __device__ float test_jnf(int x, float y) {
 // DEFAULT-NEXT:      i32 1, label [[IF_THEN2_I:%.*]]
 // DEFAULT-NEXT:    ]
 // DEFAULT:       if.then.i:
-// DEFAULT-NEXT:    [[CALL_I20_I:%.*]] = tail call contract noundef double @__ocml_j0_f64(double noundef [[Y:%.*]]) #[[ATTR14]]
+// DEFAULT-NEXT:    [[CALL_I20_I:%.*]] = tail call contract noundef double @__ocml_j0_f64(double noundef [[Y:%.*]]) #[[ATTR16]]
 // DEFAULT-NEXT:    br label [[_ZL2JNID_EXIT:%.*]]
 // DEFAULT:       if.then2.i:
-// DEFAULT-NEXT:    [[CALL_I22_I:%.*]] = tail call contract noundef double @__ocml_j1_f64(double noundef [[Y]]) #[[ATTR14]]
+// DEFAULT-NEXT:    [[CALL_I22_I:%.*]] = tail call contract noundef double @__ocml_j1_f64(double noundef [[Y]]) #[[ATTR16]]
 // DEFAULT-NEXT:    br label [[_ZL2JNID_EXIT]]
 // DEFAULT:       if.end4.i:
-// DEFAULT-NEXT:    [[CALL_I_I:%.*]] = tail call contract noundef double @__ocml_j0_f64(double noundef [[Y]]) #[[ATTR14]]
-// DEFAULT-NEXT:    [[CALL_I21_I:%.*]] = tail call contract noundef double @__ocml_j1_f64(double noundef [[Y]]) #[[ATTR14]]
+// DEFAULT-NEXT:    [[CALL_I_I:%.*]] = tail call contract noundef double @__ocml_j0_f64(double noundef [[Y]]) #[[ATTR16]]
+// DEFAULT-NEXT:    [[CALL_I21_I:%.*]] = tail call contract noundef double @__ocml_j1_f64(double noundef [[Y]]) #[[ATTR16]]
 // DEFAULT-NEXT:    [[CMP7_I1:%.*]] = icmp sgt i32 [[X]], 1
 // DEFAULT-NEXT:    br i1 [[CMP7_I1]], label [[FOR_BODY_I:%.*]], label [[_ZL2JNID_EXIT]]
 // DEFAULT:       for.body.i:
@@ -2879,14 +2855,14 @@ extern "C" __device__ float test_jnf(int x, float y) {
 // FINITEONLY-NEXT:      i32 1, label [[IF_THEN2_I:%.*]]
 // FINITEONLY-NEXT:    ]
 // FINITEONLY:       if.then.i:
-// FINITEONLY-NEXT:    [[CALL_I20_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_j0_f64(double noundef nofpclass(nan inf) [[Y:%.*]]) #[[ATTR14]]
+// FINITEONLY-NEXT:    [[CALL_I20_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_j0_f64(double noundef nofpclass(nan inf) [[Y:%.*]]) #[[ATTR16]]
 // FINITEONLY-NEXT:    br label [[_ZL2JNID_EXIT:%.*]]
 // FINITEONLY:       if.then2.i:
-// FINITEONLY-NEXT:    [[CALL_I22_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_j1_f64(double noundef nofpclass(nan inf) [[Y]]) #[[ATTR14]]
+// FINITEONLY-NEXT:    [[CALL_I22_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_j1_f64(double noundef nofpclass(nan inf) [[Y]]) #[[ATTR16]]
 // FINITEONLY-NEXT:    br label [[_ZL2JNID_EXIT]]
 // FINITEONLY:       if.end4.i:
-// FINITEONLY-NEXT:    [[CALL_I_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_j0_f64(double noundef nofpclass(nan inf) [[Y]]) #[[ATTR14]]
-// FINITEONLY-NEXT:    [[CALL_I21_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_j1_f64(double noundef nofpclass(nan inf) [[Y]]) #[[ATTR14]]
+// FINITEONLY-NEXT:    [[CALL_I_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_j0_f64(double noundef nofpclass(nan inf) [[Y]]) #[[ATTR16]]
+// FINITEONLY-NEXT:    [[CALL_I21_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_j1_f64(double noundef nofpclass(nan inf) [[Y]]) #[[ATTR16]]
 // FINITEONLY-NEXT:    [[CMP7_I1:%.*]] = icmp sgt i32 [[X]], 1
 // FINITEONLY-NEXT:    br i1 [[CMP7_I1]], label [[FOR_BODY_I:%.*]], label [[_ZL2JNID_EXIT]]
 // FINITEONLY:       for.body.i:
@@ -2912,14 +2888,14 @@ extern "C" __device__ float test_jnf(int x, float y) {
 // APPROX-NEXT:      i32 1, label [[IF_THEN2_I:%.*]]
 // APPROX-NEXT:    ]
 // APPROX:       if.then.i:
-// APPROX-NEXT:    [[CALL_I20_I:%.*]] = tail call contract noundef double @__ocml_j0_f64(double noundef [[Y:%.*]]) #[[ATTR14]]
+// APPROX-NEXT:    [[CALL_I20_I:%.*]] = tail call contract noundef double @__ocml_j0_f64(double noundef [[Y:%.*]]) #[[ATTR16]]
 // APPROX-NEXT:    br label [[_ZL2JNID_EXIT:%.*]]
 // APPROX:       if.then2.i:
-// APPROX-NEXT:    [[CALL_I22_I:%.*]] = tail call contract noundef double @__ocml_j1_f64(double noundef [[Y]]) #[[ATTR14]]
+// APPROX-NEXT:    [[CALL_I22_I:%.*]] = tail call contract noundef double @__ocml_j1_f64(double noundef [[Y]]) #[[ATTR16]]
 // APPROX-NEXT:    br label [[_ZL2JNID_EXIT]]
 // APPROX:       if.end4.i:
-// APPROX-NEXT:    [[CALL_I_I:%.*]] = tail call contract noundef double @__ocml_j0_f64(double noundef [[Y]]) #[[ATTR14]]
-// APPROX-NEXT:    [[CALL_I21_I:%.*]] = tail call contract noundef double @__ocml_j1_f64(double noundef [[Y]]) #[[ATTR14]]
+// APPROX-NEXT:    [[CALL_I_I:%.*]] = tail call contract noundef double @__ocml_j0_f64(double noundef [[Y]]) #[[ATTR16]]
+// APPROX-NEXT:    [[CALL_I21_I:%.*]] = tail call contract noundef double @__ocml_j1_f64(double noundef [[Y]]) #[[ATTR16]]
 // APPROX-NEXT:    [[CMP7_I1:%.*]] = icmp sgt i32 [[X]], 1
 // APPROX-NEXT:    br i1 [[CMP7_I1]], label [[FOR_BODY_I:%.*]], label [[_ZL2JNID_EXIT]]
 // APPROX:       for.body.i:
@@ -2945,14 +2921,14 @@ extern "C" __device__ float test_jnf(int x, float y) {
 // NCRDIV-NEXT:      i32 1, label [[IF_THEN2_I:%.*]]
 // NCRDIV-NEXT:    ]
 // NCRDIV:       if.then.i:
-// NCRDIV-NEXT:    [[CALL_I20_I:%.*]] = tail call contract noundef double @__ocml_j0_f64(double noundef [[Y:%.*]]) #[[ATTR14]]
+// NCRDIV-NEXT:    [[CALL_I20_I:%.*]] = tail call contract noundef double @__ocml_j0_f64(double noundef [[Y:%.*]]) #[[ATTR16]]
 // NCRDIV-NEXT:    br label [[_ZL2JNID_EXIT:%.*]]
 // NCRDIV:       if.then2.i:
-// NCRDIV-NEXT:    [[CALL_I22_I:%.*]] = tail call contract noundef double @__ocml_j1_f64(double noundef [[Y]]) #[[ATTR14]]
+// NCRDIV-NEXT:    [[CALL_I22_I:%.*]] = tail call contract noundef double @__ocml_j1_f64(double noundef [[Y]]) #[[ATTR16]]
 // NCRDIV-NEXT:    br label [[_ZL2JNID_EXIT]]
 // NCRDIV:       if.end4.i:
-// NCRDIV-NEXT:    [[CALL_I_I:%.*]] = tail call contract noundef double @__ocml_j0_f64(double noundef [[Y]]) #[[ATTR14]]
-// NCRDIV-NEXT:    [[CALL_I21_I:%.*]] = tail call contract noundef double @__ocml_j1_f64(double noundef [[Y]]) #[[ATTR14]]
+// NCRDIV-NEXT:    [[CALL_I_I:%.*]] = tail call contract noundef double @__ocml_j0_f64(double noundef [[Y]]) #[[ATTR16]]
+// NCRDIV-NEXT:    [[CALL_I21_I:%.*]] = tail call contract noundef double @__ocml_j1_f64(double noundef [[Y]]) #[[ATTR16]]
 // NCRDIV-NEXT:    [[CMP7_I1:%.*]] = icmp sgt i32 [[X]], 1
 // NCRDIV-NEXT:    br i1 [[CMP7_I1]], label [[FOR_BODY_I:%.*]], label [[_ZL2JNID_EXIT]]
 // NCRDIV:       for.body.i:
@@ -3068,22 +3044,22 @@ extern "C" __device__ double test_ldexp(double x, int y) {
 
 // DEFAULT-LABEL: @test_lgammaf(
 // DEFAULT-NEXT:  entry:
-// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_lgamma_f32(float noundef [[X:%.*]]) #[[ATTR14]]
+// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_lgamma_f32(float noundef [[X:%.*]]) #[[ATTR16]]
 // DEFAULT-NEXT:    ret float [[CALL_I]]
 //
 // FINITEONLY-LABEL: @test_lgammaf(
 // FINITEONLY-NEXT:  entry:
-// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_lgamma_f32(float noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR14]]
+// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_lgamma_f32(float noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR16]]
 // FINITEONLY-NEXT:    ret float [[CALL_I]]
 //
 // APPROX-LABEL: @test_lgammaf(
 // APPROX-NEXT:  entry:
-// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_lgamma_f32(float noundef [[X:%.*]]) #[[ATTR14]]
+// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_lgamma_f32(float noundef [[X:%.*]]) #[[ATTR16]]
 // APPROX-NEXT:    ret float [[CALL_I]]
 //
 // NCRDIV-LABEL: @test_lgammaf(
 // NCRDIV-NEXT:  entry:
-// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_lgamma_f32(float noundef [[X:%.*]]) #[[ATTR14]]
+// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_lgamma_f32(float noundef [[X:%.*]]) #[[ATTR16]]
 // NCRDIV-NEXT:    ret float [[CALL_I]]
 //
 // AMDGCNSPIRV-LABEL: @test_lgammaf(
@@ -3097,22 +3073,22 @@ extern "C" __device__ float test_lgammaf(float x) {
 
 // DEFAULT-LABEL: @test_lgamma(
 // DEFAULT-NEXT:  entry:
-// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_lgamma_f64(double noundef [[X:%.*]]) #[[ATTR14]]
+// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_lgamma_f64(double noundef [[X:%.*]]) #[[ATTR16]]
 // DEFAULT-NEXT:    ret double [[CALL_I]]
 //
 // FINITEONLY-LABEL: @test_lgamma(
 // FINITEONLY-NEXT:  entry:
-// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_lgamma_f64(double noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR14]]
+// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_lgamma_f64(double noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR16]]
 // FINITEONLY-NEXT:    ret double [[CALL_I]]
 //
 // APPROX-LABEL: @test_lgamma(
 // APPROX-NEXT:  entry:
-// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_lgamma_f64(double noundef [[X:%.*]]) #[[ATTR14]]
+// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_lgamma_f64(double noundef [[X:%.*]]) #[[ATTR16]]
 // APPROX-NEXT:    ret double [[CALL_I]]
 //
 // NCRDIV-LABEL: @test_lgamma(
 // NCRDIV-NEXT:  entry:
-// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_lgamma_f64(double noundef [[X:%.*]]) #[[ATTR14]]
+// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_lgamma_f64(double noundef [[X:%.*]]) #[[ATTR16]]
 // NCRDIV-NEXT:    ret double [[CALL_I]]
 //
 // AMDGCNSPIRV-LABEL: @test_lgamma(
@@ -3291,22 +3267,22 @@ extern "C" __device__ float test_log10f(float x) {
 
 // DEFAULT-LABEL: @test_log10(
 // DEFAULT-NEXT:  entry:
-// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_log10_f64(double noundef [[X:%.*]]) #[[ATTR13]]
+// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_log10_f64(double noundef [[X:%.*]]) #[[ATTR15]]
 // DEFAULT-NEXT:    ret double [[CALL_I]]
 //
 // FINITEONLY-LABEL: @test_log10(
 // FINITEONLY-NEXT:  entry:
-// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_log10_f64(double noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR13]]
+// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_log10_f64(double noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR15]]
 // FINITEONLY-NEXT:    ret double [[CALL_I]]
 //
 // APPROX-LABEL: @test_log10(
 // APPROX-NEXT:  entry:
-// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_log10_f64(double noundef [[X:%.*]]) #[[ATTR13]]
+// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_log10_f64(double noundef [[X:%.*]]) #[[ATTR15]]
 // APPROX-NEXT:    ret double [[CALL_I]]
 //
 // NCRDIV-LABEL: @test_log10(
 // NCRDIV-NEXT:  entry:
-// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_log10_f64(double noundef [[X:%.*]]) #[[ATTR13]]
+// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_log10_f64(double noundef [[X:%.*]]) #[[ATTR15]]
 // NCRDIV-NEXT:    ret double [[CALL_I]]
 //
 // AMDGCNSPIRV-LABEL: @test_log10(
@@ -3320,22 +3296,22 @@ extern "C" __device__ double test_log10(double x) {
 
 // DEFAULT-LABEL: @test_log1pf(
 // DEFAULT-NEXT:  entry:
-// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_log1p_f32(float noundef [[X:%.*]]) #[[ATTR13]]
+// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_log1p_f32(float noundef [[X:%.*]]) #[[ATTR15]]
 // DEFAULT-NEXT:    ret float [[CALL_I]]
 //
 // FINITEONLY-LABEL: @test_log1pf(
 // FINITEONLY-NEXT:  entry:
-// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_log1p_f32(float noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR13]]
+// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_log1p_f32(float noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR15]]
 // FINITEONLY-NEXT:    ret float [[CALL_I]]
 //
 // APPROX-LABEL: @test_log1pf(
 // APPROX-NEXT:  entry:
-// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_log1p_f32(float noundef [[X:%.*]]) #[[ATTR13]]
+// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_log1p_f32(float noundef [[X:%.*]]) #[[ATTR15]]
 // APPROX-NEXT:    ret float [[CALL_I]]
 //
 // NCRDIV-LABEL: @test_log1pf(
 // NCRDIV-NEXT:  entry:
-// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_log1p_f32(float noundef [[X:%.*]]) #[[ATTR13]]
+// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_log1p_f32(float noundef [[X:%.*]]) #[[ATTR15]]
 // NCRDIV-NEXT:    ret float [[CALL_I]]
 //
 // AMDGCNSPIRV-LABEL: @test_log1pf(
@@ -3349,22 +3325,22 @@ extern "C" __device__ float test_log1pf(float x) {
 
 // DEFAULT-LABEL: @test_log1p(
 // DEFAULT-NEXT:  entry:
-// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_log1p_f64(double noundef [[X:%.*]]) #[[ATTR13]]
+// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_log1p_f64(double noundef [[X:%.*]]) #[[ATTR15]]
 // DEFAULT-NEXT:    ret double [[CALL_I]]
 //
 // FINITEONLY-LABEL: @test_log1p(
 // FINITEONLY-NEXT:  entry:
-// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_log1p_f64(double noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR13]]
+// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_log1p_f64(double noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR15]]
 // FINITEONLY-NEXT:    ret double [[CALL_I]]
 //
 // APPROX-LABEL: @test_log1p(
 // APPROX-NEXT:  entry:
-// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_log1p_f64(double noundef [[X:%.*]]) #[[ATTR13]]
+// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_log1p_f64(double noundef [[X:%.*]]) #[[ATTR15]]
 // APPROX-NEXT:    ret double [[CALL_I]]
 //
 // NCRDIV-LABEL: @test_log1p(
 // NCRDIV-NEXT:  entry:
-// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_log1p_f64(double noundef [[X:%.*]]) #[[ATTR13]]
+// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_log1p_f64(double noundef [[X:%.*]]) #[[ATTR15]]
 // NCRDIV-NEXT:    ret double [[CALL_I]]
 //
 // AMDGCNSPIRV-LABEL: @test_log1p(
@@ -3407,22 +3383,22 @@ extern "C" __device__ float test_log2f(float x) {
 
 // DEFAULT-LABEL: @test_log2(
 // DEFAULT-NEXT:  entry:
-// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_log2_f64(double noundef [[X:%.*]]) #[[ATTR13]]
+// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_log2_f64(double noundef [[X:%.*]]) #[[ATTR15]]
 // DEFAULT-NEXT:    ret double [[CALL_I]]
 //
 // FINITEONLY-LABEL: @test_log2(
 // FINITEONLY-NEXT:  entry:
-// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_log2_f64(double noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR13]]
+// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_log2_f64(double noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR15]]
 // FINITEONLY-NEXT:    ret double [[CALL_I]]
 //
 // APPROX-LABEL: @test_log2(
 // APPROX-NEXT:  entry:
-// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_log2_f64(double noundef [[X:%.*]]) #[[ATTR13]]
+// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_log2_f64(double noundef [[X:%.*]]) #[[ATTR15]]
 // APPROX-NEXT:    ret double [[CALL_I]]
 //
 // NCRDIV-LABEL: @test_log2(
 // NCRDIV-NEXT:  entry:
-// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_log2_f64(double noundef [[X:%.*]]) #[[ATTR13]]
+// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_log2_f64(double noundef [[X:%.*]]) #[[ATTR15]]
 // NCRDIV-NEXT:    ret double [[CALL_I]]
 //
 // AMDGCNSPIRV-LABEL: @test_log2(
@@ -3436,22 +3412,22 @@ extern "C" __device__ double test_log2(double x) {
 
 // DEFAULT-LABEL: @test_logbf(
 // DEFAULT-NEXT:  entry:
-// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_logb_f32(float noundef [[X:%.*]]) #[[ATTR12]]
+// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_logb_f32(float noundef [[X:%.*]]) #[[ATTR14]]
 // DEFAULT-NEXT:    ret float [[CALL_I]]
 //
 // FINITEONLY-LABEL: @test_logbf(
 // FINITEONLY-NEXT:  entry:
-// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_logb_f32(float noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR12]]
+// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_logb_f32(float noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR14]]
 // FINITEONLY-NEXT:    ret float [[CALL_I]]
 //
 // APPROX-LABEL: @test_logbf(
 // APPROX-NEXT:  entry:
-// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_logb_f32(float noundef [[X:%.*]]) #[[ATTR12]]
+// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_logb_f32(float noundef [[X:%.*]]) #[[ATTR14]]
 // APPROX-NEXT:    ret float [[CALL_I]]
 //
 // NCRDIV-LABEL: @test_logbf(
 // NCRDIV-NEXT:  entry:
-// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_logb_f32(float noundef [[X:%.*]]) #[[ATTR12]]
+// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_logb_f32(float noundef [[X:%.*]]) #[[ATTR14]]
 // NCRDIV-NEXT:    ret float [[CALL_I]]
 //
 // AMDGCNSPIRV-LABEL: @test_logbf(
@@ -3465,22 +3441,22 @@ extern "C" __device__ float test_logbf(float x) {
 
 // DEFAULT-LABEL: @test_logb(
 // DEFAULT-NEXT:  entry:
-// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_logb_f64(double noundef [[X:%.*]]) #[[ATTR12]]
+// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_logb_f64(double noundef [[X:%.*]]) #[[ATTR14]]
 // DEFAULT-NEXT:    ret double [[CALL_I]]
 //
 // FINITEONLY-LABEL: @test_logb(
 // FINITEONLY-NEXT:  entry:
-// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_logb_f64(double noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR12]]
+// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_logb_f64(double noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR14]]
 // FINITEONLY-NEXT:    ret double [[CALL_I]]
 //
 // APPROX-LABEL: @test_logb(
 // APPROX-NEXT:  entry:
-// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_logb_f64(double noundef [[X:%.*]]) #[[ATTR12]]
+// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_logb_f64(double noundef [[X:%.*]]) #[[ATTR14]]
 // APPROX-NEXT:    ret double [[CALL_I]]
 //
 // NCRDIV-LABEL: @test_logb(
 // NCRDIV-NEXT:  entry:
-// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_logb_f64(double noundef [[X:%.*]]) #[[ATTR12]]
+// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_logb_f64(double noundef [[X:%.*]]) #[[ATTR14]]
 // NCRDIV-NEXT:    ret double [[CALL_I]]
 //
 // AMDGCNSPIRV-LABEL: @test_logb(
@@ -3660,41 +3636,41 @@ extern "C" __device__ long int test_lround(double x) {
 // DEFAULT-LABEL: @test_modff(
 // DEFAULT-NEXT:  entry:
 // DEFAULT-NEXT:    [[__TMP_I:%.*]] = alloca float, align 4, addrspace(5)
-// DEFAULT-NEXT:    call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[__TMP_I]]) #[[ATTR15:[0-9]+]]
-// DEFAULT-NEXT:    [[CALL_I:%.*]] = call contract noundef float @__ocml_modf_f32(float noundef [[X:%.*]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR14]]
+// DEFAULT-NEXT:    call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[__TMP_I]]) #[[ATTR17:[0-9]+]]
+// DEFAULT-NEXT:    [[CALL_I:%.*]] = call contract noundef float @__ocml_modf_f32(float noundef [[X:%.*]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]]
 // DEFAULT-NEXT:    [[TMP0:%.*]] = load float, ptr addrspace(5) [[__TMP_I]], align 4, !tbaa [[TBAA16:![0-9]+]]
 // DEFAULT-NEXT:    store float [[TMP0]], ptr [[Y:%.*]], align 4, !tbaa [[TBAA16]]
-// DEFAULT-NEXT:    call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[__TMP_I]]) #[[ATTR15]]
+// DEFAULT-NEXT:    call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]]
 // DEFAULT-NEXT:    ret float [[CALL_I]]
 //
 // FINITEONLY-LABEL: @test_modff(
 // FINITEONLY-NEXT:  entry:
 // FINITEONLY-NEXT:    [[__TMP_I:%.*]] = alloca float, align 4, addrspace(5)
-// FINITEONLY-NEXT:    call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[__TMP_I]]) #[[ATTR15:[0-9]+]]
-// FINITEONLY-NEXT:    [[CALL_I:%.*]] = call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_modf_f32(float noundef nofpclass(nan inf) [[X:%.*]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR14]]
+// FINITEONLY-NEXT:    call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[__TMP_I]]) #[[ATTR17:[0-9]+]]
+// FINITEONLY-NEXT:    [[CALL_I:%.*]] = call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_modf_f32(float noundef nofpclass(nan inf) [[X:%.*]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]]
 // FINITEONLY-NEXT:    [[TMP0:%.*]] = load float, ptr addrspace(5) [[__TMP_I]], align 4, !tbaa [[TBAA16:![0-9]+]]
 // FINITEONLY-NEXT:    store float [[TMP0]], ptr [[Y:%.*]], align 4, !tbaa [[TBAA16]]
-// FINITEONLY-NEXT:    call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[__TMP_I]]) #[[ATTR15]]
+// FINITEONLY-NEXT:    call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]]
 // FINITEONLY-NEXT:    ret float [[CALL_I]]
 //
 // APPROX-LABEL: @test_modff(
 // APPROX-NEXT:  entry:
 // APPROX-NEXT:    [[__TMP_I:%.*]] = alloca float, align 4, addrspace(5)
-// APPROX-NEXT:    call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[__TMP_I]]) #[[ATTR15:[0-9]+]]
-// APPROX-NEXT:    [[CALL_I:%.*]] = call contract noundef float @__ocml_modf_f32(float noundef [[X:%.*]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR14]]
+// APPROX-NEXT:    call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[__TMP_I]]) #[[ATTR17:[0-9]+]]
+// APPROX-NEXT:    [[CALL_I:%.*]] = call contract noundef float @__ocml_modf_f32(float noundef [[X:%.*]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]]
 // APPROX-NEXT:    [[TMP0:%.*]] = load float, ptr addrspace(5) [[__TMP_I]], align 4, !tbaa [[TBAA16:![0-9]+]]
 // APPROX-NEXT:    store float [[TMP0]], ptr [[Y:%.*]], align 4, !tbaa [[TBAA16]]
-// APPROX-NEXT:    call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[__TMP_I]]) #[[ATTR15]]
+// APPROX-NEXT:    call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]]
 // APPROX-NEXT:    ret float [[CALL_I]]
 //
 // NCRDIV-LABEL: @test_modff(
 // NCRDIV-NEXT:  entry:
 // NCRDIV-NEXT:    [[__TMP_I:%.*]] = alloca float, align 4, addrspace(5)
-// NCRDIV-NEXT:    call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[__TMP_I]]) #[[ATTR15:[0-9]+]]
-// NCRDIV-NEXT:    [[CALL_I:%.*]] = call contract noundef float @__ocml_modf_f32(float noundef [[X:%.*]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR14]]
+// NCRDIV-NEXT:    call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[__TMP_I]]) #[[ATTR17:[0-9]+]]
+// NCRDIV-NEXT:    [[CALL_I:%.*]] = call contract noundef float @__ocml_modf_f32(float noundef [[X:%.*]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]]
 // NCRDIV-NEXT:    [[TMP0:%.*]] = load float, ptr addrspace(5) [[__TMP_I]], align 4, !tbaa [[TBAA17:![0-9]+]]
 // NCRDIV-NEXT:    store float [[TMP0]], ptr [[Y:%.*]], align 4, !tbaa [[TBAA17]]
-// NCRDIV-NEXT:    call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[__TMP_I]]) #[[ATTR15]]
+// NCRDIV-NEXT:    call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]]
 // NCRDIV-NEXT:    ret float [[CALL_I]]
 //
 // AMDGCNSPIRV-LABEL: @test_modff(
@@ -3715,41 +3691,41 @@ extern "C" __device__ float test_modff(float x, float* y) {
 // DEFAULT-LABEL: @test_modf(
 // DEFAULT-NEXT:  entry:
 // DEFAULT-NEXT:    [[__TMP_I:%.*]] = alloca double, align 8, addrspace(5)
-// DEFAULT-NEXT:    call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[__TMP_I]]) #[[ATTR15]]
-// DEFAULT-NEXT:    [[CALL_I:%.*]] = call contract noundef double @__ocml_modf_f64(double noundef [[X:%.*]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR14]]
+// DEFAULT-NEXT:    call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]]
+// DEFAULT-NEXT:    [[CALL_I:%.*]] = call contract noundef double @__ocml_modf_f64(double noundef [[X:%.*]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]]
 // DEFAULT-NEXT:    [[TMP0:%.*]] = load double, ptr addrspace(5) [[__TMP_I]], align 8, !tbaa [[TBAA18:![0-9]+]]
 // DEFAULT-NEXT:    store double [[TMP0]], ptr [[Y:%.*]], align 8, !tbaa [[TBAA18]]
-// DEFAULT-NEXT:    call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[__TMP_I]]) #[[ATTR15]]
+// DEFAULT-NEXT:    call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]]
 // DEFAULT-NEXT:    ret double [[CALL_I]]
 //
 // FINITEONLY-LABEL: @test_modf(
 // FINITEONLY-NEXT:  entry:
 // FINITEONLY-NEXT:    [[__TMP_I:%.*]] = alloca double, align 8, addrspace(5)
-// FINITEONLY-NEXT:    call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[__TMP_I]]) #[[ATTR15]]
-// FINITEONLY-NEXT:    [[CALL_I:%.*]] = call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_modf_f64(double noundef nofpclass(nan inf) [[X:%.*]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR14]]
+// FINITEONLY-NEXT:    call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]]
+// FINITEONLY-NEXT:    [[CALL_I:%.*]] = call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_modf_f64(double noundef nofpclass(nan inf) [[X:%.*]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]]
 // FINITEONLY-NEXT:    [[TMP0:%.*]] = load double, ptr addrspace(5) [[__TMP_I]], align 8, !tbaa [[TBAA18:![0-9]+]]
 // FINITEONLY-NEXT:    store double [[TMP0]], ptr [[Y:%.*]], align 8, !tbaa [[TBAA18]]
-// FINITEONLY-NEXT:    call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[__TMP_I]]) #[[ATTR15]]
+// FINITEONLY-NEXT:    call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]]
 // FINITEONLY-NEXT:    ret double [[CALL_I]]
 //
 // APPROX-LABEL: @test_modf(
 // APPROX-NEXT:  entry:
 // APPROX-NEXT:    [[__TMP_I:%.*]] = alloca double, align 8, addrspace(5)
-// APPROX-NEXT:    call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[__TMP_I]]) #[[ATTR15]]
-// APPROX-NEXT:    [[CALL_I:%.*]] = call contract noundef double @__ocml_modf_f64(double noundef [[X:%.*]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR14]]
+// APPROX-NEXT:    call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]]
+// APPROX-NEXT:    [[CALL_I:%.*]] = call contract noundef double @__ocml_modf_f64(double noundef [[X:%.*]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]]
 // APPROX-NEXT:    [[TMP0:%.*]] = load double, ptr addrspace(5) [[__TMP_I]], align 8, !tbaa [[TBAA18:![0-9]+]]
 // APPROX-NEXT:    store double [[TMP0]], ptr [[Y:%.*]], align 8, !tbaa [[TBAA18]]
-// APPROX-NEXT:    call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[__TMP_I]]) #[[ATTR15]]
+// APPROX-NEXT:    call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]]
 // APPROX-NEXT:    ret double [[CALL_I]]
 //
 // NCRDIV-LABEL: @test_modf(
 // NCRDIV-NEXT:  entry:
 // NCRDIV-NEXT:    [[__TMP_I:%.*]] = alloca double, align 8, addrspace(5)
-// NCRDIV-NEXT:    call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[__TMP_I]]) #[[ATTR15]]
-// NCRDIV-NEXT:    [[CALL_I:%.*]] = call contract noundef double @__ocml_modf_f64(double noundef [[X:%.*]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR14]]
+// NCRDIV-NEXT:    call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]]
+// NCRDIV-NEXT:    [[CALL_I:%.*]] = call contract noundef double @__ocml_modf_f64(double noundef [[X:%.*]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]]
 // NCRDIV-NEXT:    [[TMP0:%.*]] = load double, ptr addrspace(5) [[__TMP_I]], align 8, !tbaa [[TBAA19:![0-9]+]]
 // NCRDIV-NEXT:    store double [[TMP0]], ptr [[Y:%.*]], align 8, !tbaa [[TBAA19]]
-// NCRDIV-NEXT:    call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[__TMP_I]]) #[[ATTR15]]
+// NCRDIV-NEXT:    call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]]
 // NCRDIV-NEXT:    ret double [[CALL_I]]
 //
 // AMDGCNSPIRV-LABEL: @test_modf(
@@ -3771,96 +3747,90 @@ extern "C" __device__ double test_modf(double x, double* y) {
 // DEFAULT-NEXT:  entry:
 // DEFAULT-NEXT:    [[TMP0:%.*]] = load i8, ptr [[TAG:%.*]], align 1, !tbaa [[TBAA4]]
 // DEFAULT-NEXT:    [[CMP_I_I:%.*]] = icmp eq i8 [[TMP0]], 48
-// DEFAULT-NEXT:    br i1 [[CMP_I_I]], label [[IF_THEN_I_I:%.*]], label [[WHILE_COND_I14_I_I:%.*]]
+// DEFAULT-NEXT:    br i1 [[CMP_I_I]], label [[IF_THEN_I_I:%.*]], label [[WHILE_COND_I14_I_I_PREHEADER:%.*]]
+// DEFAULT:       while.cond.i14.i.i.preheader:
+// DEFAULT-NEXT:    [[TMP1:%.*]] = load i8, ptr [[TAG]], align 1, !tbaa [[TBAA4]]
+// DEFAULT-NEXT:    [[CMP_NOT_I17_I_I5:%.*]] = icmp eq i8 [[TMP1]], 0
+// DEFAULT-NEXT:    br i1 [[CMP_NOT_I17_I_I5]], label [[_ZL4NANFPKC_EXIT:%.*]], label [[WHILE_BODY_I18_I_I:%.*]]
 // DEFAULT:       if.then.i.i:
 // DEFAULT-NEXT:    [[INCDEC_PTR_I_I:%.*]] = getelementptr inbounds nuw i8, ptr [[TAG]], i64 1
-// DEFAULT-NEXT:    [[TMP1:%.*]] = load i8, ptr [[INCDEC_PTR_I_I]], align 1, !tbaa [[TBAA4]]
-// DEFAULT-NEXT:    switch i8 [[TMP1]], label [[WHILE_COND_I_I_I:%.*]] [
-// DEFAULT-NEXT:      i8 120, label [[WHILE_COND_I30_I_I_PREHEADER:%.*]]
-// DEFAULT-NEXT:      i8 88, label [[WHILE_COND_I30_I_I_PREHEADER]]
+// DEFAULT-NEXT:    [[TMP2:%.*]] = load i8, ptr [[INCDEC_PTR_I_I]], align 1, !tbaa [[TBAA4]]
+// DEFAULT-NEXT:    switch i8 [[TMP2]], label [[WHILE_COND_I_I_I_PREHEADER:%.*]] [
+// DEFAULT-NEXT:      i8 120, label [[IF_THEN5_I_I:%.*]]
+// DEFAULT-NEXT:      i8 88, label [[IF_THEN5_I_I]]
 // DEFAULT-NEXT:    ]
-// DEFAULT:       while.cond.i30.i.i.preheader:
-// DEFAULT-NEXT:    br label [[WHILE_COND_I30_I_I:%.*]]
-// DEFAULT:       while.cond.i30.i.i:
-// DEFAULT-NEXT:    [[__TAGP_ADDR_0_I31_I_I:%.*]] = phi ptr [ [[__TAGP_ADDR_1_I37_I_I:%.*]], [[CLEANUP_I36_I_I:%.*]] ], [ [[INCDEC_PTR_I_I]], [[WHILE_COND_I30_I_I_PREHEADER]] ]
-// DEFAULT-NEXT:    [[__R_0_I32_I_I:%.*]] = phi i64 [ [[__R_2_I_I_I:%.*]], [[CLEANUP_I36_I_I]] ], [ 0, [[WHILE_COND_I30_I_I_PREHEADER]] ]
-// DEFAULT-NEXT:    [[TMP2:%.*]] = load i8, ptr [[__TAGP_ADDR_0_I31_I_I]], align 1, !tbaa [[TBAA4]]
-// DEFAULT-NEXT:    [[CMP_NOT_I33_I_I:%.*]] = icmp eq i8 [[TMP2]], 0
-// DEFAULT-NEXT:    br i1 [[CMP_NOT_I33_I_I]], label [[_ZL4NANFPKC_EXIT:%.*]], label [[WHILE_BODY_I34_I_I:%.*]]
-// DEFAULT:       while.body.i34.i.i:
-// DEFAULT-NEXT:    [[TMP3:%.*]] = add i8 [[TMP2]], -48
-// DEFAULT-NEXT:    [[OR_COND_I35_I_I:%.*]] = icmp ult i8 [[TMP3]], 10
-// DEFAULT-NEXT:    br i1 [[OR_COND_I35_I_I]], label [[IF_END31_I_I_I:%.*]], label [[IF_ELSE_I_I_I:%.*]]
+// DEFAULT:       while.cond.i.i.i.preheader:
+// DEFAULT-NEXT:    [[TMP3:%.*]] = load i8, ptr [[INCDEC_PTR_I_I]], align 1, !tbaa [[TBAA4]]
+// DEFAULT-NEXT:    [[CMP_NOT_I_I_I14:%.*]] = icmp eq i8 [[TMP3]], 0
+// DEFAULT-NEXT:    br i1 [[CMP_NOT_I_I_I14]], label [[_ZL4NANFPKC_EXIT]], label [[WHILE_BODY_I_I_I:%.*]]
+// DEFAULT:       if.then5.i.i:
+// DEFAULT-NEXT:    [[TMP4:%.*]] = load i8, ptr [[INCDEC_PTR_I_I]], align 1, !tbaa [[TBAA4]]
+// DEFAULT-NEXT:    [[CMP_NOT_I30_I_I9:%.*]] = icmp eq i8 [[TMP4]], 0
+// DEFAULT-NEXT:    br i1 [[CMP_NOT_I30_I_I9]], label [[_ZL4NANFPKC_EXIT]], label [[WHILE_BODY_I31_I_I:%.*]]
+// DEFAULT:       while.body.i31.i.i:
+// DEFAULT-NEXT:    [[TMP5:%.*]] = phi i8 [ [[TMP9:%.*]], [[IF_END31_I_I_I:%.*]] ], [ [[TMP4]], [[IF_THEN5_I_I]] ]
+// DEFAULT-NEXT:    [[__R_0_I29_I_I11:%.*]] = phi i64 [ [[ADD28_I_I_I:%.*]], [[IF_END31_I_I_I]] ], [ 0, [[IF_THEN5_I_I]] ]
+// DEFAULT-NEXT:    [[__TAGP_ADDR_0_I28_I_I10:%.*]] = phi ptr [ [[INCDEC_PTR_I34_I_I:%.*]], [[IF_END31_I_I_I]] ], [ [[INCDEC_PTR_I_I]], [[IF_THEN5_I_I]] ]
+// DEFAULT-NEXT:    [[TMP6:%.*]] = add i8 [[TMP5]], -48
+// DEFAULT-NEXT:    [[OR_COND_I32_I_I:%.*]] = icmp ult i8 [[TMP6]], 10
+// DEFAULT-NEXT:    br i1 [[OR_COND_I32_I_I]], label [[IF_END31_I_I_I]], label [[IF_ELSE_I_I_I:%.*]]
 // DEFAULT:       if.else.i.i.i:
-// DEFAULT-NEXT:    [[TMP4:%.*]] = add i8 [[TMP2]], -97
-// DEFAULT-NEXT:    [[OR_COND33_I_I_I:%.*]] = icmp ult i8 [[TMP4]], 6
+// DEFAULT-NEXT:    [[TMP7:%.*]] = add i8 [[TMP5]], -97
+// DEFAULT-NEXT:    [[OR_COND33_I_I_I:%.*]] = icmp ult i8 [[TMP7]], 6
 // DEFAULT-NEXT:    br i1 [[OR_COND33_I_I_I]], label [[IF_END31_I_I_I]], label [[IF_ELSE17_I_I_I:%.*]]
 // DEFAULT:       if.else17.i.i.i:
-// DEFAULT-NEXT:    [[TMP5:%.*]] = add i8 [[TMP2]], -65
-// DEFAULT-NEXT:    [[OR_COND34_I_I_I:%.*]] = icmp ult i8 [[TMP5]], 6
-// DEFAULT-NEXT:    br i1 [[OR_COND34_I_I_I]], label [[IF_END31_I_I_I]], label [[CLEANUP_I36_I_I]]
+// DEFAULT-NEXT:    [[TMP8:%.*]] = add i8 [[TMP5]], -65
+// DEFAULT-NEXT:    [[OR_COND34_I_I_I:%.*]] = icmp ult i8 [[TMP8]], 6
+// DEFAULT-NEXT:    br i1 [[OR_COND34_I_I_I]], label [[IF_END31_I_I_I]], label [[_ZL4NANFPKC_EXIT]]
 // DEFAULT:       if.end31.i.i.i:
-// DEFAULT-NEXT:    [[DOTSINK:%.*]] = phi i64 [ -48, [[WHILE_BODY_I34_I_I]] ], [ -87, [[IF_ELSE_I_I_I]] ], [ -55, [[IF_ELSE17_I_I_I]] ]
-// DEFAULT-NEXT:    [[MUL24_I_I_I:%.*]] = shl i64 [[__R_0_I32_I_I]], 4
-// DEFAULT-NEXT:    [[CONV25_I_I_I:%.*]] = zext nneg i8 [[TMP2]] to i64
+// DEFAULT-NEXT:    [[DOTSINK:%.*]] = phi i64 [ -48, [[WHILE_BODY_I31_I_I]] ], [ -87, [[IF_ELSE_I_I_I]] ], [ -55, [[IF_ELSE17_I_I_I]] ]
+// DEFAULT-NEXT:    [[MUL24_I_I_I:%.*]] = shl i64 [[__R_0_I29_I_I11]], 4
+// DEFAULT-NEXT:    [[CONV25_I_I_I:%.*]] = zext nneg i8 [[TMP5]] to i64
 // DEFAULT-NEXT:    [[ADD26_I_I_I:%.*]] = add i64 [[MUL24_I_I_I]], [[DOTSINK]]
-// DEFAULT-NEXT:    [[ADD28_I_I_I:%.*]] = add i64 [[ADD26_I_I_I]], [[CONV25_I_I_I]]
-// DEFAULT-NEXT:    [[INCDEC_PTR_I40_I_I:%.*]] = getelementptr inbounds nuw i8, ptr [[__TAGP_ADDR_0_I31_I_I]], i64 1
-// DEFAULT-NEXT:    br label [[CLEANUP_I36_I_I]]
-// DEFAULT:       cleanup.i36.i.i:
-// DEFAULT-NEXT:    [[__TAGP_ADDR_1_I37_I_I]] = phi ptr [ [[INCDEC_PTR_I40_I_I]], [[IF_END31_I_I_I]] ], [ [[__TAGP_ADDR_0_I31_I_I]], [[IF_ELSE17_I_I_I]] ]
-// DEFAULT-NEXT:    [[__R_2_I_I_I]] = phi i64 [ [[ADD28_I_I_I]], [[IF_END31_I_I_I]] ], [ [[__R_0_I32_I_I]], [[IF_ELSE17_I_I_I]] ]
-// DEFAULT-NEXT:    [[COND_I_I_I:%.*]] = phi i1 [ true, [[IF_END31_I_I_I]] ], [ false, [[IF_ELSE17_I_I_I]] ]
-// DEFAULT-NEXT:    br i1 [[COND_I_I_I]], label [[WHILE_COND_I30_I_I]], label [[_ZL4NANFPKC_EXIT]], !llvm.loop [[LOOP11]]
-// DEFAULT:       while.cond.i.i.i:
-// DEFAULT-NEXT:    [[__TAGP_ADDR_0_I_I_I:%.*]] = phi ptr [ [[__TAGP_ADDR_1_I_I_I:%.*]], [[CLEANUP_I_I_I:%.*]] ], [ [[INCDEC_PTR_I_I]], [[IF_THEN_I_I]] ]
-// DEFAULT-NEXT:    [[__R_0_I_I_I:%.*]] = phi i64 [ [[__R_1_I_I_I:%.*]], [[CLEANUP_I_I_I]] ], [ 0, [[IF_THEN_I_I]] ]
-// DEFAULT-NEXT:    [[TMP6:%.*]] = load i8, ptr [[__TAGP_ADDR_0_I_I_I]], align 1, !tbaa [[TBAA4]]
-// DEFAULT-NEXT:    [[CMP_NOT_I_I_I:%.*]] = icmp eq i8 [[TMP6]], 0
-// DEFAULT-NEXT:    br i1 [[CMP_NOT_I_I_I]], label [[_ZL4NANFPKC_EXIT]], label [[WHILE_BODY_I_I_I:%.*]]
+// DEFAULT-NEXT:    [[ADD28_I_I_I]] = add i64 [[ADD26_I_I_I]], [[CONV25_I_I_I]]
+// DEFAULT-NEXT:    [[INCDEC_PTR_I34_I_I]] = getelementptr inbounds nuw i8, ptr [[__TAGP_ADDR_0_I28_I_I10]], i64 1
+// DEFAULT-NEXT:    [[TMP9]] = load i8, ptr [[INCDEC_PTR_I34_I_I]], align 1, !tbaa [[TBAA4]]
+// DEFAULT-NEXT:    [[CMP_NOT_I30_I_I:%.*]] = icmp eq i8 [[TMP9]], 0
+// DEFAULT-NEXT:    br i1 [[CMP_NOT_I30_I_I]], label [[_ZL4NANFPKC_EXIT]], label [[WHILE_BODY_I31_I_I]], !llvm.loop [[LOOP11]]
 // DEFAULT:       while.body.i.i.i:
-// DEFAULT-NEXT:    [[TMP7:%.*]] = and i8 [[TMP6]], -8
-// DEFAULT-NEXT:    [[OR_COND_I_I_I:%.*]] = icmp eq i8 [[TMP7]], 48
-// DEFAULT-NEXT:    br i1 [[OR_COND_I_I_I]], label [[IF_THEN_I_I_I:%.*]], label [[CLEANUP_I_I_I]]
+// DEFAULT-NEXT:    [[TMP10:%.*]] = phi i8 [ [[TMP12:%.*]], [[IF_THEN_I_I_I:%.*]] ], [ [[TMP3]], [[WHILE_COND_I_I_I_PREHEADER]] ]
+// DEFAULT-NEXT:    [[__R_0_I_I_I16:%.*]] = phi i64 [ [[SUB_I_I_I:%.*]], [[IF_THEN_I_I_I]] ], [ 0, [[WHILE_COND_I_I_I_PREHEADER]] ]
+// DEFAULT-NEXT:    [[__TAGP_ADDR_0_I_I_I15:%.*]] = phi ptr [ [[INCDEC_PTR_I_I_I:%.*]], [[IF_THEN_I_I_I]] ], [ [[INCDEC_PTR_I_I]], [[WHILE_COND_I_I_I_PREHEADER]] ]
+// DEFAULT-NEXT:    [[TMP11:%.*]] = and i8 [[TMP10]], -8
+// DEFAULT-NEXT:    [[OR_COND_I_I_I:%.*]] = icmp eq i8 [[TMP11]], 48
+// DEFAULT-NEXT:    br i1 [[OR_COND_I_I_I]], label [[IF_THEN_I_I_I]], label [[_ZL4NANFPKC_EXIT]]
 // DEFAULT:       if.then.i.i.i:
-// DEFAULT-NEXT:    [[MUL_I_I_I:%.*]] = shl i64 [[__R_0_I_I_I]], 3
-// DEFAULT-NEXT:    [[CONV5_I_I_I:%.*]] = zext nneg i8 [[TMP6]] to i64
+// DEFAULT-NEXT:    [[MUL_I_I_I:%.*]] = shl i64 [[__R_0_I_I_I16]], 3
+// DEFAULT-NEXT:    [[CONV5_I_I_I:%.*]] = zext nneg i8 [[TMP10]] to i64
 // DEFAULT-NEXT:    [[ADD_I_I_I:%.*]] = add i64 [[MUL_I_I_I]], -48
-// DEFAULT-NEXT:    [[SUB_I_I_I:%.*]] = add i64 [[ADD_I_I_I]], [[CONV5_I_I_I]]
-// DEFAULT-NEXT:    [[INCDEC_PTR_I_I_I:%.*]] = getelementptr inbounds nuw i8, ptr [[__TAGP_ADDR_0_I_I_I]], i64 1
-// DEFAULT-NEXT:    br label [[CLEANUP_I_I_I]]
-// DEFAULT:       cleanup.i.i.i:
-// DEFAULT-NEXT:    [[__TAGP_ADDR_1_I_I_I]] = phi ptr [ [[INCDEC_PTR_I_I_I]], [[IF_THEN_I_I_I]] ], [ [[__TAGP_ADDR_0_I_I_I]], [[WHILE_BODY_I_I_I]] ]
-// DEFAULT-NEXT:    [[__R_1_I_I_I]] = phi i64 [ [[SUB_I_I_I]], [[IF_THEN_I_I_I]] ], [ [[__R_0_I_I_I]], [[WHILE_BODY_I_I_I]] ]
-// DEFAULT-NEXT:    br i1 [[OR_COND_I_I_I]], label [[WHILE_COND_I_I_I]], label [[_ZL4NANFPKC_EXIT]], !llvm.loop [[LOOP7]]
-// DEFAULT:       while.cond.i14.i.i:
-// DEFAULT-NEXT:    [[__TAGP_ADDR_0_I15_I_I:%.*]] = phi ptr [ [[__TAGP_ADDR_1_I21_I_I:%.*]], [[CLEANUP_I20_I_I:%.*]] ], [ [[TAG]], [[ENTRY:%.*]] ]
-// DEFAULT-NEXT:    [[__R_0_I16_I_I:%.*]] = phi i64 [ [[__R_1_I22_I_I:%.*]], [[CLEANUP_I20_I_I]] ], [ 0, [[ENTRY]] ]
-// DEFAULT-NEXT:    [[TMP8:%.*]] = load i8, ptr [[__TAGP_ADDR_0_I15_I_I]], align 1, !tbaa [[TBAA4]]
-// DEFAULT-NEXT:    [[CMP_NOT_I17_I_I:%.*]] = icmp eq i8 [[TMP8]], 0
-// DEFAULT-NEXT:    br i1 [[CMP_NOT_I17_I_I]], label [[_ZL4NANFPKC_EXIT]], label [[WHILE_BODY_I18_I_I:%.*]]
+// DEFAULT-NEXT:    [[SUB_I_I_I]] = add i64 [[ADD_I_I_I]], [[CONV5_I_I_I]]
+// DEFAULT-NEXT:    [[INCDEC_PTR_I_I_I]] = getelementptr inbounds nuw i8, ptr [[__TAGP_ADDR_0_I_I_I15]], i64 1
+// DEFAULT-NEXT:    [[TMP12]] = load i8, ptr [[INCDEC_PTR_I_I_I]], align 1, !tbaa [[TBAA4]]
+// DEFAULT-NEXT:    [[CMP_NOT_I_I_I:%.*]] = icmp eq i8 [[TMP12]], 0
+// DEFAULT-NEXT:    br i1 [[CMP_NOT_I_I_I]], label [[_ZL4NANFPKC_EXIT]], label [[WHILE_BODY_I_I_I]], !llvm.loop [[LOOP7]]
 // DEFAULT:       while.body.i18.i.i:
-// DEFAULT-NEXT:    [[TMP9:%.*]] = add i8 [[TMP8]], -48
-// DEFAULT-NEXT:    [[OR_COND_I19_I_I:%.*]] = icmp ult i8 [[TMP9]], 10
-// DEFAULT-NEXT:    br i1 [[OR_COND_I19_I_I]], label [[IF_THEN_I24_I_I:%.*]], label [[CLEANUP_I20_I_I]]
-// DEFAULT:       if.then.i24.i.i:
-// DEFAULT-NEXT:    [[MUL_I25_I_I:%.*]] = mul i64 [[__R_0_I16_I_I]], 10
-// DEFAULT-NEXT:    [[CONV5_I26_I_I:%.*]] = zext nneg i8 [[TMP8]] to i64
-// DEFAULT-NEXT:    [[ADD_I27_I_I:%.*]] = add i64 [[MUL_I25_I_I]], -48
-// DEFAULT-NEXT:    [[SUB_I28_I_I:%.*]] = add i64 [[ADD_I27_I_I]], [[CONV5_I26_I_I]]
-// DEFAULT-NEXT:    [[INCDEC_PTR_I29_I_I:%.*]] = getelementptr inbounds nuw i8, ptr [[__TAGP_ADDR_0_I15_I_I]], i64 1
-// DEFAULT-NEXT:    br label [[CLEANUP_I20_I_I]]
-// DEFAULT:       cleanup.i20.i.i:
-// DEFAULT-NEXT:    [[__TAGP_ADDR_1_I21_I_I]] = phi ptr [ [[INCDEC_PTR_I29_I_I]], [[IF_THEN_I24_I_I]] ], [ [[__TAGP_ADDR_0_I15_I_I]], [[WHILE_BODY_I18_I_I]] ]
-// DEFAULT-NEXT:    [[__R_1_I22_I_I]] = phi i64 [ [[SUB_I28_I_I]], [[IF_THEN_I24_I_I]] ], [ [[__R_0_I16_I_I]], [[WHILE_BODY_I18_I_I]] ]
-// DEFAULT-NEXT:    br i1 [[OR_COND_I19_I_I]], label [[WHILE_COND_I14_I_I]], label [[_ZL4NANFPKC_EXIT]], !llvm.loop [[LOOP10]]
+// DEFAULT-NEXT:    [[TMP13:%.*]] = phi i8 [ [[TMP15:%.*]], [[IF_THEN_I21_I_I:%.*]] ], [ [[TMP1]], [[WHILE_COND_I14_I_I_PREHEADER]] ]
+// DEFAULT-NEXT:    [[__R_0_I16_I_I7:%.*]] = phi i64 [ [[SUB_I25_I_I:%.*]], [[IF_THEN_I21_I_I]] ], [ 0, [[WHILE_COND_I14_I_I_PREHEADER]] ]
+// DEFAULT-NEXT:    [[__TAGP_ADDR_0_I15_I_I6:%.*]] = phi ptr [ [[INCDEC_PTR_I26_I_I:%.*]], [[IF_THEN_I21_I_I]] ], [ [[TAG]], [[WHILE_COND_I14_I_I_PREHEADER]] ]
+// DEFAULT-NEXT:    [[TMP14:%.*]] = add i8 [[TMP13]], -48
+// DEFAULT-NEXT:    [[OR_COND_I19_I_I:%.*]] = icmp ult i8 [[TMP14]], 10
+// DEFAULT-NEXT:    br i1 [[OR_COND_I19_I_I]], label [[IF_THEN_I21_I_I]], label [[_ZL4NANFPKC_EXIT]]
+// DEFAULT:       if.then.i21.i.i:
+// DEFAULT-NEXT:    [[MUL_I22_I_I:%.*]] = mul i64 [[__R_0_I16_I_I7]], 10
+// DEFAULT-NEXT:    [[CONV5_I23_I_I:%.*]] = zext nneg i8 [[TMP13]] to i64
+// DEFAULT-NEXT:    [[ADD_I24_I_I:%.*]] = add i64 [[MUL_I22_I_I]], -48
+// DEFAULT-NEXT:    [[SUB_I25_I_I]] = add i64 [[ADD_I24_I_I]], [[CONV5_I23_I_I]]
+// DEFAULT-NEXT:    [[INCDEC_PTR_I26_I_I]] = getelementptr inbounds nuw i8, ptr [[__TAGP_ADDR_0_I15_I_I6]], i64 1
+// DEFAULT-NEXT:    [[TMP15]] = load i8, ptr [[INCDEC_PTR_I26_I_I]], align 1, !tbaa [[TBAA4]]
+// DEFAULT-NEXT:    [[CMP_NOT_I17_I_I:%.*]] = icmp eq i8 [[TMP15]], 0
+// DEFAULT-NEXT:    br i1 [[CMP_NOT_I17_I_I]], label [[_ZL4NANFPKC_EXIT]], label [[WHILE_BODY_I18_I_I]], !llvm.loop [[LOOP10]]
 // DEFAULT:       _ZL4nanfPKc.exit:
-// DEFAULT-NEXT:    [[RETVAL_0_I_I:%.*]] = phi i64 [ 0, [[CLEANUP_I_I_I]] ], [ [[__R_0_I_I_I]], [[WHILE_COND_I_I_I]] ], [ 0, [[CLEANUP_I36_I_I]] ], [ [[__R_0_I32_I_I]], [[WHILE_COND_I30_I_I]] ], [ 0, [[CLEANUP_I20_I_I]] ], [ [[__R_0_I16_I_I]], [[WHILE_COND_I14_I_I]] ]
+// DEFAULT-NEXT:    [[RETVAL_0_I_I:%.*]] = phi i64 [ 0, [[WHILE_COND_I_I_I_PREHEADER]] ], [ 0, [[IF_THEN5_I_I]] ], [ 0, [[WHILE_COND_I14_I_I_PREHEADER]] ], [ [[SUB_I_I_I]], [[IF_THEN_I_I_I]] ], [ 0, [[WHILE_BODY_I_I_I]] ], [ [[ADD28_I_I_I]], [[IF_END31_I_I_I]] ], [ 0, [[IF_ELSE17_I_I_I]] ], [ [[SUB_I25_I_I]], [[IF_THEN_I21_I_I]] ], [ 0, [[WHILE_BODY_I18_I_I]] ]
 // DEFAULT-NEXT:    [[CONV_I:%.*]] = trunc i64 [[RETVAL_0_I_I]] to i32
 // DEFAULT-NEXT:    [[BF_VALUE_I:%.*]] = and i32 [[CONV_I]], 4194303
 // DEFAULT-NEXT:    [[BF_SET9_I:%.*]] = or disjoint i32 [[BF_VALUE_I]], 2143289344
-// DEFAULT-NEXT:    [[TMP10:%.*]] = bitcast i32 [[BF_SET9_I]] to float
-// DEFAULT-NEXT:    ret float [[TMP10]]
+// DEFAULT-NEXT:    [[TMP16:%.*]] = bitcast i32 [[BF_SET9_I]] to float
+// DEFAULT-NEXT:    ret float [[TMP16]]
 //
 // FINITEONLY-LABEL: @test_nanf(
 // FINITEONLY-NEXT:  entry:
@@ -3870,191 +3840,179 @@ extern "C" __device__ double test_modf(double x, double* y) {
 // APPROX-NEXT:  entry:
 // APPROX-NEXT:    [[TMP0:%.*]] = load i8, ptr [[TAG:%.*]], align 1, !tbaa [[TBAA4]]
 // APPROX-NEXT:    [[CMP_I_I:%.*]] = icmp eq i8 [[TMP0]], 48
-// APPROX-NEXT:    br i1 [[CMP_I_I]], label [[IF_THEN_I_I:%.*]], label [[WHILE_COND_I14_I_I:%.*]]
+// APPROX-NEXT:    br i1 [[CMP_I_I]], label [[IF_THEN_I_I:%.*]], label [[WHILE_COND_I14_I_I_PREHEADER:%.*]]
+// APPROX:       while.cond.i14.i.i.preheader:
+// APPROX-NEXT:    [[TMP1:%.*]] = load i8, ptr [[TAG]], align 1, !tbaa [[TBAA4]]
+// APPROX-NEXT:    [[CMP_NOT_I17_I_I5:%.*]] = icmp eq i8 [[TMP1]], 0
+// APPROX-NEXT:    br i1 [[CMP_NOT_I17_I_I5]], label [[_ZL4NANFPKC_EXIT:%.*]], label [[WHILE_BODY_I18_I_I:%.*]]
 // APPROX:       if.then.i.i:
 // APPROX-NEXT:    [[INCDEC_PTR_I_I:%.*]] = getelementptr inbounds nuw i8, ptr [[TAG]], i64 1
-// APPROX-NEXT:    [[TMP1:%.*]] = load i8, ptr [[INCDEC_PTR_I_I]], align 1, !tbaa [[TBAA4]]
-// APPROX-NEXT:    switch i8 [[TMP1]], label [[WHILE_COND_I_I_I:%.*]] [
-// APPROX-NEXT:      i8 120, label [[WHILE_COND_I30_I_I_PREHEADER:%.*]]
-// APPROX-NEXT:      i8 88, label [[WHILE_COND_I30_I_I_PREHEADER]]
+// APPROX-NEXT:    [[TMP2:%.*]] = load i8, ptr [[INCDEC_PTR_I_I]], align 1, !tbaa [[TBAA4]]
+// APPROX-NEXT:    switch i8 [[TMP2]], label [[WHILE_COND_I_I_I_PREHEADER:%.*]] [
+// APPROX-NEXT:      i8 120, label [[IF_THEN5_I_I:%.*]]
+// APPROX-NEXT:      i8 88, label [[IF_THEN5_I_I]]
 // APPROX-NEXT:    ]
-// APPROX:       while.cond.i30.i.i.preheader:
-// APPROX-NEXT:    br label [[WHILE_COND_I30_I_I:%.*]]
-// APPROX:       while.cond.i30.i.i:
-// APPROX-NEXT:    [[__TAGP_ADDR_0_I31_I_I:%.*]] = phi ptr [ [[__TAGP_ADDR_1_I37_I_I:%.*]], [[CLEANUP_I36_I_I:%.*]] ], [ [[INCDEC_PTR_I_I]], [[WHILE_COND_I30_I_I_PREHEADER]] ]
-// APPROX-NEXT:    [[__R_0_I32_I_I:%.*]] = phi i64 [ [[__R_2_I_I_I:%.*]], [[CLEANUP_I36_I_I]] ], [ 0, [[WHILE_COND_I30_I_I_PREHEADER]] ]
-// APPROX-NEXT:    [[TMP2:%.*]] = load i8, ptr [[__TAGP_ADDR_0_I31_I_I]], align 1, !tbaa [[TBAA4]]
-// APPROX-NEXT:    [[CMP_NOT_I33_I_I:%.*]] = icmp eq i8 [[TMP2]], 0
-// APPROX-NEXT:    br i1 [[CMP_NOT_I33_I_I]], label [[_ZL4NANFPKC_EXIT:%.*]], label [[WHILE_BODY_I34_I_I:%.*]]
-// APPROX:       while.body.i34.i.i:
-// APPROX-NEXT:    [[TMP3:%.*]] = add i8 [[TMP2]], -48
-// APPROX-NEXT:    [[OR_COND_I35_I_I:%.*]] = icmp ult i8 [[TMP3]], 10
-// APPROX-NEXT:    br i1 [[OR_COND_I35_I_I]], label [[IF_END31_I_I_I:%.*]], label [[IF_ELSE_I_I_I:%.*]]
+// APPROX:       while.cond.i.i.i.preheader:
+// APPROX-NEXT:    [[TMP3:%.*]] = load i8, ptr [[INCDEC_PTR_I_I]], align 1, !tbaa [[TBAA4]]
+// APPROX-NEXT:    [[CMP_NOT_I_I_I14:%.*]] = icmp eq i8 [[TMP3]], 0
+// APPROX-NEXT:    br i1 [[CMP_NOT_I_I_I14]], label [[_ZL4NANFPKC_EXIT]], label [[WHILE_BODY_I_I_I:%.*]]
+// APPROX:       if.then5.i.i:
+// APPROX-NEXT:    [[TMP4:%.*]] = load i8, ptr [[INCDEC_PTR_I_I]], align 1, !tbaa [[TBAA4]]
+// APPROX-NEXT:    [[CMP_NOT_I30_I_I9:%.*]] = icmp eq i8 [[TMP4]], 0
+// APPROX-NEXT:    br i1 [[CMP_NOT_I30_I_I9]], label [[_ZL4NANFPKC_EXIT]], label [[WHILE_BODY_I31_I_I:%.*]]
+// APPROX:       while.body.i31.i.i:
+// APPROX-NEXT:    [[TMP5:%.*]] = phi i8 [ [[TMP9:%.*]], [[IF_END31_I_I_I:%.*]] ], [ [[TMP4]], [[IF_THEN5_I_I]] ]
+// APPROX-NEXT:    [[__R_0_I29_I_I11:%.*]] = phi i64 [ [[ADD28_I_I_I:%.*]], [[IF_END31_I_I_I]] ], [ 0, [[IF_THEN5_I_I]] ]
+// APPROX-NEXT:    [[__TAGP_ADDR_0_I28_I_I10:%.*]] = phi ptr [ [[INCDEC_PTR_I34_I_I:%.*]], [[IF_END31_I_I_I]] ], [ [[INCDEC_PTR_I_I]], [[IF_THEN5_I_I]] ]
+// APPROX-NEXT:    [[TMP6:%.*]] = add i8 [[TMP5]], -48
+// APPROX-NEXT:    [[OR_COND_I32_I_I:%.*]] = icmp ult i8 [[TMP6]], 10
+// APPROX-NEXT:    br i1 [[OR_COND_I32_I_I]], label [[IF_END31_I_I_I]], label [[IF_ELSE_I_I_I:%.*]]
 // APPROX:       if.else.i.i.i:
-// APPROX-NEXT:    [[TMP4:%.*]] = add i8 [[TMP2]], -97
-// APPROX-NEXT:    [[OR_COND33_I_I_I:%.*]] = icmp ult i8 [[TMP4]], 6
+// APPROX-NEXT:    [[TMP7:%.*]] = add i8 [[TMP5]], -97
+// APPROX-NEXT:    [[OR_COND33_I_I_I:%.*]] = icmp ult i8 [[TMP7]], 6
 // APPROX-NEXT:    br i1 [[OR_COND33_I_I_I]], label [[IF_END31_I_I_I]], label [[IF_ELSE17_I_I_I:%.*]]
 // APPROX:       if.else17.i.i.i:
-// APPROX-NEXT:    [[TMP5:%.*]] = add i8 [[TMP2]], -65
-// APPROX-NEXT:    [[OR_COND34_I_I_I:%.*]] = icmp ult i8 [[TMP5]], 6
-// APPROX-NEXT:    br i1 [[OR_COND34_I_I_I]], label [[IF_END31_I_I_I]], label [[CLEANUP_I36_I_I]]
+// APPROX-NEXT:    [[TMP8:%.*]] = add i8 [[TMP5]], -65
+// APPROX-NEXT:    [[OR_COND34_I_I_I:%.*]] = icmp ult i8 [[TMP8]], 6
+// APPROX-NEXT:    br i1 [[OR_COND34_I_I_I]], label [[IF_END31_I_I_I]], label [[_ZL4NANFPKC_EXIT]]
 // APPROX:       if.end31.i.i.i:
-// APPROX-NEXT:    [[DOTSINK:%.*]] = phi i64 [ -48, [[WHILE_BODY_I34_I_I]] ], [ -87, [[IF_ELSE_I_I_I]] ], [ -55, [[IF_ELSE17_I_I_I]] ]
-// APPROX-NEXT:    [[MUL24_I_I_I:%.*]] = shl i64 [[__R_0_I32_I_I]], 4
-// APPROX-NEXT:    [[CONV25_I_I_I:%.*]] = zext nneg i8 [[TMP2]] to i64
+// APPROX-NEXT:    [[DOTSINK:%.*]] = phi i64 [ -48, [[WHILE_BODY_I31_I_I]] ], [ -87, [[IF_ELSE_I_I_I]] ], [ -55, [[IF_ELSE17_I_I_I]] ]
+// APPROX-NEXT:    [[MUL24_I_I_I:%.*]] = shl i64 [[__R_0_I29_I_I11]], 4
+// APPROX-NEXT:    [[CONV25_I_I_I:%.*]] = zext nneg i8 [[TMP5]] to i64
 // APPROX-NEXT:    [[ADD26_I_I_I:%.*]] = add i64 [[MUL24_I_I_I]], [[DOTSINK]]
-// APPROX-NEXT:    [[ADD28_I_I_I:%.*]] = add i64 [[ADD26_I_I_I]], [[CONV25_I_I_I]]
-// APPROX-NEXT:    [[INCDEC_PTR_I40_I_I:%.*]] = getelementptr inbounds nuw i8, ptr [[__TAGP_ADDR_0_I31_I_I]], i64 1
-// APPROX-NEXT:    br label [[CLEANUP_I36_I_I]]
-// APPROX:       cleanup.i36.i.i:
-// APPROX-NEXT:    [[__TAGP_ADDR_1_I37_I_I]] = phi ptr [ [[INCDEC_PTR_I40_I_I]], [[IF_END31_I_I_I]] ], [ [[__TAGP_ADDR_0_I31_I_I]], [[IF_ELSE17_I_I_I]] ]
-// APPROX-NEXT:    [[__R_2_I_I_I]] = phi i64 [ [[ADD28_I_I_I]], [[IF_END31_I_I_I]] ], [ [[__R_0_I32_I_I]], [[IF_ELSE17_I_I_I]] ]
-// APPROX-NEXT:    [[COND_I_I_I:%.*]] = phi i1 [ true, [[IF_END31_I_I_I]] ], [ false, [[IF_ELSE17_I_I_I]] ]
-// APPROX-NEXT:    br i1 [[COND_I_I_I]], label [[WHILE_COND_I30_I_I]], label [[_ZL4NANFPKC_EXIT]], !llvm.loop [[LOOP11]]
-// APPROX:       while.cond.i.i.i:
-// APPROX-NEXT:    [[__TAGP_ADDR_0_I_I_I:%.*]] = phi ptr [ [[__TAGP_ADDR_1_I_I_I:%.*]], [[CLEANUP_I_I_I:%.*]] ], [ [[INCDEC_PTR_I_I]], [[IF_THEN_I_I]] ]
-// APPROX-NEXT:    [[__R_0_I_I_I:%.*]] = phi i64 [ [[__R_1_I_I_I:%.*]], [[CLEANUP_I_I_I]] ], [ 0, [[IF_THEN_I_I]] ]
-// APPROX-NEXT:    [[TMP6:%.*]] = load i8, ptr [[__TAGP_ADDR_0_I_I_I]], align 1, !tbaa [[TBAA4]]
-// APPROX-NEXT:    [[CMP_NOT_I_I_I:%.*]] = icmp eq i8 [[TMP6]], 0
-// APPROX-NEXT:    br i1 [[CMP_NOT_I_I_I]], label [[_ZL4NANFPKC_EXIT]], label [[WHILE_BODY_I_I_I:%.*]]
+// APPROX-NEXT:    [[ADD28_I_I_I]] = add i64 [[ADD26_I_I_I]], [[CONV25_I_I_I]]
+// APPROX-NEXT:    [[INCDEC_PTR_I34_I_I]] = getelementptr inbounds nuw i8, ptr [[__TAGP_ADDR_0_I28_I_I10]], i64 1
+// APPROX-NEXT:    [[TMP9]] = load i8, ptr [[INCDEC_PTR_I34_I_I]], align 1, !tbaa [[TBAA4]]
+// APPROX-NEXT:    [[CMP_NOT_I30_I_I:%.*]] = icmp eq i8 [[TMP9]], 0
+// APPROX-NEXT:    br i1 [[CMP_NOT_I30_I_I]], label [[_ZL4NANFPKC_EXIT]], label [[WHILE_BODY_I31_I_I]], !llvm.loop [[LOOP11]]
 // APPROX:       while.body.i.i.i:
-// APPROX-NEXT:    [[TMP7:%.*]] = and i8 [[TMP6]], -8
-// APPROX-NEXT:    [[OR_COND_I_I_I:%.*]] = icmp eq i8 [[TMP7]], 48
-// APPROX-NEXT:    br i1 [[OR_COND_I_I_I]], label [[IF_THEN_I_I_I:%.*]], label [[CLEANUP_I_I_I]]
+// APPROX-NEXT:    [[TMP10:%.*]] = phi i8 [ [[TMP12:%.*]], [[IF_THEN_I_I_I:%.*]] ], [ [[TMP3]], [[WHILE_COND_I_I_I_PREHEADER]] ]
+// APPROX-NEXT:    [[__R_0_I_I_I16:%.*]] = phi i64 [ [[SUB_I_I_I:%.*]], [[IF_THEN_I_I_I]] ], [ 0, [[WHILE_COND_I_I_I_PREHEADER]] ]
+// APPROX-NEXT:    [[__TAGP_ADDR_0_I_I_I15:%.*]] = phi ptr [ [[INCDEC_PTR_I_I_I:%.*]], [[IF_THEN_I_I_I]] ], [ [[INCDEC_PTR_I_I]], [[WHILE_COND_I_I_I_PREHEADER]] ]
+// APPROX-NEXT:    [[TMP11:%.*]] = and i8 [[TMP10]], -8
+// APPROX-NEXT:    [[OR_COND_I_I_I:%.*]] = icmp eq i8 [[TMP11]], 48
+// APPROX-NEXT:    br i1 [[OR_COND_I_I_I]], label [[IF_THEN_I_I_I]], label [[_ZL4NANFPKC_EXIT]]
 // APPROX:       if.then.i.i.i:
-// APPROX-NEXT:    [[MUL_I_I_I:%.*]] = shl i64 [[__R_0_I_I_I]], 3
-// APPROX-NEXT:    [[CONV5_I_I_I:%.*]] = zext nneg i8 [[TMP6]] to i64
+// APPROX-NEXT:    [[MUL_I_I_I:%.*]] = shl i64 [[__R_0_I_I_I16]], 3
+// APPROX-NEXT:    [[CONV5_I_I_I:%.*]] = zext nneg i8 [[TMP10]] to i64
 // APPROX-NEXT:    [[ADD_I_I_I:%.*]] = add i64 [[MUL_I_I_I]], -48
-// APPROX-NEXT:    [[SUB_I_I_I:%.*]] = add i64 [[ADD_I_I_I]], [[CONV5_I_I_I]]
-// APPROX-NEXT:    [[INCDEC_PTR_I_I_I:%.*]] = getelementptr inbounds nuw i8, ptr [[__TAGP_ADDR_0_I_I_I]], i64 1
-// APPROX-NEXT:    br label [[CLEANUP_I_I_I]]
-// APPROX:       cleanup.i.i.i:
-// APPROX-NEXT:    [[__TAGP_ADDR_1_I_I_I]] = phi ptr [ [[INCDEC_PTR_I_I_I]], [[IF_THEN_I_I_I]] ], [ [[__TAGP_ADDR_0_I_I_I]], [[WHILE_BODY_I_I_I]] ]
-// APPROX-NEXT:    [[__R_1_I_I_I]] = phi i64 [ [[SUB_I_I_I]], [[IF_THEN_I_I_I]] ], [ [[__R_0_I_I_I]], [[WHILE_BODY_I_I_I]] ]
-// APPROX-NEXT:    br i1 [[OR_COND_I_I_I]], label [[WHILE_COND_I_I_I]], label [[_ZL4NANFPKC_EXIT]], !llvm.loop [[LOOP7]]
-// APPROX:       while.cond.i14.i.i:
-// APPROX-NEXT:    [[__TAGP_ADDR_0_I15_I_I:%.*]] = phi ptr [ [[__TAGP_ADDR_1_I21_I_I:%.*]], [[CLEANUP_I20_I_I:%.*]] ], [ [[TAG]], [[ENTRY:%.*]] ]
-// APPROX-NEXT:    [[__R_0_I16_I_I:%.*]] = phi i64 [ [[__R_1_I22_I_I:%.*]], [[CLEANUP_I20_I_I]] ], [ 0, [[ENTRY]] ]
-// APPROX-NEXT:    [[TMP8:%.*]] = load i8, ptr [[__TAGP_ADDR_0_I15_I_I]], align 1, !tbaa [[TBAA4]]
-// APPROX-NEXT:    [[CMP_NOT_I17_I_I:%.*]] = icmp eq i8 [[TMP8]], 0
-// APPROX-NEXT:    br i1 [[CMP_NOT_I17_I_I]], label [[_ZL4NANFPKC_EXIT]], label [[WHILE_BODY_I18_I_I:%.*]]
+// APPROX-NEXT:    [[SUB_I_I_I]] = add i64 [[ADD_I_I_I]], [[CONV5_I_I_I]]
+// APPROX-NEXT:    [[INCDEC_PTR_I_I_I]] = getelementptr inbounds nuw i8, ptr [[__TAGP_ADDR_0_I_I_I15]], i64 1
+// APPROX-NEXT:    [[TMP12]] = load i8, ptr [[INCDEC_PTR_I_I_I]], align 1, !tbaa [[TBAA4]]
+// APPROX-NEXT:    [[CMP_NOT_I_I_I:%.*]] = icmp eq i8 [[TMP12]], 0
+// APPROX-NEXT:    br i1 [[CMP_NOT_I_I_I]], label [[_ZL4NANFPKC_EXIT]], label [[WHILE_BODY_I_I_I]], !llvm.loop [[LOOP7]]
 // APPROX:       while.body.i18.i.i:
-// APPROX-NEXT:    [[TMP9:%.*]] = add i8 [[TMP8]], -48
-// APPROX-NEXT:    [[OR_COND_I19_I_I:%.*]] = icmp ult i8 [[TMP9]], 10
-// APPROX-NEXT:    br i1 [[OR_COND_I19_I_I]], label [[IF_THEN_I24_I_I:%.*]], label [[CLEANUP_I20_I_I]]
-// APPROX:       if.then.i24.i.i:
-// APPROX-NEXT:    [[MUL_I25_I_I:%.*]] = mul i64 [[__R_0_I16_I_I]], 10
-// APPROX-NEXT:    [[CONV5_I26_I_I:%.*]] = zext nneg i8 [[TMP8]] to i64
-// APPROX-NEXT:    [[ADD_I27_I_I:%.*]] = add i64 [[MUL_I25_I_I]], -48
-// APPROX-NEXT:    [[SUB_I28_I_I:%.*]] = add i64 [[ADD_I27_I_I]], [[CONV5_I26_I_I]]
-// APPROX-NEXT:    [[INCDEC_PTR_I29_I_I:%.*]] = getelementptr inbounds nuw i8, ptr [[__TAGP_ADDR_0_I15_I_I]], i64 1
-// APPROX-NEXT:    br label [[CLEANUP_I20_I_I]]
-// APPROX:       cleanup.i20.i.i:
-// APPROX-NEXT:    [[__TAGP_ADDR_1_I21_I_I]] = phi ptr [ [[INCDEC_PTR_I29_I_I]], [[IF_THEN_I24_I_I]] ], [ [[__TAGP_ADDR_0_I15_I_I]], [[WHILE_BODY_I18_I_I]] ]
-// APPROX-NEXT:    [[__R_1_I22_I_I]] = phi i64 [ [[SUB_I28_I_I]], [[IF_THEN_I24_I_I]] ], [ [[__R_0_I16_I_I]], [[WHILE_BODY_I18_I_I]] ]
-// APPROX-NEXT:    br i1 [[OR_COND_I19_I_I]], label [[WHILE_COND_I14_I_I]], label [[_ZL4NANFPKC_EXIT]], !llvm.loop [[LOOP10]]
+// APPROX-NEXT:    [[TMP13:%.*]] = phi i8 [ [[TMP15:%.*]], [[IF_THEN_I21_I_I:%.*]] ], [ [[TMP1]], [[WHILE_COND_I14_I_I_PREHEADER]] ]
+// APPROX-NEXT:    [[__R_0_I16_I_I7:%.*]] = phi i64 [ [[SUB_I25_I_I:%.*]], [[IF_THEN_I21_I_I]] ], [ 0, [[WHILE_COND_I14_I_I_PREHEADER]] ]
+// APPROX-NEXT:    [[__TAGP_ADDR_0_I15_I_I6:%.*]] = phi ptr [ [[INCDEC_PTR_I26_I_I:%.*]], [[IF_THEN_I21_I_I]] ], [ [[TAG]], [[WHILE_COND_I14_I_I_PREHEADER]] ]
+// APPROX-NEXT:    [[TMP14:%.*]] = add i8 [[TMP13]], -48
+// APPROX-NEXT:    [[OR_COND_I19_I_I:%.*]] = icmp ult i8 [[TMP14]], 10
+// APPROX-NEXT:    br i1 [[OR_COND_I19_I_I]], label [[IF_THEN_I21_I_I]], label [[_ZL4NANFPKC_EXIT]]
+// APPROX:       if.then.i21.i.i:
+// APPROX-NEXT:    [[MUL_I22_I_I:%.*]] = mul i64 [[__R_0_I16_I_I7]], 10
+// APPROX-NEXT:    [[CONV5_I23_I_I:%.*]] = zext nneg i8 [[TMP13]] to i64
+// APPROX-NEXT:    [[ADD_I24_I_I:%.*]] = add i64 [[MUL_I22_I_I]], -48
+// APPROX-NEXT:    [[SUB_I25_I_I]] = add i64 [[ADD_I24_I_I]], [[CONV5_I23_I_I]]
+// APPROX-NEXT:    [[INCDEC_PTR_I26_I_I]] = getelementptr inbounds nuw i8, ptr [[__TAGP_ADDR_0_I15_I_I6]], i64 1
+// APPROX-NEXT:    [[TMP15]] = load i8, ptr [[INCDEC_PTR_I26_I_I]], align 1, !tbaa [[TBAA4]]
+// APPROX-NEXT:    [[CMP_NOT_I17_I_I:%.*]] = icmp eq i8 [[TMP15]], 0
+// APPROX-NEXT:    br i1 [[CMP_NOT_I17_I_I]], label [[_ZL4NANFPKC_EXIT]], label [[WHILE_BODY_I18_I_I]], !llvm.loop [[LOOP10]]
 // APPROX:       _ZL4nanfPKc.exit:
-// APPROX-NEXT:    [[RETVAL_0_I_I:%.*]] = phi i64 [ 0, [[CLEANUP_I_I_I]] ], [ [[__R_0_I_I_I]], [[WHILE_COND_I_I_I]] ], [ 0, [[CLEANUP_I36_I_I]] ], [ [[__R_0_I32_I_I]], [[WHILE_COND_I30_I_I]] ], [ 0, [[CLEANUP_I20_I_I]] ], [ [[__R_0_I16_I_I]], [[WHILE_COND_I14_I_I]] ]
+// APPROX-NEXT:    [[RETVAL_0_I_I:%.*]] = phi i64 [ 0, [[WHILE_COND_I_I_I_PREHEADER]] ], [ 0, [[IF_THEN5_I_I]] ], [ 0, [[WHILE_COND_I14_I_I_PREHEADER]] ], [ [[SUB_I_I_I]], [[IF_THEN_I_I_I]] ], [ 0, [[WHILE_BODY_I_I_I]] ], [ [[ADD28_I_I_I]], [[IF_END31_I_I_I]] ], [ 0, [[IF_ELSE17_I_I_I]] ], [ [[SUB_I25_I_I]], [[IF_THEN_I21_I_I]] ], [ 0, [[WHILE_BODY_I18_I_I]] ]
 // APPROX-NEXT:    [[CONV_I:%.*]] = trunc i64 [[RETVAL_0_I_I]] to i32
 // APPROX-NEXT:    [[BF_VALUE_I:%.*]] = and i32 [[CONV_I]], 4194303
 // APPROX-NEXT:    [[BF_SET9_I:%.*]] = or disjoint i32 [[BF_VALUE_I]], 2143289344
-// APPROX-NEXT:    [[TMP10:%.*]] = bitcast i32 [[BF_SET9_I]] to float
-// APPROX-NEXT:    ret float [[TMP10]]
+// APPROX-NEXT:    [[TMP16:%.*]] = bitcast i32 [[BF_SET9_I]] to float
+// APPROX-NEXT:    ret float [[TMP16]]
 //
 // NCRDIV-LABEL: @test_nanf(
 // NCRDIV-NEXT:  entry:
 // NCRDIV-NEXT:    [[TMP0:%.*]] = load i8, ptr [[TAG:%.*]], align 1, !tbaa [[TBAA4]]
 // NCRDIV-NEXT:    [[CMP_I_I:%.*]] = icmp eq i8 [[TMP0]], 48
-// NCRDIV-NEXT:    br i1 [[CMP_I_I]], label [[IF_THEN_I_I:%.*]], label [[WHILE_COND_I14_I_I:%.*]]
+// NCRDIV-NEXT:    br i1 [[CMP_I_I]], label [[IF_THEN_I_I:%.*]], label [[WHILE_COND_I14_I_I_PREHEADER:%.*]]
+// NCRDIV:       while.cond.i14.i.i.preheader:
+// NCRDIV-NEXT:    [[TMP1:%.*]] = load i8, ptr [[TAG]], align 1, !tbaa [[TBAA4]]
+// NCRDIV-NEXT:    [[CMP_NOT_I17_I_I5:%.*]] = icmp eq i8 [[TMP1]], 0
+// NCRDIV-NEXT:    br i1 [[CMP_NOT_I17_I_I5]], label [[_ZL4NANFPKC_EXIT:%.*]], label [[WHILE_BODY_I18_I_I:%.*]]
 // NCRDIV:       if.then.i.i:
 // NCRDIV-NEXT:    [[INCDEC_PTR_I_I:%.*]] = getelementptr inbounds nuw i8, ptr [[TAG]], i64 1
-// NCRDIV-NEXT:    [[TMP1:%.*]] = load i8, ptr [[INCDEC_PTR_I_I]], align 1, !tbaa [[TBAA4]]
-// NCRDIV-NEXT:    switch i8 [[TMP1]], label [[WHILE_COND_I_I_I:%.*]] [
-// NCRDIV-NEXT:      i8 120, label [[WHILE_COND_I30_I_I_PREHEADER:%.*]]
-// NCRDIV-NEXT:      i8 88, label [[WHILE_COND_I30_I_I_PREHEADER]]
+// NCRDIV-NEXT:    [[TMP2:%.*]] = load i8, ptr [[INCDEC_PTR_I_I]], align 1, !tbaa [[TBAA4]]
+// NCRDIV-NEXT:    switch i8 [[TMP2]], label [[WHILE_COND_I_I_I_PREHEADER:%.*]] [
+// NCRDIV-NEXT:      i8 120, label [[IF_THEN5_I_I:%.*]]
+// NCRDIV-NEXT:      i8 88, label [[IF_THEN5_I_I]]
 // NCRDIV-NEXT:    ]
-// NCRDIV:       while.cond.i30.i.i.preheader:
-// NCRDIV-NEXT:    br label [[WHILE_COND_I30_I_I:%.*]]
-// NCRDIV:       while.cond.i30.i.i:
-// NCRDIV-NEXT:    [[__TAGP_ADDR_0_I31_I_I:%.*]] = phi ptr [ [[__TAGP_ADDR_1_I37_I_I:%.*]], [[CLEANUP_I36_I_I:%.*]] ], [ [[INCDEC_PTR_I_I]], [[WHILE_COND_I30_I_I_PREHEADER]] ]
-// NCRDIV-NEXT:    [[__R_0_I32_I_I:%.*]] = phi i64 [ [[__R_2_I_I_I:%.*]], [[CLEANUP_I36_I_I]] ], [ 0, [[WHILE_COND_I30_I_I_PREHEADER]] ]
-// NCRDIV-NEXT:    [[TMP2:%.*]] = load i8, ptr [[__TAGP_ADDR_0_I31_I_I]], align 1, !tbaa [[TBAA4]]
-// NCRDIV-NEXT:    [[CMP_NOT_I33_I_I:%.*]] = icmp eq i8 [[TMP2]], 0
-// NCRDIV-NEXT:    br i1 [[CMP_NOT_I33_I_I]], label [[_ZL4NANFPKC_EXIT:%.*]], label [[WHILE_BODY_I34_I_I:%.*]]
-// NCRDIV:       while.body.i34.i.i:
-// NCRDIV-NEXT:    [[TMP3:%.*]] = add i8 [[TMP2]], -48
-// NCRDIV-NEXT:    [[OR_COND_I35_I_I:%.*]] = icmp ult i8 [[TMP3]], 10
-// NCRDIV-NEXT:    br i1 [[OR_COND_I35_I_I]], label [[IF_END31_I_I_I:%.*]], label [[IF_ELSE_I_I_I:%.*]]
+// NCRDIV:       while.cond.i.i.i.preheader:
+// NCRDIV-NEXT:    [[TMP3:%.*]] = load i8, ptr [[INCDEC_PTR_I_I]], align 1, !tbaa [[TBAA4]]
+// NCRDIV-NEXT:    [[CMP_NOT_I_I_I14:%.*]] = icmp eq i8 [[TMP3]], 0
+// NCRDIV-NEXT:    br i1 [[CMP_NOT_I_I_I14]], label [[_ZL4NANFPKC_EXIT]], label [[WHILE_BODY_I_I_I:%.*]]
+// NCRDIV:       if.then5.i.i:
+// NCRDIV-NEXT:    [[TMP4:%.*]] = load i8, ptr [[INCDEC_PTR_I_I]], align 1, !tbaa [[TBAA4]]
+// NCRDIV-NEXT:    [[CMP_NOT_I30_I_I9:%.*]] = icmp eq i8 [[TMP4]], 0
+// NCRDIV-NEXT:    br i1 [[CMP_NOT_I30_I_I9]], label [[_ZL4NANFPKC_EXIT]], label [[WHILE_BODY_I31_I_I:%.*]]
+// NCRDIV:       while.body.i31.i.i:
+// NCRDIV-NEXT:    [[TMP5:%.*]] = phi i8 [ [[TMP9:%.*]], [[IF_END31_I_I_I:%.*]] ], [ [[TMP4]], [[IF_THEN5_I_I]] ]
+// NCRDIV-NEXT:    [[__R_0_I29_I_I11:%.*]] = phi i64 [ [[ADD28_I_I_I:%.*]], [[IF_END31_I_I_I]] ], [ 0, [[IF_THEN5_I_I]] ]
+// NCRDIV-NEXT:    [[__TAGP_ADDR_0_I28_I_I10:%.*]] = phi ptr [ [[INCDEC_PTR_I34_I_I:%.*]], [[IF_END31_I_I_I]] ], [ [[INCDEC_PTR_I_I]], [[IF_THEN5_I_I]] ]
+// NCRDIV-NEXT:    [[TMP6:%.*]] = add i8 [[TMP5]], -48
+// NCRDIV-NEXT:    [[OR_COND_I32_I_I:%.*]] = icmp ult i8 [[TMP6]], 10
+// NCRDIV-NEXT:    br i1 [[OR_COND_I32_I_I]], label [[IF_END31_I_I_I]], label [[IF_ELSE_I_I_I:%.*]]
 // NCRDIV:       if.else.i.i.i:
-// NCRDIV-NEXT:    [[TMP4:%.*]] = add i8 [[TMP2]], -97
-// NCRDIV-NEXT:    [[OR_COND33_I_I_I:%.*]] = icmp ult i8 [[TMP4]], 6
+// NCRDIV-NEXT:    [[TMP7:%.*]] = add i8 [[TMP5]], -97
+// NCRDIV-NEXT:    [[OR_COND33_I_I_I:%.*]] = icmp ult i8 [[TMP7]], 6
 // NCRDIV-NEXT:    br i1 [[OR_COND33_I_I_I]], label [[IF_END31_I_I_I]], label [[IF_ELSE17_I_I_I:%.*]]
 // NCRDIV:       if.else17.i.i.i:
-// NCRDIV-NEXT:    [[TMP5:%.*]] = add i8 [[TMP2]], -65
-// NCRDIV-NEXT:    [[OR_COND34_I_I_I:%.*]] = icmp ult i8 [[TMP5]], 6
-// NCRDIV-NEXT:    br i1 [[OR_COND34_I_I_I]], label [[IF_END31_I_I_I]], label [[CLEANUP_I36_I_I]]
+// NCRDIV-NEXT:    [[TMP8:%.*]] = add i8 [[TMP5]], -65
+// NCRDIV-NEXT:    [[OR_COND34_I_I_I:%.*]] = icmp ult i8 [[TMP8]], 6
+// NCRDIV-NEXT:    br i1 [[OR_COND34_I_I_I]], label [[IF_END31_I_I_I]], label [[_ZL4NANFPKC_EXIT]]
 // NCRDIV:       if.end31.i.i.i:
-// NCRDIV-NEXT:    [[DOTSINK:%.*]] = phi i64 [ -48, [[WHILE_BODY_I34_I_I]] ], [ -87, [[IF_ELSE_I_I_I]] ], [ -55, [[IF_ELSE17_I_I_I]] ]
-// NCRDIV-NEXT:    [[MUL24_I_I_I:%.*]] = shl i64 [[__R_0_I32_I_I]], 4
-// NCRDIV-NEXT:    [[CONV25_I_I_I:%.*]] = zext nneg i8 [[TMP2]] to i64
+// NCRDIV-NEXT:    [[DOTSINK:%.*]] = phi i64 [ -48, [[WHILE_BODY_I31_I_I]] ], [ -87, [[IF_ELSE_I_I_I]] ], [ -55, [[IF_ELSE17_I_I_I]] ]
+// NCRDIV-NEXT:    [[MUL24_I_I_I:%.*]] = shl i64 [[__R_0_I29_I_I11]], 4
+// NCRDIV-NEXT:    [[CONV25_I_I_I:%.*]] = zext nneg i8 [[TMP5]] to i64
 // NCRDIV-NEXT:    [[ADD26_I_I_I:%.*]] = add i64 [[MUL24_I_I_I]], [[DOTSINK]]
-// NCRDIV-NEXT:    [[ADD28_I_I_I:%.*]] = add i64 [[ADD26_I_I_I]], [[CONV25_I_I_I]]
-// NCRDIV-NEXT:    [[INCDEC_PTR_I40_I_I:%.*]] = getelementptr inbounds nuw i8, ptr [[__TAGP_ADDR_0_I31_I_I]], i64 1
-// NCRDIV-NEXT:    br label [[CLEANUP_I36_I_I]]
-// NCRDIV:       cleanup.i36.i.i:
-// NCRDIV-NEXT:    [[__TAGP_ADDR_1_I37_I_I]] = phi ptr [ [[INCDEC_PTR_I40_I_I]], [[IF_END31_I_I_I]] ], [ [[__TAGP_ADDR_0_I31_I_I]], [[IF_ELSE17_I_I_I]] ]
-// NCRDIV-NEXT:    [[__R_2_I_I_I]] = phi i64 [ [[ADD28_I_I_I]], [[IF_END31_I_I_I]] ], [ [[__R_0_I32_I_I]], [[IF_ELSE17_I_I_I]] ]
-// NCRDIV-NEXT:    [[COND_I_I_I:%.*]] = phi i1 [ true, [[IF_END31_I_I_I]] ], [ false, [[IF_ELSE17_I_I_I]] ]
-// NCRDIV-NEXT:    br i1 [[COND_I_I_I]], label [[WHILE_COND_I30_I_I]], label [[_ZL4NANFPKC_EXIT]], !llvm.loop [[LOOP11]]
-// NCRDIV:       while.cond.i.i.i:
-// NCRDIV-NEXT:    [[__TAGP_ADDR_0_I_I_I:%.*]] = phi ptr [ [[__TAGP_ADDR_1_I_I_I:%.*]], [[CLEANUP_I_I_I:%.*]] ], [ [[INCDEC_PTR_I_I]], [[IF_THEN_I_I]] ]
-// NCRDIV-NEXT:    [[__R_0_I_I_I:%.*]] = phi i64 [ [[__R_1_I_I_I:%.*]], [[CLEANUP_I_I_I]] ], [ 0, [[IF_THEN_I_I]] ]
-// NCRDIV-NEXT:    [[TMP6:%.*]] = load i8, ptr [[__TAGP_ADDR_0_I_I_I]], align 1, !tbaa [[TBAA4]]
-// NCRDIV-NEXT:    [[CMP_NOT_I_I_I:%.*]] = icmp eq i8 [[TMP6]], 0
-// NCRDIV-NEXT:    br i1 [[CMP_NOT_I_I_I]], label [[_ZL4NANFPKC_EXIT]], label [[WHILE_BODY_I_I_I:%.*]]
+// NCRDIV-NEXT:    [[ADD28_I_I_I]] = add i64 [[ADD26_I_I_I]], [[CONV25_I_I_I]]
+// NCRDIV-NEXT:    [[INCDEC_PTR_I34_I_I]] = getelementptr inbounds nuw i8, ptr [[__TAGP_ADDR_0_I28_I_I10]], i64 1
+// NCRDIV-NEXT:    [[TMP9]] = load i8, ptr [[INCDEC_PTR_I34_I_I]], align 1, !tbaa [[TBAA4]]
+// NCRDIV-NEXT:    [[CMP_NOT_I30_I_I:%.*]] = icmp eq i8 [[TMP9]], 0
+// NCRDIV-NEXT:    br i1 [[CMP_NOT_I30_I_I]], label [[_ZL4NANFPKC_EXIT]], label [[WHILE_BODY_I31_I_I]], !llvm.loop [[LOOP11]]
 // NCRDIV:       while.body.i.i.i:
-// NCRDIV-NEXT:    [[TMP7:%.*]] = and i8 [[TMP6]], -8
-// NCRDIV-NEXT:    [[OR_COND_I_I_I:%.*]] = icmp eq i8 [[TMP7]], 48
-// NCRDIV-NEXT:    br i1 [[OR_COND_I_I_I]], label [[IF_THEN_I_I_I:%.*]], label [[CLEANUP_I_I_I]]
+// NCRDIV-NEXT:    [[TMP10:%.*]] = phi i8 [ [[TMP12:%.*]], [[IF_THEN_I_I_I:%.*]] ], [ [[TMP3]], [[WHILE_COND_I_I_I_PREHEADER]] ]
+// NCRDIV-NEXT:    [[__R_0_I_I_I16:%.*]] = phi i64 [ [[SUB_I_I_I:%.*]], [[IF_THEN_I_I_I]] ], [ 0, [[WHILE_COND_I_I_I_PREHEADER]] ]
+// NCRDIV-NEXT:    [[__TAGP_ADDR_0_I_I_I15:%.*]] = phi ptr [ [[INCDEC_PTR_I_I_I:%.*]], [[IF_THEN_I_I_I]] ], [ [[INCDEC_PTR_I_I]], [[WHILE_COND_I_I_I_PREHEADER]] ]
+// NCRDIV-NEXT:    [[TMP11:%.*]] = and i8 [[TMP10]], -8
+// NCRDIV-NEXT:    [[OR_COND_I_I_I:%.*]] = icmp eq i8 [[TMP11]], 48
+// NCRDIV-NEXT:    br i1 [[OR_COND_I_I_I]], label [[IF_THEN_I_I_I]], label [[_ZL4NANFPKC_EXIT]]
 // NCRDIV:       if.then.i.i.i:
-// NCRDIV-NEXT:    [[MUL_I_I_I:%.*]] = shl i64 [[__R_0_I_I_I]], 3
-// NCRDIV-NEXT:    [[CONV5_I_I_I:%.*]] = zext nneg i8 [[TMP6]] to i64
+// NCRDIV-NEXT:    [[MUL_I_I_I:%.*]] = shl i64 [[__R_0_I_I_I16]], 3
+// NCRDIV-NEXT:    [[CONV5_I_I_I:%.*]] = zext nneg i8 [[TMP10]] to i64
 // NCRDIV-NEXT:    [[ADD_I_I_I:%.*]] = add i64 [[MUL_I_I_I]], -48
-// NCRDIV-NEXT:    [[SUB_I_I_I:%.*]] = add i64 [[ADD_I_I_I]], [[CONV5_I_I_I]]
-// NCRDIV-NEXT:    [[INCDEC_PTR_I_I_I:%.*]] = getelementptr inbounds nuw i8, ptr [[__TAGP_ADDR_0_I_I_I]], i64 1
-// NCRDIV-NEXT:    br label [[CLEANUP_I_I_I]]
-// NCRDIV:       cleanup.i.i.i:
-// NCRDIV-NEXT:    [[__TAGP_ADDR_1_I_I_I]] = phi ptr [ [[INCDEC_PTR_I_I_I]], [[IF_THEN_I_I_I]] ], [ [[__TAGP_ADDR_0_I_I_I]], [[WHILE_BODY_I_I_I]] ]
-// NCRDIV-NEXT:    [[__R_1_I_I_I]] = phi i64 [ [[SUB_I_I_I]], [[IF_THEN_I_I_I]] ], [ [[__R_0_I_I_I]], [[WHILE_BODY_I_I_I]] ]
-// NCRDIV-NEXT:    br i1 [[OR_COND_I_I_I]], label [[WHILE_COND_I_I_I]], label [[_ZL4NANFPKC_EXIT]], !llvm.loop [[LOOP7]]
-// NCRDIV:       while.cond.i14.i.i:
-// NCRDIV-NEXT:    [[__TAGP_ADDR_0_I15_I_I:%.*]] = phi ptr [ [[__TAGP_ADDR_1_I21_I_I:%.*]], [[CLEANUP_I20_I_I:%.*]] ], [ [[TAG]], [[ENTRY:%.*]] ]
-// NCRDIV-NEXT:    [[__R_0_I16_I_I:%.*]] = phi i64 [ [[__R_1_I22_I_I:%.*]], [[CLEANUP_I20_I_I]] ], [ 0, [[ENTRY]] ]
-// NCRDIV-NEXT:    [[TMP8:%.*]] = load i8, ptr [[__TAGP_ADDR_0_I15_I_I]], align 1, !tbaa [[TBAA4]]
-// NCRDIV-NEXT:    [[CMP_NOT_I17_I_I:%.*]] = icmp eq i8 [[TMP8]], 0
-// NCRDIV-NEXT:    br i1 [[CMP_NOT_I17_I_I]], label [[_ZL4NANFPKC_EXIT]], label [[WHILE_BODY_I18_I_I:%.*]]
+// NCRDIV-NEXT:    [[SUB_I_I_I]] = add i64 [[ADD_I_I_I]], [[CONV5_I_I_I]]
+// NCRDIV-NEXT:    [[INCDEC_PTR_I_I_I]] = getelementptr inbounds nuw i8, ptr [[__TAGP_ADDR_0_I_I_I15]], i64 1
+// NCRDIV-NEXT:    [[TMP12]] = load i8, ptr [[INCDEC_PTR_I_I_I]], align 1, !tbaa [[TBAA4]]
+// NCRDIV-NEXT:    [[CMP_NOT_I_I_I:%.*]] = icmp eq i8 [[TMP12]], 0
+// NCRDIV-NEXT:    br i1 [[CMP_NOT_I_I_I]], label [[_ZL4NANFPKC_EXIT]], label [[WHILE_BODY_I_I_I]], !llvm.loop [[LOOP7]]
 // NCRDIV:       while.body.i18.i.i:
-// NCRDIV-NEXT:    [[TMP9:%.*]] = add i8 [[TMP8]], -48
-// NCRDIV-NEXT:    [[OR_COND_I19_I_I:%.*]] = icmp ult i8 [[TMP9]], 10
-// NCRDIV-NEXT:    br i1 [[OR_COND_I19_I_I]], label [[IF_THEN_I24_I_I:%.*]], label [[CLEANUP_I20_I_I]]
-// NCRDIV:       if.then.i24.i.i:
-// NCRDIV-NEXT:    [[MUL_I25_I_I:%.*]] = mul i64 [[__R_0_I16_I_I]], 10
-// NCRDIV-NEXT:    [[CONV5_I26_I_I:%.*]] = zext nneg i8 [[TMP8]] to i64
-// NCRDIV-NEXT:    [[ADD_I27_I_I:%.*]] = add i64 [[MUL_I25_I_I]], -48
-// NCRDIV-NEXT:    [[SUB_I28_I_I:%.*]] = add i64 [[ADD_I27_I_I]], [[CONV5_I26_I_I]]
-// NCRDIV-NEXT:    [[INCDEC_PTR_I29_I_I:%.*]] = getelementptr inbounds nuw i8, ptr [[__TAGP_ADDR_0_I15_I_I]], i64 1
-// NCRDIV-NEXT:    br label [[CLEANUP_I20_I_I]]
-// NCRDIV:       cleanup.i20.i.i:
-// NCRDIV-NEXT:    [[__TAGP_ADDR_1_I21_I_I]] = phi ptr [ [[INCDEC_PTR_I29_I_I]], [[IF_THEN_I24_I_I]] ], [ [[__TAGP_ADDR_0_I15_I_I]], [[WHILE_BODY_I18_I_I]] ]
-// NCRDIV-NEXT:    [[__R_1_I22_I_I]] = phi i64 [ [[SUB_I28_I_I]], [[IF_THEN_I24_I_I]] ], [ [[__R_0_I16_I_I]], [[WHILE_BODY_I18_I_I]] ]
-// NCRDIV-NEXT:    br i1 [[OR_COND_I19_I_I]], label [[WHILE_COND_I14_I_I]], label [[_ZL4NANFPKC_EXIT]], !llvm.loop [[LOOP10]]
+// NCRDIV-NEXT:    [[TMP13:%.*]] = phi i8 [ [[TMP15:%.*]], [[IF_THEN_I21_I_I:%.*]] ], [ [[TMP1]], [[WHILE_COND_I14_I_I_PREHEADER]] ]
+// NCRDIV-NEXT:    [[__R_0_I16_I_I7:%.*]] = phi i64 [ [[SUB_I25_I_I:%.*]], [[IF_THEN_I21_I_I]] ], [ 0, [[WHILE_COND_I14_I_I_PREHEADER]] ]
+// NCRDIV-NEXT:    [[__TAGP_ADDR_0_I15_I_I6:%.*]] = phi ptr [ [[INCDEC_PTR_I26_I_I:%.*]], [[IF_THEN_I21_I_I]] ], [ [[TAG]], [[WHILE_COND_I14_I_I_PREHEADER]] ]
+// NCRDIV-NEXT:    [[TMP14:%.*]] = add i8 [[TMP13]], -48
+// NCRDIV-NEXT:    [[OR_COND_I19_I_I:%.*]] = icmp ult i8 [[TMP14]], 10
+// NCRDIV-NEXT:    br i1 [[OR_COND_I19_I_I]], label [[IF_THEN_I21_I_I]], label [[_ZL4NANFPKC_EXIT]]
+// NCRDIV:       if.then.i21.i.i:
+// NCRDIV-NEXT:    [[MUL_I22_I_I:%.*]] = mul i64 [[__R_0_I16_I_I7]], 10
+// NCRDIV-NEXT:    [[CONV5_I23_I_I:%.*]] = zext nneg i8 [[TMP13]] to i64
+// NCRDIV-NEXT:    [[ADD_I24_I_I:%.*]] = add i64 [[MUL_I22_I_I]], -48
+// NCRDIV-NEXT:    [[SUB_I25_I_I]] = add i64 [[ADD_I24_I_I]], [[CONV5_I23_I_I]]
+// NCRDIV-NEXT:    [[INCDEC_PTR_I26_I_I]] = getelementptr inbounds nuw i8, ptr [[__TAGP_ADDR_0_I15_I_I6]], i64 1
+// NCRDIV-NEXT:    [[TMP15]] = load i8, ptr [[INCDEC_PTR_I26_I_I]], align 1, !tbaa [[TBAA4]]
+// NCRDIV-NEXT:    [[CMP_NOT_I17_I_I:%.*]] = icmp eq i8 [[TMP15]], 0
+// NCRDIV-NEXT:    br i1 [[CMP_NOT_I17_I_I]], label [[_ZL4NANFPKC_EXIT]], label [[WHILE_BODY_I18_I_I]], !llvm.loop [[LOOP10]]
 // NCRDIV:       _ZL4nanfPKc.exit:
-// NCRDIV-NEXT:    [[RETVAL_0_I_I:%.*]] = phi i64 [ 0, [[CLEANUP_I_I_I]] ], [ [[__R_0_I_I_I]], [[WHILE_COND_I_I_I]] ], [ 0, [[CLEANUP_I36_I_I]] ], [ [[__R_0_I32_I_I]], [[WHILE_COND_I30_I_I]] ], [ 0, [[CLEANUP_I20_I_I]] ], [ [[__R_0_I16_I_I]], [[WHILE_COND_I14_I_I]] ]
+// NCRDIV-NEXT:    [[RETVAL_0_I_I:%.*]] = phi i64 [ 0, [[WHILE_COND_I_I_I_PREHEADER]] ], [ 0, [[IF_THEN5_I_I]] ], [ 0, [[WHILE_COND_I14_I_I_PREHEADER]] ], [ [[SUB_I_I_I]], [[IF_THEN_I_I_I]] ], [ 0, [[WHILE_BODY_I_I_I]] ], [ [[ADD28_I_I_I]], [[IF_END31_I_I_I]] ], [ 0, [[IF_ELSE17_I_I_I]] ], [ [[SUB_I25_I_I]], [[IF_THEN_I21_I_I]] ], [ 0, [[WHILE_BODY_I18_I_I]] ]
 // NCRDIV-NEXT:    [[CONV_I:%.*]] = trunc i64 [[RETVAL_0_I_I]] to i32
 // NCRDIV-NEXT:    [[BF_VALUE_I:%.*]] = and i32 [[CONV_I]], 4194303
 // NCRDIV-NEXT:    [[BF_SET9_I:%.*]] = or disjoint i32 [[BF_VALUE_I]], 2143289344
-// NCRDIV-NEXT:    [[TMP10:%.*]] = bitcast i32 [[BF_SET9_I]] to float
-// NCRDIV-NEXT:    ret float [[TMP10]]
+// NCRDIV-NEXT:    [[TMP16:%.*]] = bitcast i32 [[BF_SET9_I]] to float
+// NCRDIV-NEXT:    ret float [[TMP16]]
 //
 // AMDGCNSPIRV-LABEL: @test_nanf(
 // AMDGCNSPIRV-NEXT:  entry:
@@ -4065,53 +4023,49 @@ extern "C" __device__ double test_modf(double x, double* y) {
 // AMDGCNSPIRV-NEXT:    [[INCDEC_PTR_I_I:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[TAG]], i64 1
 // AMDGCNSPIRV-NEXT:    [[TMP1:%.*]] = load i8, ptr addrspace(4) [[INCDEC_PTR_I_I]], align 1, !tbaa [[TBAA5]]
 // AMDGCNSPIRV-NEXT:    switch i8 [[TMP1]], label [[WHILE_COND_I_I_I:%.*]] [
-// AMDGCNSPIRV-NEXT:      i8 120, label [[WHILE_COND_I28_I_I_PREHEADER:%.*]]
-// AMDGCNSPIRV-NEXT:      i8 88, label [[WHILE_COND_I28_I_I_PREHEADER]]
+// AMDGCNSPIRV-NEXT:      i8 120, label [[IF_THEN5_I_I:%.*]]
+// AMDGCNSPIRV-NEXT:      i8 88, label [[IF_THEN5_I_I]]
 // AMDGCNSPIRV-NEXT:    ]
-// AMDGCNSPIRV:       while.cond.i28.i.i.preheader:
-// AMDGCNSPIRV-NEXT:    br label [[WHILE_COND_I28_I_I:%.*]]
-// AMDGCNSPIRV:       while.cond.i28.i.i:
-// AMDGCNSPIRV-NEXT:    [[__TAGP_ADDR_0_I29_I_I:%.*]] = phi ptr addrspace(4) [ [[__TAGP_ADDR_1_I34_I_I:%.*]], [[CLEANUP_I_I_I:%.*]] ], [ [[INCDEC_PTR_I_I]], [[WHILE_COND_I28_I_I_PREHEADER]] ]
-// AMDGCNSPIRV-NEXT:    [[__R_0_I30_I_I:%.*]] = phi i64 [ [[__R_2_I_I_I:%.*]], [[CLEANUP_I_I_I]] ], [ 0, [[WHILE_COND_I28_I_I_PREHEADER]] ]
-// AMDGCNSPIRV-NEXT:    [[TMP2:%.*]] = load i8, ptr addrspace(4) [[__TAGP_ADDR_0_I29_I_I]], align 1, !tbaa [[TBAA5]]
-// AMDGCNSPIRV-NEXT:    [[CMP_NOT_I31_I_I:%.*]] = icmp eq i8 [[TMP2]], 0
-// AMDGCNSPIRV-NEXT:    br i1 [[CMP_NOT_I31_I_I]], label [[_ZL4NANFPKC_EXIT:%.*]], label [[WHILE_BODY_I32_I_I:%.*]]
+// AMDGCNSPIRV:       if.then5.i.i:
+// AMDGCNSPIRV-NEXT:    [[TMP2:%.*]] = load i8, ptr addrspace(4) [[INCDEC_PTR_I_I]], align 1, !tbaa [[TBAA5]]
+// AMDGCNSPIRV-NEXT:    [[CMP_NOT_I31_I_I5:%.*]] = icmp eq i8 [[TMP2]], 0
+// AMDGCNSPIRV-NEXT:    br i1 [[CMP_NOT_I31_I_I5]], label [[_ZL4NANFPKC_EXIT:%.*]], label [[WHILE_BODY_I32_I_I:%.*]]
 // AMDGCNSPIRV:       while.body.i32.i.i:
-// AMDGCNSPIRV-NEXT:    [[TMP3:%.*]] = add i8 [[TMP2]], -48
-// AMDGCNSPIRV-NEXT:    [[OR_COND_I33_I_I:%.*]] = icmp ult i8 [[TMP3]], 10
-// AMDGCNSPIRV-NEXT:    br i1 [[OR_COND_I33_I_I]], label [[IF_END31_I_I_I:%.*]], label [[IF_ELSE_I_I_I:%.*]]
+// AMDGCNSPIRV-NEXT:    [[TMP3:%.*]] = phi i8 [ [[TMP7:%.*]], [[IF_END31_I_I_I:%.*]] ], [ [[TMP2]], [[IF_THEN5_I_I]] ]
+// AMDGCNSPIRV-NEXT:    [[__R_0_I30_I_I7:%.*]] = phi i64 [ [[ADD28_I_I_I:%.*]], [[IF_END31_I_I_I]] ], [ 0, [[IF_THEN5_I_I]] ]
+// AMDGCNSPIRV-NEXT:    [[__TAGP_ADDR_0_I29_I_I6:%.*]] = phi ptr addrspace(4) [ [[INCDEC_PTR_I36_I_I:%.*]], [[IF_END31_I_I_I]] ], [ [[INCDEC_PTR_I_I]], [[IF_THEN5_I_I]] ]
+// AMDGCNSPIRV-NEXT:    [[TMP4:%.*]] = add i8 [[TMP3]], -48
+// AMDGCNSPIRV-NEXT:    [[OR_COND_I33_I_I:%.*]] = icmp ult i8 [[TMP4]], 10
+// AMDGCNSPIRV-NEXT:    br i1 [[OR_COND_I33_I_I]], label [[IF_END31_I_I_I]], label [[IF_ELSE_I_I_I:%.*]]
 // AMDGCNSPIRV:       if.else.i.i.i:
-// AMDGCNSPIRV-NEXT:    [[TMP4:%.*]] = add i8 [[TMP2]], -97
-// AMDGCNSPIRV-NEXT:    [[OR_COND33_I_I_I:%.*]] = icmp ult i8 [[TMP4]], 6
+// AMDGCNSPIRV-NEXT:    [[TMP5:%.*]] = add i8 [[TMP3]], -97
+// AMDGCNSPIRV-NEXT:    [[OR_COND33_I_I_I:%.*]] = icmp ult i8 [[TMP5]], 6
 // AMDGCNSPIRV-NEXT:    br i1 [[OR_COND33_I_I_I]], label [[IF_END31_I_I_I]], label [[IF_ELSE17_I_I_I:%.*]]
 // AMDGCNSPIRV:       if.else17.i.i.i:
-// AMDGCNSPIRV-NEXT:    [[TMP5:%.*]] = add i8 [[TMP2]], -65
-// AMDGCNSPIRV-NEXT:    [[OR_COND34_I_I_I:%.*]] = icmp ult i8 [[TMP5]], 6
-// AMDGCNSPIRV-NEXT:    br i1 [[OR_COND34_I_I_I]], label [[IF_END31_I_I_I]], label [[CLEANUP_I_I_I]]
+// AMDGCNSPIRV-NEXT:    [[TMP6:%.*]] = add i8 [[TMP3]], -65
+// AMDGCNSPIRV-NEXT:    [[OR_COND34_I_I_I:%.*]] = icmp ult i8 [[TMP6]], 6
+// AMDGCNSPIRV-NEXT:    br i1 [[OR_COND34_I_I_I]], label [[IF_END31_I_I_I]], label [[_ZL4NANFPKC_EXIT]]
 // AMDGCNSPIRV:       if.end31.i.i.i:
 // AMDGCNSPIRV-NEXT:    [[DOTSINK:%.*]] = phi i64 [ -48, [[WHILE_BODY_I32_I_I]] ], [ -87, [[IF_ELSE_I_I_I]] ], [ -55, [[IF_ELSE17_I_I_I]] ]
-// AMDGCNSPIRV-NEXT:    [[MUL24_I_I_I:%.*]] = shl i64 [[__R_0_I30_I_I]], 4
-// AMDGCNSPIRV-NEXT:    [[CONV25_I_I_I:%.*]] = zext nneg i8 [[TMP2]] to i64
+// AMDGCNSPIRV-NEXT:    [[MUL24_I_I_I:%.*]] = shl i64 [[__R_0_I30_I_I7]], 4
+// AMDGCNSPIRV-NEXT:    [[CONV25_I_I_I:%.*]] = zext nneg i8 [[TMP3]] to i64
 // AMDGCNSPIRV-NEXT:    [[ADD26_I_I_I:%.*]] = add i64 [[MUL24_I_I_I]], [[DOTSINK]]
-// AMDGCNSPIRV-NEXT:    [[ADD28_I_I_I:%.*]] = add i64 [[ADD26_I_I_I]], [[CONV25_I_I_I]]
-// AMDGCNSPIRV-NEXT:    [[INCDEC_PTR_I37_I_I:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[__TAGP_ADDR_0_I29_I_I]], i64 1
-// AMDGCNSPIRV-NEXT:    br label [[CLEANUP_I_I_I]]
-// AMDGCNSPIRV:       cleanup.i.i.i:
-// AMDGCNSPIRV-NEXT:    [[__TAGP_ADDR_1_I34_I_I]] = phi ptr addrspace(4) [ [[INCDEC_PTR_I37_I_I]], [[IF_END31_I_I_I]] ], [ [[__TAGP_ADDR_0_I29_I_I]], [[IF_ELSE17_I_I_I]] ]
-// AMDGCNSPIRV-NEXT:    [[__R_2_I_I_I]] = phi i64 [ [[ADD28_I_I_I]], [[IF_END31_I_I_I]] ], [ [[__R_0_I30_I_I]], [[IF_ELSE17_I_I_I]] ]
-// AMDGCNSPIRV-NEXT:    [[COND_I_I_I:%.*]] = phi i1 [ true, [[IF_END31_I_I_I]] ], [ false, [[IF_ELSE17_I_I_I]] ]
-// AMDGCNSPIRV-NEXT:    br i1 [[COND_I_I_I]], label [[WHILE_COND_I28_I_I]], label [[_ZL4NANFPKC_EXIT]], !llvm.loop [[LOOP12]]
+// AMDGCNSPIRV-NEXT:    [[ADD28_I_I_I]] = add i64 [[ADD26_I_I_I]], [[CONV25_I_I_I]]
+// AMDGCNSPIRV-NEXT:    [[INCDEC_PTR_I36_I_I]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[__TAGP_ADDR_0_I29_I_I6]], i64 1
+// AMDGCNSPIRV-NEXT:    [[TMP7]] = load i8, ptr addrspace(4) [[INCDEC_PTR_I36_I_I]], align 1, !tbaa [[TBAA5]]
+// AMDGCNSPIRV-NEXT:    [[CMP_NOT_I31_I_I:%.*]] = icmp eq i8 [[TMP7]], 0
+// AMDGCNSPIRV-NEXT:    br i1 [[CMP_NOT_I31_I_I]], label [[_ZL4NANFPKC_EXIT]], label [[WHILE_BODY_I32_I_I]], !llvm.loop [[LOOP12]]
 // AMDGCNSPIRV:       while.cond.i.i.i:
 // AMDGCNSPIRV-NEXT:    [[__TAGP_ADDR_0_I_I_I:%.*]] = phi ptr addrspace(4) [ [[__TAGP_ADDR_1_I_I_I:%.*]], [[WHILE_BODY_I_I_I:%.*]] ], [ [[INCDEC_PTR_I_I]], [[IF_THEN_I_I]] ]
 // AMDGCNSPIRV-NEXT:    [[__R_0_I_I_I:%.*]] = phi i64 [ [[__R_1_I_I_I:%.*]], [[WHILE_BODY_I_I_I]] ], [ 0, [[IF_THEN_I_I]] ]
-// AMDGCNSPIRV-NEXT:    [[TMP6:%.*]] = load i8, ptr addrspace(4) [[__TAGP_ADDR_0_I_I_I]], align 1, !tbaa [[TBAA5]]
-// AMDGCNSPIRV-NEXT:    [[CMP_NOT_I_I_I:%.*]] = icmp eq i8 [[TMP6]], 0
+// AMDGCNSPIRV-NEXT:    [[TMP8:%.*]] = load i8, ptr addrspace(4) [[__TAGP_ADDR_0_I_I_I]], align 1, !tbaa [[TBAA5]]
+// AMDGCNSPIRV-NEXT:    [[CMP_NOT_I_I_I:%.*]] = icmp eq i8 [[TMP8]], 0
 // AMDGCNSPIRV-NEXT:    br i1 [[CMP_NOT_I_I_I]], label [[_ZL4NANFPKC_EXIT]], label [[WHILE_BODY_I_I_I]]
 // AMDGCNSPIRV:       while.body.i.i.i:
-// AMDGCNSPIRV-NEXT:    [[TMP7:%.*]] = and i8 [[TMP6]], -8
-// AMDGCNSPIRV-NEXT:    [[OR_COND_I_I_I:%.*]] = icmp eq i8 [[TMP7]], 48
+// AMDGCNSPIRV-NEXT:    [[TMP9:%.*]] = and i8 [[TMP8]], -8
+// AMDGCNSPIRV-NEXT:    [[OR_COND_I_I_I:%.*]] = icmp eq i8 [[TMP9]], 48
 // AMDGCNSPIRV-NEXT:    [[MUL_I_I_I:%.*]] = shl i64 [[__R_0_I_I_I]], 3
-// AMDGCNSPIRV-NEXT:    [[CONV5_I_I_I:%.*]] = zext nneg i8 [[TMP6]] to i64
+// AMDGCNSPIRV-NEXT:    [[CONV5_I_I_I:%.*]] = zext nneg i8 [[TMP8]] to i64
 // AMDGCNSPIRV-NEXT:    [[ADD_I_I_I:%.*]] = add i64 [[MUL_I_I_I]], -48
 // AMDGCNSPIRV-NEXT:    [[SUB_I_I_I:%.*]] = add i64 [[ADD_I_I_I]], [[CONV5_I_I_I]]
 // AMDGCNSPIRV-NEXT:    [[__TAGP_ADDR_1_I_I_I_IDX:%.*]] = zext i1 [[OR_COND_I_I_I]] to i64
@@ -4121,14 +4075,14 @@ extern "C" __device__ double test_modf(double x, double* y) {
 // AMDGCNSPIRV:       while.cond.i14.i.i:
 // AMDGCNSPIRV-NEXT:    [[__TAGP_ADDR_0_I15_I_I:%.*]] = phi ptr addrspace(4) [ [[__TAGP_ADDR_1_I25_I_I:%.*]], [[WHILE_BODY_I18_I_I:%.*]] ], [ [[TAG]], [[ENTRY:%.*]] ]
 // AMDGCNSPIRV-NEXT:    [[__R_0_I16_I_I:%.*]] = phi i64 [ [[__R_1_I26_I_I:%.*]], [[WHILE_BODY_I18_I_I]] ], [ 0, [[ENTRY]] ]
-// AMDGCNSPIRV-NEXT:    [[TMP8:%.*]] = load i8, ptr addrspace(4) [[__TAGP_ADDR_0_I15_I_I]], align 1, !tbaa [[TBAA5]]
-// AMDGCNSPIRV-NEXT:    [[CMP_NOT_I17_I_I:%.*]] = icmp eq i8 [[TMP8]], 0
+// AMDGCNSPIRV-NEXT:    [[TMP10:%.*]] = load i8, ptr addrspace(4) [[__TAGP_ADDR_0_I15_I_I]], align 1, !tbaa [[TBAA5]]
+// AMDGCNSPIRV-NEXT:    [[CMP_NOT_I17_I_I:%.*]] = icmp eq i8 [[TMP10]], 0
 // AMDGCNSPIRV-NEXT:    br i1 [[CMP_NOT_I17_I_I]], label [[_ZL4NANFPKC_EXIT]], label [[WHILE_BODY_I18_I_I]]
 // AMDGCNSPIRV:       while.body.i18.i.i:
-// AMDGCNSPIRV-NEXT:    [[TMP9:%.*]] = add i8 [[TMP8]], -48
-// AMDGCNSPIRV-NEXT:    [[OR_COND_I19_I_I:%.*]] = icmp ult i8 [[TMP9]], 10
+// AMDGCNSPIRV-NEXT:    [[TMP11:%.*]] = add i8 [[TMP10]], -48
+// AMDGCNSPIRV-NEXT:    [[OR_COND_I19_I_I:%.*]] = icmp ult i8 [[TMP11]], 10
 // AMDGCNSPIRV-NEXT:    [[MUL_I20_I_I:%.*]] = mul i64 [[__R_0_I16_I_I]], 10
-// AMDGCNSPIRV-NEXT:    [[CONV5_I21_I_I:%.*]] = zext nneg i8 [[TMP8]] to i64
+// AMDGCNSPIRV-NEXT:    [[CONV5_I21_I_I:%.*]] = zext nneg i8 [[TMP10]] to i64
 // AMDGCNSPIRV-NEXT:    [[ADD_I22_I_I:%.*]] = add i64 [[MUL_I20_I_I]], -48
 // AMDGCNSPIRV-NEXT:    [[SUB_I23_I_I:%.*]] = add i64 [[ADD_I22_I_I]], [[CONV5_I21_I_I]]
 // AMDGCNSPIRV-NEXT:    [[__TAGP_ADDR_1_I25_I_I_IDX:%.*]] = zext i1 [[OR_COND_I19_I_I]] to i64
@@ -4136,12 +4090,12 @@ extern "C" __device__ double test_modf(double x, double* y) {
 // AMDGCNSPIRV-NEXT:    [[__R_1_I26_I_I]] = select i1 [[OR_COND_I19_I_I]], i64 [[SUB_I23_I_I]], i64 [[__R_0_I16_I_I]]
 // AMDGCNSPIRV-NEXT:    br i1 [[OR_COND_I19_I_I]], label [[WHILE_COND_I14_I_I]], label [[_ZL4NANFPKC_EXIT]], !llvm.loop [[LOOP11]]
 // AMDGCNSPIRV:       _ZL4nanfPKc.exit:
-// AMDGCNSPIRV-NEXT:    [[RETVAL_0_I_I:%.*]] = phi i64 [ 0, [[WHILE_BODY_I_I_I]] ], [ [[__R_0_I_I_I]], [[WHILE_COND_I_I_I]] ], [ 0, [[CLEANUP_I_I_I]] ], [ [[__R_0_I30_I_I]], [[WHILE_COND_I28_I_I]] ], [ 0, [[WHILE_BODY_I18_I_I]] ], [ [[__R_0_I16_I_I]], [[WHILE_COND_I14_I_I]] ]
+// AMDGCNSPIRV-NEXT:    [[RETVAL_0_I_I:%.*]] = phi i64 [ 0, [[IF_THEN5_I_I]] ], [ 0, [[WHILE_BODY_I_I_I]] ], [ [[__R_0_I_I_I]], [[WHILE_COND_I_I_I]] ], [ [[ADD28_I_I_I]], [[IF_END31_I_I_I]] ], [ 0, [[IF_ELSE17_I_I_I]] ], [ 0, [[WHILE_BODY_I18_I_I]] ], [ [[__R_0_I16_I_I]], [[WHILE_COND_I14_I_I]] ]
 // AMDGCNSPIRV-NEXT:    [[CONV_I:%.*]] = trunc i64 [[RETVAL_0_I_I]] to i32
 // AMDGCNSPIRV-NEXT:    [[BF_VALUE_I:%.*]] = and i32 [[CONV_I]], 4194303
 // AMDGCNSPIRV-NEXT:    [[BF_SET9_I:%.*]] = or disjoint i32 [[BF_VALUE_I]], 2143289344
-// AMDGCNSPIRV-NEXT:    [[TMP10:%.*]] = bitcast i32 [[BF_SET9_I]] to float
-// AMDGCNSPIRV-NEXT:    ret float [[TMP10]]
+// AMDGCNSPIRV-NEXT:    [[TMP12:%.*]] = bitcast i32 [[BF_SET9_I]] to float
+// AMDGCNSPIRV-NEXT:    ret float [[TMP12]]
 //
 extern "C" __device__ float test_nanf(const char *tag) {
   return nanf(tag);
@@ -4151,95 +4105,89 @@ extern "C" __device__ float test_nanf(const char *tag) {
 // DEFAULT-NEXT:  entry:
 // DEFAULT-NEXT:    [[TMP0:%.*]] = load i8, ptr [[TAG:%.*]], align 1, !tbaa [[TBAA4]]
 // DEFAULT-NEXT:    [[CMP_I_I:%.*]] = icmp eq i8 [[TMP0]], 48
-// DEFAULT-NEXT:    br i1 [[CMP_I_I]], label [[IF_THEN_I_I:%.*]], label [[WHILE_COND_I14_I_I:%.*]]
+// DEFAULT-NEXT:    br i1 [[CMP_I_I]], label [[IF_THEN_I_I:%.*]], label [[WHILE_COND_I14_I_I_PREHEADER:%.*]]
+// DEFAULT:       while.cond.i14.i.i.preheader:
+// DEFAULT-NEXT:    [[TMP1:%.*]] = load i8, ptr [[TAG]], align 1, !tbaa [[TBAA4]]
+// DEFAULT-NEXT:    [[CMP_NOT_I17_I_I5:%.*]] = icmp eq i8 [[TMP1]], 0
+// DEFAULT-NEXT:    br i1 [[CMP_NOT_I17_I_I5]], label [[_ZL3NANPKC_EXIT:%.*]], label [[WHILE_BODY_I18_I_I:%.*]]
 // DEFAULT:       if.then.i.i:
 // DEFAULT-NEXT:    [[INCDEC_PTR_I_I:%.*]] = getelementptr inbounds nuw i8, ptr [[TAG]], i64 1
-// DEFAULT-NEXT:    [[TMP1:%.*]] = load i8, ptr [[INCDEC_PTR_I_I]], align 1, !tbaa [[TBAA4]]
-// DEFAULT-NEXT:    switch i8 [[TMP1]], label [[WHILE_COND_I_I_I:%.*]] [
-// DEFAULT-NEXT:      i8 120, label [[WHILE_COND_I30_I_I_PREHEADER:%.*]]
-// DEFAULT-NEXT:      i8 88, label [[WHILE_COND_I30_I_I_PREHEADER]]
+// DEFAULT-NEXT:    [[TMP2:%.*]] = load i8, ptr [[INCDEC_PTR_I_I]], align 1, !tbaa [[TBAA4]]
+// DEFAULT-NEXT:    switch i8 [[TMP2]], label [[WHILE_COND_I_I_I_PREHEADER:%.*]] [
+// DEFAULT-NEXT:      i8 120, label [[IF_THEN5_I_I:%.*]]
+// DEFAULT-NEXT:      i8 88, label [[IF_THEN5_I_I]]
 // DEFAULT-NEXT:    ]
-// DEFAULT:       while.cond.i30.i.i.preheader:
-// DEFAULT-NEXT:    br label [[WHILE_COND_I30_I_I:%.*]]
-// DEFAULT:       while.cond.i30.i.i:
-// DEFAULT-NEXT:    [[__TAGP_ADDR_0_I31_I_I:%.*]] = phi ptr [ [[__TAGP_ADDR_1_I37_I_I:%.*]], [[CLEANUP_I36_I_I:%.*]] ], [ [[INCDEC_PTR_I_I]], [[WHILE_COND_I30_I_I_PREHEADER]] ]
-// DEFAULT-NEXT:    [[__R_0_I32_I_I:%.*]] = phi i64 [ [[__R_2_I_I_I:%.*]], [[CLEANUP_I36_I_I]] ], [ 0, [[WHILE_COND_I30_I_I_PREHEADER]] ]
-// DEFAULT-NEXT:    [[TMP2:%.*]] = load i8, ptr [[__TAGP_ADDR_0_I31_I_I]], align 1, !tbaa [[TBAA4]]
-// DEFAULT-NEXT:    [[CMP_NOT_I33_I_I:%.*]] = icmp eq i8 [[TMP2]], 0
-// DEFAULT-NEXT:    br i1 [[CMP_NOT_I33_I_I]], label [[_ZL3NANPKC_EXIT:%.*]], label [[WHILE_BODY_I34_I_I:%.*]]
-// DEFAULT:       while.body.i34.i.i:
-// DEFAULT-NEXT:    [[TMP3:%.*]] = add i8 [[TMP2]], -48
-// DEFAULT-NEXT:    [[OR_COND_I35_I_I:%.*]] = icmp ult i8 [[TMP3]], 10
-// DEFAULT-NEXT:    br i1 [[OR_COND_I35_I_I]], label [[IF_END31_I_I_I:%.*]], label [[IF_ELSE_I_I_I:%.*]]
+// DEFAULT:       while.cond.i.i.i.preheader:
+// DEFAULT-NEXT:    [[TMP3:%.*]] = load i8, ptr [[INCDEC_PTR_I_I]], align 1, !tbaa [[TBAA4]]
+// DEFAULT-NEXT:    [[CMP_NOT_I_I_I14:%.*]] = icmp eq i8 [[TMP3]], 0
+// DEFAULT-NEXT:    br i1 [[CMP_NOT_I_I_I14]], label [[_ZL3NANPKC_EXIT]], label [[WHILE_BODY_I_I_I:%.*]]
+// DEFAULT:       if.then5.i.i:
+// DEFAULT-NEXT:    [[TMP4:%.*]] = load i8, ptr [[INCDEC_PTR_I_I]], align 1, !tbaa [[TBAA4]]
+// DEFAULT-NEXT:    [[CMP_NOT_I30_I_I9:%.*]] = icmp eq i8 [[TMP4]], 0
+// DEFAULT-NEXT:    br i1 [[CMP_NOT_I30_I_I9]], label [[_ZL3NANPKC_EXIT]], label [[WHILE_BODY_I31_I_I:%.*]]
+// DEFAULT:       while.body.i31.i.i:
+// DEFAULT-NEXT:    [[TMP5:%.*]] = phi i8 [ [[TMP9:%.*]], [[IF_END31_I_I_I:%.*]] ], [ [[TMP4]], [[IF_THEN5_I_I]] ]
+// DEFAULT-NEXT:    [[__R_0_I29_I_I11:%.*]] = phi i64 [ [[ADD28_I_I_I:%.*]], [[IF_END31_I_I_I]] ], [ 0, [[IF_THEN5_I_I]] ]
+// DEFAULT-NEXT:    [[__TAGP_ADDR_0_I28_I_I10:%.*]] = phi ptr [ [[INCDEC_PTR_I34_I_I:%.*]], [[IF_END31_I_I_I]] ], [ [[INCDEC_PTR_I_I]], [[IF_THEN5_I_I]] ]
+// DEFAULT-NEXT:    [[TMP6:%.*]] = add i8 [[TMP5]], -48
+// DEFAULT-NEXT:    [[OR_COND_I32_I_I:%.*]] = icmp ult i8 [[TMP6]], 10
+// DEFAULT-NEXT:    br i1 [[OR_COND_I32_I_I]], label [[IF_END31_I_I_I]], label [[IF_ELSE_I_I_I:%.*]]
 // DEFAULT:       if.else.i.i.i:
-// DEFAULT-NEXT:    [[TMP4:%.*]] = add i8 [[TMP2]], -97
-// DEFAULT-NEXT:    [[OR_COND33_I_I_I:%.*]] = icmp ult i8 [[TMP4]], 6
+// DEFAULT-NEXT:    [[TMP7:%.*]] = add i8 [[TMP5]], -97
+// DEFAULT-NEXT:    [[OR_COND33_I_I_I:%.*]] = icmp ult i8 [[TMP7]], 6
 // DEFAULT-NEXT:    br i1 [[OR_COND33_I_I_I]], label [[IF_END31_I_I_I]], label [[IF_ELSE17_I_I_I:%.*]]
 // DEFAULT:       if.else17.i.i.i:
-// DEFAULT-NEXT:    [[TMP5:%.*]] = add i8 [[TMP2]], -65
-// DEFAULT-NEXT:    [[OR_COND34_I_I_I:%.*]] = icmp ult i8 [[TMP5]], 6
-// DEFAULT-NEXT:    br i1 [[OR_COND34_I_I_I]], label [[IF_END31_I_I_I]], label [[CLEANUP_I36_I_I]]
+// DEFAULT-NEXT:    [[TMP8:%.*]] = add i8 [[TMP5]], -65
+// DEFAULT-NEXT:    [[OR_COND34_I_I_I:%.*]] = icmp ult i8 [[TMP8]], 6
+// DEFAULT-NEXT:    br i1 [[OR_COND34_I_I_I]], label [[IF_END31_I_I_I]], label [[_ZL3NANPKC_EXIT]]
 // DEFAULT:       if.end31.i.i.i:
-// DEFAULT-NEXT:    [[DOTSINK:%.*]] = phi i64 [ -48, [[WHILE_BODY_I34_I_I]] ], [ -87, [[IF_ELSE_I_I_I]] ], [ -55, [[IF_ELSE17_I_I_I]] ]
-// DEFAULT-NEXT:    [[MUL24_I_I_I:%.*]] = shl i64 [[__R_0_I32_I_I]], 4
-// DEFAULT-NEXT:    [[CONV25_I_I_I:%.*]] = zext nneg i8 [[TMP2]] to i64
+// DEFAULT-NEXT:    [[DOTSINK:%.*]] = phi i64 [ -48, [[WHILE_BODY_I31_I_I]] ], [ -87, [[IF_ELSE_I_I_I]] ], [ -55, [[IF_ELSE17_I_I_I]] ]
+// DEFAULT-NEXT:    [[MUL24_I_I_I:%.*]] = shl i64 [[__R_0_I29_I_I11]], 4
+// DEFAULT-NEXT:    [[CONV25_I_I_I:%.*]] = zext nneg i8 [[TMP5]] to i64
 // DEFAULT-NEXT:    [[ADD26_I_I_I:%.*]] = add i64 [[MUL24_I_I_I]], [[DOTSINK]]
-// DEFAULT-NEXT:    [[ADD28_I_I_I:%.*]] = add i64 [[ADD26_I_I_I]], [[CONV25_I_I_I]]
-// DEFAULT-NEXT:    [[INCDEC_PTR_I40_I_I:%.*]] = getelementptr inbounds nuw i8, ptr [[__TAGP_ADDR_0_I31_I_I]], i64 1
-// DEFAULT-NEXT:    br label [[CLEANUP_I36_I_I]]
-// DEFAULT:       cleanup.i36.i.i:
-// DEFAULT-NEXT:    [[__TAGP_ADDR_1_I37_I_I]] = phi ptr [ [[INCDEC_PTR_I40_I_I]], [[IF_END31_I_I_I]] ], [ [[__TAGP_ADDR_0_I31_I_I]], [[IF_ELSE17_I_I_I]] ]
-// DEFAULT-NEXT:    [[__R_2_I_I_I]] = phi i64 [ [[ADD28_I_I_I]], [[IF_END31_I_I_I]] ], [ [[__R_0_I32_I_I]], [[IF_ELSE17_I_I_I]] ]
-// DEFAULT-NEXT:    [[COND_I_I_I:%.*]] = phi i1 [ true, [[IF_END31_I_I_I]] ], [ false, [[IF_ELSE17_I_I_I]] ]
-// DEFAULT-NEXT:    br i1 [[COND_I_I_I]], label [[WHILE_COND_I30_I_I]], label [[_ZL3NANPKC_EXIT]], !llvm.loop [[LOOP11]]
-// DEFAULT:       while.cond.i.i.i:
-// DEFAULT-NEXT:    [[__TAGP_ADDR_0_I_I_I:%.*]] = phi ptr [ [[__TAGP_ADDR_1_I_I_I:%.*]], [[CLEANUP_I_I_I:%.*]] ], [ [[INCDEC_PTR_I_I]], [[IF_THEN_I_I]] ]
-// DEFAULT-NEXT:    [[__R_0_I_I_I:%.*]] = phi i64 [ [[__R_1_I_I_I:%.*]], [[CLEANUP_I_I_I]] ], [ 0, [[IF_THEN_I_I]] ]
-// DEFAULT-NEXT:    [[TMP6:%.*]] = load i8, ptr [[__TAGP_ADDR_0_I_I_I]], align 1, !tbaa [[TBAA4]]
-// DEFAULT-NEXT:    [[CMP_NOT_I_I_I:%.*]] = icmp eq i8 [[TMP6]], 0
-// DEFAULT-NEXT:    br i1 [[CMP_NOT_I_I_I]], label [[_ZL3NANPKC_EXIT]], label [[WHILE_BODY_I_I_I:%.*]]
+// DEFAULT-NEXT:    [[ADD28_I_I_I]] = add i64 [[ADD26_I_I_I]], [[CONV25_I_I_I]]
+// DEFAULT-NEXT:    [[INCDEC_PTR_I34_I_I]] = getelementptr inbounds nuw i8, ptr [[__TAGP_ADDR_0_I28_I_I10]], i64 1
+// DEFAULT-NEXT:    [[TMP9]] = load i8, ptr [[INCDEC_PTR_I34_I_I]], align 1, !tbaa [[TBAA4]]
+// DEFAULT-NEXT:    [[CMP_NOT_I30_I_I:%.*]] = icmp eq i8 [[TMP9]], 0
+// DEFAULT-NEXT:    br i1 [[CMP_NOT_I30_I_I]], label [[_ZL3NANPKC_EXIT]], label [[WHILE_BODY_I31_I_I]], !llvm.loop [[LOOP11]]
 // DEFAULT:       while.body.i.i.i:
-// DEFAULT-NEXT:    [[TMP7:%.*]] = and i8 [[TMP6]], -8
-// DEFAULT-NEXT:    [[OR_COND_I_I_I:%.*]] = icmp eq i8 [[TMP7]], 48
-// DEFAULT-NEXT:    br i1 [[OR_COND_I_I_I]], label [[IF_THEN_I_I_I:%.*]], label [[CLEANUP_I_I_I]]
+// DEFAULT-NEXT:    [[TMP10:%.*]] = phi i8 [ [[TMP12:%.*]], [[IF_THEN_I_I_I:%.*]] ], [ [[TMP3]], [[WHILE_COND_I_I_I_PREHEADER]] ]
+// DEFAULT-NEXT:    [[__R_0_I_I_I16:%.*]] = phi i64 [ [[SUB_I_I_I:%.*]], [[IF_THEN_I_I_I]] ], [ 0, [[WHILE_COND_I_I_I_PREHEADER]] ]
+// DEFAULT-NEXT:    [[__TAGP_ADDR_0_I_I_I15:%.*]] = phi ptr [ [[INCDEC_PTR_I_I_I:%.*]], [[IF_THEN_I_I_I]] ], [ [[INCDEC_PTR_I_I]], [[WHILE_COND_I_I_I_PREHEADER]] ]
+// DEFAULT-NEXT:    [[TMP11:%.*]] = and i8 [[TMP10]], -8
+// DEFAULT-NEXT:    [[OR_COND_I_I_I:%.*]] = icmp eq i8 [[TMP11]], 48
+// DEFAULT-NEXT:    br i1 [[OR_COND_I_I_I]], label [[IF_THEN_I_I_I]], label [[_ZL3NANPKC_EXIT]]
 // DEFAULT:       if.then.i.i.i:
-// DEFAULT-NEXT:    [[MUL_I_I_I:%.*]] = shl i64 [[__R_0_I_I_I]], 3
-// DEFAULT-NEXT:    [[CONV5_I_I_I:%.*]] = zext nneg i8 [[TMP6]] to i64
+// DEFAULT-NEXT:    [[MUL_I_I_I:%.*]] = shl i64 [[__R_0_I_I_I16]], 3
+// DEFAULT-NEXT:    [[CONV5_I_I_I:%.*]] = zext nneg i8 [[TMP10]] to i64
 // DEFAULT-NEXT:    [[ADD_I_I_I:%.*]] = add i64 [[MUL_I_I_I]], -48
-// DEFAULT-NEXT:    [[SUB_I_I_I:%.*]] = add i64 [[ADD_I_I_I]], [[CONV5_I_I_I]]
-// DEFAULT-NEXT:    [[INCDEC_PTR_I_I_I:%.*]] = getelementptr inbounds nuw i8, ptr [[__TAGP_ADDR_0_I_I_I]], i64 1
-// DEFAULT-NEXT:    br label [[CLEANUP_I_I_I]]
-// DEFAULT:       cleanup.i.i.i:
-// DEFAULT-NEXT:    [[__TAGP_ADDR_1_I_I_I]] = phi ptr [ [[INCDEC_PTR_I_I_I]], [[IF_THEN_I_I_I]] ], [ [[__TAGP_ADDR_0_I_I_I]], [[WHILE_BODY_I_I_I]] ]
-// DEFAULT-NEXT:    [[__R_1_I_I_I]] = phi i64 [ [[SUB_I_I_I]], [[IF_THEN_I_I_I]] ], [ [[__R_0_I_I_I]], [[WHILE_BODY_I_I_I]] ]
-// DEFAULT-NEXT:    br i1 [[OR_COND_I_I_I]], label [[WHILE_COND_I_I_I]], label [[_ZL3NANPKC_EXIT]], !llvm.loop [[LOOP7]]
-// DEFAULT:       while.cond.i14.i.i:
-// DEFAULT-NEXT:    [[__TAGP_ADDR_0_I15_I_I:%.*]] = phi ptr [ [[__TAGP_ADDR_1_I21_I_I:%.*]], [[CLEANUP_I20_I_I:%.*]] ], [ [[TAG]], [[ENTRY:%.*]] ]
-// DEFAULT-NEXT:    [[__R_0_I16_I_I:%.*]] = phi i64 [ [[__R_1_I22_I_I:%.*]], [[CLEANUP_I20_I_I]] ], [ 0, [[ENTRY]] ]
-// DEFAULT-NEXT:    [[TMP8:%.*]] = load i8, ptr [[__TAGP_ADDR_0_I15_I_I]], align 1, !tbaa [[TBAA4]]
-// DEFAULT-NEXT:    [[CMP_NOT_I17_I_I:%.*]] = icmp eq i8 [[TMP8]], 0
-// DEFAULT-NEXT:    br i1 [[CMP_NOT_I17_I_I]], label [[_ZL3NANPKC_EXIT]], label [[WHILE_BODY_I18_I_I:%.*]]
+// DEFAULT-NEXT:    [[SUB_I_I_I]] = add i64 [[ADD_I_I_I]], [[CONV5_I_I_I]]
+// DEFAULT-NEXT:    [[INCDEC_PTR_I_I_I]] = getelementptr inbounds nuw i8, ptr [[__TAGP_ADDR_0_I_I_I15]], i64 1
+// DEFAULT-NEXT:    [[TMP12]] = load i8, ptr [[INCDEC_PTR_I_I_I]], align 1, !tbaa [[TBAA4]]
+// DEFAULT-NEXT:    [[CMP_NOT_I_I_I:%.*]] = icmp eq i8 [[TMP12]], 0
+// DEFAULT-NEXT:    br i1 [[CMP_NOT_I_I_I]], label [[_ZL3NANPKC_EXIT]], label [[WHILE_BODY_I_I_I]], !llvm.loop [[LOOP7]]
 // DEFAULT:       while.body.i18.i.i:
-// DEFAULT-NEXT:    [[TMP9:%.*]] = add i8 [[TMP8]], -48
-// DEFAULT-NEXT:    [[OR_COND_I19_I_I:%.*]] = icmp ult i8 [[TMP9]], 10
-// DEFAULT-NEXT:    br i1 [[OR_COND_I19_I_I]], label [[IF_THEN_I24_I_I:%.*]], label [[CLEANUP_I20_I_I]]
-// DEFAULT:       if.then.i24.i.i:
-// DEFAULT-NEXT:    [[MUL_I25_I_I:%.*]] = mul i64 [[__R_0_I16_I_I]], 10
-// DEFAULT-NEXT:    [[CONV5_I26_I_I:%.*]] = zext nneg i8 [[TMP8]] to i64
-// DEFAULT-NEXT:    [[ADD_I27_I_I:%.*]] = add i64 [[MUL_I25_I_I]], -48
-// DEFAULT-NEXT:    [[SUB_I28_I_I:%.*]] = add i64 [[ADD_I27_I_I]], [[CONV5_I26_I_I]]
-// DEFAULT-NEXT:    [[INCDEC_PTR_I29_I_I:%.*]] = getelementptr inbounds nuw i8, ptr [[__TAGP_ADDR_0_I15_I_I]], i64 1
-// DEFAULT-NEXT:    br label [[CLEANUP_I20_I_I]]
-// DEFAULT:       cleanup.i20.i.i:
-// DEFAULT-NEXT:    [[__TAGP_ADDR_1_I21_I_I]] = phi ptr [ [[INCDEC_PTR_I29_I_I]], [[IF_THEN_I24_I_I]] ], [ [[__TAGP_ADDR_0_I15_I_I]], [[WHILE_BODY_I18_I_I]] ]
-// DEFAULT-NEXT:    [[__R_1_I22_I_I]] = phi i64 [ [[SUB_I28_I_I]], [[IF_THEN_I24_I_I]] ], [ [[__R_0_I16_I_I]], [[WHILE_BODY_I18_I_I]] ]
-// DEFAULT-NEXT:    br i1 [[OR_COND_I19_I_I]], label [[WHILE_COND_I14_I_I]], label [[_ZL3NANPKC_EXIT]], !llvm.loop [[LOOP10]]
+// DEFAULT-NEXT:    [[TMP13:%.*]] = phi i8 [ [[TMP15:%.*]], [[IF_THEN_I21_I_I:%.*]] ], [ [[TMP1]], [[WHILE_COND_I14_I_I_PREHEADER]] ]
+// DEFAULT-NEXT:    [[__R_0_I16_I_I7:%.*]] = phi i64 [ [[SUB_I25_I_I:%.*]], [[IF_THEN_I21_I_I]] ], [ 0, [[WHILE_COND_I14_I_I_PREHEADER]] ]
+// DEFAULT-NEXT:    [[__TAGP_ADDR_0_I15_I_I6:%.*]] = phi ptr [ [[INCDEC_PTR_I26_I_I:%.*]], [[IF_THEN_I21_I_I]] ], [ [[TAG]], [[WHILE_COND_I14_I_I_PREHEADER]] ]
+// DEFAULT-NEXT:    [[TMP14:%.*]] = add i8 [[TMP13]], -48
+// DEFAULT-NEXT:    [[OR_COND_I19_I_I:%.*]] = icmp ult i8 [[TMP14]], 10
+// DEFAULT-NEXT:    br i1 [[OR_COND_I19_I_I]], label [[IF_THEN_I21_I_I]], label [[_ZL3NANPKC_EXIT]]
+// DEFAULT:       if.then.i21.i.i:
+// DEFAULT-NEXT:    [[MUL_I22_I_I:%.*]] = mul i64 [[__R_0_I16_I_I7]], 10
+// DEFAULT-NEXT:    [[CONV5_I23_I_I:%.*]] = zext nneg i8 [[TMP13]] to i64
+// DEFAULT-NEXT:    [[ADD_I24_I_I:%.*]] = add i64 [[MUL_I22_I_I]], -48
+// DEFAULT-NEXT:    [[SUB_I25_I_I]] = add i64 [[ADD_I24_I_I]], [[CONV5_I23_I_I]]
+// DEFAULT-NEXT:    [[INCDEC_PTR_I26_I_I]] = getelementptr inbounds nuw i8, ptr [[__TAGP_ADDR_0_I15_I_I6]], i64 1
+// DEFAULT-NEXT:    [[TMP15]] = load i8, ptr [[INCDEC_PTR_I26_I_I]], align 1, !tbaa [[TBAA4]]
+// DEFAULT-NEXT:    [[CMP_NOT_I17_I_I:%.*]] = icmp eq i8 [[TMP15]], 0
+// DEFAULT-NEXT:    br i1 [[CMP_NOT_I17_I_I]], label [[_ZL3NANPKC_EXIT]], label [[WHILE_BODY_I18_I_I]], !llvm.loop [[LOOP10]]
 // DEFAULT:       _ZL3nanPKc.exit:
-// DEFAULT-NEXT:    [[RETVAL_0_I_I:%.*]] = phi i64 [ 0, [[CLEANUP_I_I_I]] ], [ [[__R_0_I_I_I]], [[WHILE_COND_I_I_I]] ], [ 0, [[CLEANUP_I36_I_I]] ], [ [[__R_0_I32_I_I]], [[WHILE_COND_I30_I_I]] ], [ 0, [[CLEANUP_I20_I_I]] ], [ [[__R_0_I16_I_I]], [[WHILE_COND_I14_I_I]] ]
+// DEFAULT-NEXT:    [[RETVAL_0_I_I:%.*]] = phi i64 [ 0, [[WHILE_COND_I_I_I_PREHEADER]] ], [ 0, [[IF_THEN5_I_I]] ], [ 0, [[WHILE_COND_I14_I_I_PREHEADER]] ], [ [[SUB_I_I_I]], [[IF_THEN_I_I_I]] ], [ 0, [[WHILE_BODY_I_I_I]] ], [ [[ADD28_I_I_I]], [[IF_END31_I_I_I]] ], [ 0, [[IF_ELSE17_I_I_I]] ], [ [[SUB_I25_I_I]], [[IF_THEN_I21_I_I]] ], [ 0, [[WHILE_BODY_I18_I_I]] ]
 // DEFAULT-NEXT:    [[BF_VALUE_I:%.*]] = and i64 [[RETVAL_0_I_I]], 2251799813685247
 // DEFAULT-NEXT:    [[BF_SET9_I:%.*]] = or disjoint i64 [[BF_VALUE_I]], 9221120237041090560
-// DEFAULT-NEXT:    [[TMP10:%.*]] = bitcast i64 [[BF_SET9_I]] to double
-// DEFAULT-NEXT:    ret double [[TMP10]]
+// DEFAULT-NEXT:    [[TMP16:%.*]] = bitcast i64 [[BF_SET9_I]] to double
+// DEFAULT-NEXT:    ret double [[TMP16]]
 //
 // FINITEONLY-LABEL: @test_nan(
 // FINITEONLY-NEXT:  entry:
@@ -4249,189 +4197,177 @@ extern "C" __device__ float test_nanf(const char *tag) {
 // APPROX-NEXT:  entry:
 // APPROX-NEXT:    [[TMP0:%.*]] = load i8, ptr [[TAG:%.*]], align 1, !tbaa [[TBAA4]]
 // APPROX-NEXT:    [[CMP_I_I:%.*]] = icmp eq i8 [[TMP0]], 48
-// APPROX-NEXT:    br i1 [[CMP_I_I]], label [[IF_THEN_I_I:%.*]], label [[WHILE_COND_I14_I_I:%.*]]
+// APPROX-NEXT:    br i1 [[CMP_I_I]], label [[IF_THEN_I_I:%.*]], label [[WHILE_COND_I14_I_I_PREHEADER:%.*]]
+// APPROX:       while.cond.i14.i.i.preheader:
+// APPROX-NEXT:    [[TMP1:%.*]] = load i8, ptr [[TAG]], align 1, !tbaa [[TBAA4]]
+// APPROX-NEXT:    [[CMP_NOT_I17_I_I5:%.*]] = icmp eq i8 [[TMP1]], 0
+// APPROX-NEXT:    br i1 [[CMP_NOT_I17_I_I5]], label [[_ZL3NANPKC_EXIT:%.*]], label [[WHILE_BODY_I18_I_I:%.*]]
 // APPROX:       if.then.i.i:
 // APPROX-NEXT:    [[INCDEC_PTR_I_I:%.*]] = getelementptr inbounds nuw i8, ptr [[TAG]], i64 1
-// APPROX-NEXT:    [[TMP1:%.*]] = load i8, ptr [[INCDEC_PTR_I_I]], align 1, !tbaa [[TBAA4]]
-// APPROX-NEXT:    switch i8 [[TMP1]], label [[WHILE_COND_I_I_I:%.*]] [
-// APPROX-NEXT:      i8 120, label [[WHILE_COND_I30_I_I_PREHEADER:%.*]]
-// APPROX-NEXT:      i8 88, label [[WHILE_COND_I30_I_I_PREHEADER]]
+// APPROX-NEXT:    [[TMP2:%.*]] = load i8, ptr [[INCDEC_PTR_I_I]], align 1, !tbaa [[TBAA4]]
+// APPROX-NEXT:    switch i8 [[TMP2]], label [[WHILE_COND_I_I_I_PREHEADER:%.*]] [
+// APPROX-NEXT:      i8 120, label [[IF_THEN5_I_I:%.*]]
+// APPROX-NEXT:      i8 88, label [[IF_THEN5_I_I]]
 // APPROX-NEXT:    ]
-// APPROX:       while.cond.i30.i.i.preheader:
-// APPROX-NEXT:    br label [[WHILE_COND_I30_I_I:%.*]]
-// APPROX:       while.cond.i30.i.i:
-// APPROX-NEXT:    [[__TAGP_ADDR_0_I31_I_I:%.*]] = phi ptr [ [[__TAGP_ADDR_1_I37_I_I:%.*]], [[CLEANUP_I36_I_I:%.*]] ], [ [[INCDEC_PTR_I_I]], [[WHILE_COND_I30_I_I_PREHEADER]] ]
-// APPROX-NEXT:    [[__R_0_I32_I_I:%.*]] = phi i64 [ [[__R_2_I_I_I:%.*]], [[CLEANUP_I36_I_I]] ], [ 0, [[WHILE_COND_I30_I_I_PREHEADER]] ]
-// APPROX-NEXT:    [[TMP2:%.*]] = load i8, ptr [[__TAGP_ADDR_0_I31_I_I]], align 1, !tbaa [[TBAA4]]
-// APPROX-NEXT:    [[CMP_NOT_I33_I_I:%.*]] = icmp eq i8 [[TMP2]], 0
-// APPROX-NEXT:    br i1 [[CMP_NOT_I33_I_I]], label [[_ZL3NANPKC_EXIT:%.*]], label [[WHILE_BODY_I34_I_I:%.*]]
-// APPROX:       while.body.i34.i.i:
-// APPROX-NEXT:    [[TMP3:%.*]] = add i8 [[TMP2]], -48
-// APPROX-NEXT:    [[OR_COND_I35_I_I:%.*]] = icmp ult i8 [[TMP3]], 10
-// APPROX-NEXT:    br i1 [[OR_COND_I35_I_I]], label [[IF_END31_I_I_I:%.*]], label [[IF_ELSE_I_I_I:%.*]]
+// APPROX:       while.cond.i.i.i.preheader:
+// APPROX-NEXT:    [[TMP3:%.*]] = load i8, ptr [[INCDEC_PTR_I_I]], align 1, !tbaa [[TBAA4]]
+// APPROX-NEXT:    [[CMP_NOT_I_I_I14:%.*]] = icmp eq i8 [[TMP3]], 0
+// APPROX-NEXT:    br i1 [[CMP_NOT_I_I_I14]], label [[_ZL3NANPKC_EXIT]], label [[WHILE_BODY_I_I_I:%.*]]
+// APPROX:       if.then5.i.i:
+// APPROX-NEXT:    [[TMP4:%.*]] = load i8, ptr [[INCDEC_PTR_I_I]], align 1, !tbaa [[TBAA4]]
+// APPROX-NEXT:    [[CMP_NOT_I30_I_I9:%.*]] = icmp eq i8 [[TMP4]], 0
+// APPROX-NEXT:    br i1 [[CMP_NOT_I30_I_I9]], label [[_ZL3NANPKC_EXIT]], label [[WHILE_BODY_I31_I_I:%.*]]
+// APPROX:       while.body.i31.i.i:
+// APPROX-NEXT:    [[TMP5:%.*]] = phi i8 [ [[TMP9:%.*]], [[IF_END31_I_I_I:%.*]] ], [ [[TMP4]], [[IF_THEN5_I_I]] ]
+// APPROX-NEXT:    [[__R_0_I29_I_I11:%.*]] = phi i64 [ [[ADD28_I_I_I:%.*]], [[IF_END31_I_I_I]] ], [ 0, [[IF_THEN5_I_I]] ]
+// APPROX-NEXT:    [[__TAGP_ADDR_0_I28_I_I10:%.*]] = phi ptr [ [[INCDEC_PTR_I34_I_I:%.*]], [[IF_END31_I_I_I]] ], [ [[INCDEC_PTR_I_I]], [[IF_THEN5_I_I]] ]
+// APPROX-NEXT:    [[TMP6:%.*]] = add i8 [[TMP5]], -48
+// APPROX-NEXT:    [[OR_COND_I32_I_I:%.*]] = icmp ult i8 [[TMP6]], 10
+// APPROX-NEXT:    br i1 [[OR_COND_I32_I_I]], label [[IF_END31_I_I_I]], label [[IF_ELSE_I_I_I:%.*]]
 // APPROX:       if.else.i.i.i:
-// APPROX-NEXT:    [[TMP4:%.*]] = add i8 [[TMP2]], -97
-// APPROX-NEXT:    [[OR_COND33_I_I_I:%.*]] = icmp ult i8 [[TMP4]], 6
+// APPROX-NEXT:    [[TMP7:%.*]] = add i8 [[TMP5]], -97
+// APPROX-NEXT:    [[OR_COND33_I_I_I:%.*]] = icmp ult i8 [[TMP7]], 6
 // APPROX-NEXT:    br i1 [[OR_COND33_I_I_I]], label [[IF_END31_I_I_I]], label [[IF_ELSE17_I_I_I:%.*]]
 // APPROX:       if.else17.i.i.i:
-// APPROX-NEXT:    [[TMP5:%.*]] = add i8 [[TMP2]], -65
-// APPROX-NEXT:    [[OR_COND34_I_I_I:%.*]] = icmp ult i8 [[TMP5]], 6
-// APPROX-NEXT:    br i1 [[OR_COND34_I_I_I]], label [[IF_END31_I_I_I]], label [[CLEANUP_I36_I_I]]
+// APPROX-NEXT:    [[TMP8:%.*]] = add i8 [[TMP5]], -65
+// APPROX-NEXT:    [[OR_COND34_I_I_I:%.*]] = icmp ult i8 [[TMP8]], 6
+// APPROX-NEXT:    br i1 [[OR_COND34_I_I_I]], label [[IF_END31_I_I_I]], label [[_ZL3NANPKC_EXIT]]
 // APPROX:       if.end31.i.i.i:
-// APPROX-NEXT:    [[DOTSINK:%.*]] = phi i64 [ -48, [[WHILE_BODY_I34_I_I]] ], [ -87, [[IF_ELSE_I_I_I]] ], [ -55, [[IF_ELSE17_I_I_I]] ]
-// APPROX-NEXT:    [[MUL24_I_I_I:%.*]] = shl i64 [[__R_0_I32_I_I]], 4
-// APPROX-NEXT:    [[CONV25_I_I_I:%.*]] = zext nneg i8 [[TMP2]] to i64
+// APPROX-NEXT:    [[DOTSINK:%.*]] = phi i64 [ -48, [[WHILE_BODY_I31_I_I]] ], [ -87, [[IF_ELSE_I_I_I]] ], [ -55, [[IF_ELSE17_I_I_I]] ]
+// APPROX-NEXT:    [[MUL24_I_I_I:%.*]] = shl i64 [[__R_0_I29_I_I11]], 4
+// APPROX-NEXT:    [[CONV25_I_I_I:%.*]] = zext nneg i8 [[TMP5]] to i64
 // APPROX-NEXT:    [[ADD26_I_I_I:%.*]] = add i64 [[MUL24_I_I_I]], [[DOTSINK]]
-// APPROX-NEXT:    [[ADD28_I_I_I:%.*]] = add i64 [[ADD26_I_I_I]], [[CONV25_I_I_I]]
-// APPROX-NEXT:    [[INCDEC_PTR_I40_I_I:%.*]] = getelementptr inbounds nuw i8, ptr [[__TAGP_ADDR_0_I31_I_I]], i64 1
-// APPROX-NEXT:    br label [[CLEANUP_I36_I_I]]
-// APPROX:       cleanup.i36.i.i:
-// APPROX-NEXT:    [[__TAGP_ADDR_1_I37_I_I]] = phi ptr [ [[INCDEC_PTR_I40_I_I]], [[IF_END31_I_I_I]] ], [ [[__TAGP_ADDR_0_I31_I_I]], [[IF_ELSE17_I_I_I]] ]
-// APPROX-NEXT:    [[__R_2_I_I_I]] = phi i64 [ [[ADD28_I_I_I]], [[IF_END31_I_I_I]] ], [ [[__R_0_I32_I_I]], [[IF_ELSE17_I_I_I]] ]
-// APPROX-NEXT:    [[COND_I_I_I:%.*]] = phi i1 [ true, [[IF_END31_I_I_I]] ], [ false, [[IF_ELSE17_I_I_I]] ]
-// APPROX-NEXT:    br i1 [[COND_I_I_I]], label [[WHILE_COND_I30_I_I]], label [[_ZL3NANPKC_EXIT]], !llvm.loop [[LOOP11]]
-// APPROX:       while.cond.i.i.i:
-// APPROX-NEXT:    [[__TAGP_ADDR_0_I_I_I:%.*]] = phi ptr [ [[__TAGP_ADDR_1_I_I_I:%.*]], [[CLEANUP_I_I_I:%.*]] ], [ [[INCDEC_PTR_I_I]], [[IF_THEN_I_I]] ]
-// APPROX-NEXT:    [[__R_0_I_I_I:%.*]] = phi i64 [ [[__R_1_I_I_I:%.*]], [[CLEANUP_I_I_I]] ], [ 0, [[IF_THEN_I_I]] ]
-// APPROX-NEXT:    [[TMP6:%.*]] = load i8, ptr [[__TAGP_ADDR_0_I_I_I]], align 1, !tbaa [[TBAA4]]
-// APPROX-NEXT:    [[CMP_NOT_I_I_I:%.*]] = icmp eq i8 [[TMP6]], 0
-// APPROX-NEXT:    br i1 [[CMP_NOT_I_I_I]], label [[_ZL3NANPKC_EXIT]], label [[WHILE_BODY_I_I_I:%.*]]
+// APPROX-NEXT:    [[ADD28_I_I_I]] = add i64 [[ADD26_I_I_I]], [[CONV25_I_I_I]]
+// APPROX-NEXT:    [[INCDEC_PTR_I34_I_I]] = getelementptr inbounds nuw i8, ptr [[__TAGP_ADDR_0_I28_I_I10]], i64 1
+// APPROX-NEXT:    [[TMP9]] = load i8, ptr [[INCDEC_PTR_I34_I_I]], align 1, !tbaa [[TBAA4]]
+// APPROX-NEXT:    [[CMP_NOT_I30_I_I:%.*]] = icmp eq i8 [[TMP9]], 0
+// APPROX-NEXT:    br i1 [[CMP_NOT_I30_I_I]], label [[_ZL3NANPKC_EXIT]], label [[WHILE_BODY_I31_I_I]], !llvm.loop [[LOOP11]]
 // APPROX:       while.body.i.i.i:
-// APPROX-NEXT:    [[TMP7:%.*]] = and i8 [[TMP6]], -8
-// APPROX-NEXT:    [[OR_COND_I_I_I:%.*]] = icmp eq i8 [[TMP7]], 48
-// APPROX-NEXT:    br i1 [[OR_COND_I_I_I]], label [[IF_THEN_I_I_I:%.*]], label [[CLEANUP_I_I_I]]
+// APPROX-NEXT:    [[TMP10:%.*]] = phi i8 [ [[TMP12:%.*]], [[IF_THEN_I_I_I:%.*]] ], [ [[TMP3]], [[WHILE_COND_I_I_I_PREHEADER]] ]
+// APPROX-NEXT:    [[__R_0_I_I_I16:%.*]] = phi i64 [ [[SUB_I_I_I:%.*]], [[IF_THEN_I_I_I]] ], [ 0, [[WHILE_COND_I_I_I_PREHEADER]] ]
+// APPROX-NEXT:    [[__TAGP_ADDR_0_I_I_I15:%.*]] = phi ptr [ [[INCDEC_PTR_I_I_I:%.*]], [[IF_THEN_I_I_I]] ], [ [[INCDEC_PTR_I_I]], [[WHILE_COND_I_I_I_PREHEADER]] ]
+// APPROX-NEXT:    [[TMP11:%.*]] = and i8 [[TMP10]], -8
+// APPROX-NEXT:    [[OR_COND_I_I_I:%.*]] = icmp eq i8 [[TMP11]], 48
+// APPROX-NEXT:    br i1 [[OR_COND_I_I_I]], label [[IF_THEN_I_I_I]], label [[_ZL3NANPKC_EXIT]]
 // APPROX:       if.then.i.i.i:
-// APPROX-NEXT:    [[MUL_I_I_I:%.*]] = shl i64 [[__R_0_I_I_I]], 3
-// APPROX-NEXT:    [[CONV5_I_I_I:%.*]] = zext nneg i8 [[TMP6]] to i64
+// APPROX-NEXT:    [[MUL_I_I_I:%.*]] = shl i64 [[__R_0_I_I_I16]], 3
+// APPROX-NEXT:    [[CONV5_I_I_I:%.*]] = zext nneg i8 [[TMP10]] to i64
 // APPROX-NEXT:    [[ADD_I_I_I:%.*]] = add i64 [[MUL_I_I_I]], -48
-// APPROX-NEXT:    [[SUB_I_I_I:%.*]] = add i64 [[ADD_I_I_I]], [[CONV5_I_I_I]]
-// APPROX-NEXT:    [[INCDEC_PTR_I_I_I:%.*]] = getelementptr inbounds nuw i8, ptr [[__TAGP_ADDR_0_I_I_I]], i64 1
-// APPROX-NEXT:    br label [[CLEANUP_I_I_I]]
-// APPROX:       cleanup.i.i.i:
-// APPROX-NEXT:    [[__TAGP_ADDR_1_I_I_I]] = phi ptr [ [[INCDEC_PTR_I_I_I]], [[IF_THEN_I_I_I]] ], [ [[__TAGP_ADDR_0_I_I_I]], [[WHILE_BODY_I_I_I]] ]
-// APPROX-NEXT:    [[__R_1_I_I_I]] = phi i64 [ [[SUB_I_I_I]], [[IF_THEN_I_I_I]] ], [ [[__R_0_I_I_I]], [[WHILE_BODY_I_I_I]] ]
-// APPROX-NEXT:    br i1 [[OR_COND_I_I_I]], label [[WHILE_COND_I_I_I]], label [[_ZL3NANPKC_EXIT]], !llvm.loop [[LOOP7]]
-// APPROX:       while.cond.i14.i.i:
-// APPROX-NEXT:    [[__TAGP_ADDR_0_I15_I_I:%.*]] = phi ptr [ [[__TAGP_ADDR_1_I21_I_I:%.*]], [[CLEANUP_I20_I_I:%.*]] ], [ [[TAG]], [[ENTRY:%.*]] ]
-// APPROX-NEXT:    [[__R_0_I16_I_I:%.*]] = phi i64 [ [[__R_1_I22_I_I:%.*]], [[CLEANUP_I20_I_I]] ], [ 0, [[ENTRY]] ]
-// APPROX-NEXT:    [[TMP8:%.*]] = load i8, ptr [[__TAGP_ADDR_0_I15_I_I]], align 1, !tbaa [[TBAA4]]
-// APPROX-NEXT:    [[CMP_NOT_I17_I_I:%.*]] = icmp eq i8 [[TMP8]], 0
-// APPROX-NEXT:    br i1 [[CMP_NOT_I17_I_I]], label [[_ZL3NANPKC_EXIT]], label [[WHILE_BODY_I18_I_I:%.*]]
+// APPROX-NEXT:    [[SUB_I_I_I]] = add i64 [[ADD_I_I_I]], [[CONV5_I_I_I]]
+// APPROX-NEXT:    [[INCDEC_PTR_I_I_I]] = getelementptr inbounds nuw i8, ptr [[__TAGP_ADDR_0_I_I_I15]], i64 1
+// APPROX-NEXT:    [[TMP12]] = load i8, ptr [[INCDEC_PTR_I_I_I]], align 1, !tbaa [[TBAA4]]
+// APPROX-NEXT:    [[CMP_NOT_I_I_I:%.*]] = icmp eq i8 [[TMP12]], 0
+// APPROX-NEXT:    br i1 [[CMP_NOT_I_I_I]], label [[_ZL3NANPKC_EXIT]], label [[WHILE_BODY_I_I_I]], !llvm.loop [[LOOP7]]
 // APPROX:       while.body.i18.i.i:
-// APPROX-NEXT:    [[TMP9:%.*]] = add i8 [[TMP8]], -48
-// APPROX-NEXT:    [[OR_COND_I19_I_I:%.*]] = icmp ult i8 [[TMP9]], 10
-// APPROX-NEXT:    br i1 [[OR_COND_I19_I_I]], label [[IF_THEN_I24_I_I:%.*]], label [[CLEANUP_I20_I_I]]
-// APPROX:       if.then.i24.i.i:
-// APPROX-NEXT:    [[MUL_I25_I_I:%.*]] = mul i64 [[__R_0_I16_I_I]], 10
-// APPROX-NEXT:    [[CONV5_I26_I_I:%.*]] = zext nneg i8 [[TMP8]] to i64
-// APPROX-NEXT:    [[ADD_I27_I_I:%.*]] = add i64 [[MUL_I25_I_I]], -48
-// APPROX-NEXT:    [[SUB_I28_I_I:%.*]] = add i64 [[ADD_I27_I_I]], [[CONV5_I26_I_I]]
-// APPROX-NEXT:    [[INCDEC_PTR_I29_I_I:%.*]] = getelementptr inbounds nuw i8, ptr [[__TAGP_ADDR_0_I15_I_I]], i64 1
-// APPROX-NEXT:    br label [[CLEANUP_I20_I_I]]
-// APPROX:       cleanup.i20.i.i:
-// APPROX-NEXT:    [[__TAGP_ADDR_1_I21_I_I]] = phi ptr [ [[INCDEC_PTR_I29_I_I]], [[IF_THEN_I24_I_I]] ], [ [[__TAGP_ADDR_0_I15_I_I]], [[WHILE_BODY_I18_I_I]] ]
-// APPROX-NEXT:    [[__R_1_I22_I_I]] = phi i64 [ [[SUB_I28_I_I]], [[IF_THEN_I24_I_I]] ], [ [[__R_0_I16_I_I]], [[WHILE_BODY_I18_I_I]] ]
-// APPROX-NEXT:    br i1 [[OR_COND_I19_I_I]], label [[WHILE_COND_I14_I_I]], label [[_ZL3NANPKC_EXIT]], !llvm.loop [[LOOP10]]
+// APPROX-NEXT:    [[TMP13:%.*]] = phi i8 [ [[TMP15:%.*]], [[IF_THEN_I21_I_I:%.*]] ], [ [[TMP1]], [[WHILE_COND_I14_I_I_PREHEADER]] ]
+// APPROX-NEXT:    [[__R_0_I16_I_I7:%.*]] = phi i64 [ [[SUB_I25_I_I:%.*]], [[IF_THEN_I21_I_I]] ], [ 0, [[WHILE_COND_I14_I_I_PREHEADER]] ]
+// APPROX-NEXT:    [[__TAGP_ADDR_0_I15_I_I6:%.*]] = phi ptr [ [[INCDEC_PTR_I26_I_I:%.*]], [[IF_THEN_I21_I_I]] ], [ [[TAG]], [[WHILE_COND_I14_I_I_PREHEADER]] ]
+// APPROX-NEXT:    [[TMP14:%.*]] = add i8 [[TMP13]], -48
+// APPROX-NEXT:    [[OR_COND_I19_I_I:%.*]] = icmp ult i8 [[TMP14]], 10
+// APPROX-NEXT:    br i1 [[OR_COND_I19_I_I]], label [[IF_THEN_I21_I_I]], label [[_ZL3NANPKC_EXIT]]
+// APPROX:       if.then.i21.i.i:
+// APPROX-NEXT:    [[MUL_I22_I_I:%.*]] = mul i64 [[__R_0_I16_I_I7]], 10
+// APPROX-NEXT:    [[CONV5_I23_I_I:%.*]] = zext nneg i8 [[TMP13]] to i64
+// APPROX-NEXT:    [[ADD_I24_I_I:%.*]] = add i64 [[MUL_I22_I_I]], -48
+// APPROX-NEXT:    [[SUB_I25_I_I]] = add i64 [[ADD_I24_I_I]], [[CONV5_I23_I_I]]
+// APPROX-NEXT:    [[INCDEC_PTR_I26_I_I]] = getelementptr inbounds nuw i8, ptr [[__TAGP_ADDR_0_I15_I_I6]], i64 1
+// APPROX-NEXT:    [[TMP15]] = load i8, ptr [[INCDEC_PTR_I26_I_I]], align 1, !tbaa [[TBAA4]]
+// APPROX-NEXT:    [[CMP_NOT_I17_I_I:%.*]] = icmp eq i8 [[TMP15]], 0
+// APPROX-NEXT:    br i1 [[CMP_NOT_I17_I_I]], label [[_ZL3NANPKC_EXIT]], label [[WHILE_BODY_I18_I_I]], !llvm.loop [[LOOP10]]
 // APPROX:       _ZL3nanPKc.exit:
-// APPROX-NEXT:    [[RETVAL_0_I_I:%.*]] = phi i64 [ 0, [[CLEANUP_I_I_I]] ], [ [[__R_0_I_I_I]], [[WHILE_COND_I_I_I]] ], [ 0, [[CLEANUP_I36_I_I]] ], [ [[__R_0_I32_I_I]], [[WHILE_COND_I30_I_I]] ], [ 0, [[CLEANUP_I20_I_I]] ], [ [[__R_0_I16_I_I]], [[WHILE_COND_I14_I_I]] ]
+// APPROX-NEXT:    [[RETVAL_0_I_I:%.*]] = phi i64 [ 0, [[WHILE_COND_I_I_I_PREHEADER]] ], [ 0, [[IF_THEN5_I_I]] ], [ 0, [[WHILE_COND_I14_I_I_PREHEADER]] ], [ [[SUB_I_I_I]], [[IF_THEN_I_I_I]] ], [ 0, [[WHILE_BODY_I_I_I]] ], [ [[ADD28_I_I_I]], [[IF_END31_I_I_I]] ], [ 0, [[IF_ELSE17_I_I_I]] ], [ [[SUB_I25_I_I]], [[IF_THEN_I21_I_I]] ], [ 0, [[WHILE_BODY_I18_I_I]] ]
 // APPROX-NEXT:    [[BF_VALUE_I:%.*]] = and i64 [[RETVAL_0_I_I]], 2251799813685247
 // APPROX-NEXT:    [[BF_SET9_I:%.*]] = or disjoint i64 [[BF_VALUE_I]], 9221120237041090560
-// APPROX-NEXT:    [[TMP10:%.*]] = bitcast i64 [[BF_SET9_I]] to double
-// APPROX-NEXT:    ret double [[TMP10]]
+// APPROX-NEXT:    [[TMP16:%.*]] = bitcast i64 [[BF_SET9_I]] to double
+// APPROX-NEXT:    ret double [[TMP16]]
 //
 // NCRDIV-LABEL: @test_nan(
 // NCRDIV-NEXT:  entry:
 // NCRDIV-NEXT:    [[TMP0:%.*]] = load i8, ptr [[TAG:%.*]], align 1, !tbaa [[TBAA4]]
 // NCRDIV-NEXT:    [[CMP_I_I:%.*]] = icmp eq i8 [[TMP0]], 48
-// NCRDIV-NEXT:    br i1 [[CMP_I_I]], label [[IF_THEN_I_I:%.*]], label [[WHILE_COND_I14_I_I:%.*]]
+// NCRDIV-NEXT:    br i1 [[CMP_I_I]], label [[IF_THEN_I_I:%.*]], label [[WHILE_COND_I14_I_I_PREHEADER:%.*]]
+// NCRDIV:       while.cond.i14.i.i.preheader:
+// NCRDIV-NEXT:    [[TMP1:%.*]] = load i8, ptr [[TAG]], align 1, !tbaa [[TBAA4]]
+// NCRDIV-NEXT:    [[CMP_NOT_I17_I_I5:%.*]] = icmp eq i8 [[TMP1]], 0
+// NCRDIV-NEXT:    br i1 [[CMP_NOT_I17_I_I5]], label [[_ZL3NANPKC_EXIT:%.*]], label [[WHILE_BODY_I18_I_I:%.*]]
 // NCRDIV:       if.then.i.i:
 // NCRDIV-NEXT:    [[INCDEC_PTR_I_I:%.*]] = getelementptr inbounds nuw i8, ptr [[TAG]], i64 1
-// NCRDIV-NEXT:    [[TMP1:%.*]] = load i8, ptr [[INCDEC_PTR_I_I]], align 1, !tbaa [[TBAA4]]
-// NCRDIV-NEXT:    switch i8 [[TMP1]], label [[WHILE_COND_I_I_I:%.*]] [
-// NCRDIV-NEXT:      i8 120, label [[WHILE_COND_I30_I_I_PREHEADER:%.*]]
-// NCRDIV-NEXT:      i8 88, label [[WHILE_COND_I30_I_I_PREHEADER]]
+// NCRDIV-NEXT:    [[TMP2:%.*]] = load i8, ptr [[INCDEC_PTR_I_I]], align 1, !tbaa [[TBAA4]]
+// NCRDIV-NEXT:    switch i8 [[TMP2]], label [[WHILE_COND_I_I_I_PREHEADER:%.*]] [
+// NCRDIV-NEXT:      i8 120, label [[IF_THEN5_I_I:%.*]]
+// NCRDIV-NEXT:      i8 88, label [[IF_THEN5_I_I]]
 // NCRDIV-NEXT:    ]
-// NCRDIV:       while.cond.i30.i.i.preheader:
-// NCRDIV-NEXT:    br label [[WHILE_COND_I30_I_I:%.*]]
-// NCRDIV:       while.cond.i30.i.i:
-// NCRDIV-NEXT:    [[__TAGP_ADDR_0_I31_I_I:%.*]] = phi ptr [ [[__TAGP_ADDR_1_I37_I_I:%.*]], [[CLEANUP_I36_I_I:%.*]] ], [ [[INCDEC_PTR_I_I]], [[WHILE_COND_I30_I_I_PREHEADER]] ]
-// NCRDIV-NEXT:    [[__R_0_I32_I_I:%.*]] = phi i64 [ [[__R_2_I_I_I:%.*]], [[CLEANUP_I36_I_I]] ], [ 0, [[WHILE_COND_I30_I_I_PREHEADER]] ]
-// NCRDIV-NEXT:    [[TMP2:%.*]] = load i8, ptr [[__TAGP_ADDR_0_I31_I_I]], align 1, !tbaa [[TBAA4]]
-// NCRDIV-NEXT:    [[CMP_NOT_I33_I_I:%.*]] = icmp eq i8 [[TMP2]], 0
-// NCRDIV-NEXT:    br i1 [[CMP_NOT_I33_I_I]], label [[_ZL3NANPKC_EXIT:%.*]], label [[WHILE_BODY_I34_I_I:%.*]]
-// NCRDIV:       while.body.i34.i.i:
-// NCRDIV-NEXT:    [[TMP3:%.*]] = add i8 [[TMP2]], -48
-// NCRDIV-NEXT:    [[OR_COND_I35_I_I:%.*]] = icmp ult i8 [[TMP3]], 10
-// NCRDIV-NEXT:    br i1 [[OR_COND_I35_I_I]], label [[IF_END31_I_I_I:%.*]], label [[IF_ELSE_I_I_I:%.*]]
+// NCRDIV:       while.cond.i.i.i.preheader:
+// NCRDIV-NEXT:    [[TMP3:%.*]] = load i8, ptr [[INCDEC_PTR_I_I]], align 1, !tbaa [[TBAA4]]
+// NCRDIV-NEXT:    [[CMP_NOT_I_I_I14:%.*]] = icmp eq i8 [[TMP3]], 0
+// NCRDIV-NEXT:    br i1 [[CMP_NOT_I_I_I14]], label [[_ZL3NANPKC_EXIT]], label [[WHILE_BODY_I_I_I:%.*]]
+// NCRDIV:       if.then5.i.i:
+// NCRDIV-NEXT:    [[TMP4:%.*]] = load i8, ptr [[INCDEC_PTR_I_I]], align 1, !tbaa [[TBAA4]]
+// NCRDIV-NEXT:    [[CMP_NOT_I30_I_I9:%.*]] = icmp eq i8 [[TMP4]], 0
+// NCRDIV-NEXT:    br i1 [[CMP_NOT_I30_I_I9]], label [[_ZL3NANPKC_EXIT]], label [[WHILE_BODY_I31_I_I:%.*]]
+// NCRDIV:       while.body.i31.i.i:
+// NCRDIV-NEXT:    [[TMP5:%.*]] = phi i8 [ [[TMP9:%.*]], [[IF_END31_I_I_I:%.*]] ], [ [[TMP4]], [[IF_THEN5_I_I]] ]
+// NCRDIV-NEXT:    [[__R_0_I29_I_I11:%.*]] = phi i64 [ [[ADD28_I_I_I:%.*]], [[IF_END31_I_I_I]] ], [ 0, [[IF_THEN5_I_I]] ]
+// NCRDIV-NEXT:    [[__TAGP_ADDR_0_I28_I_I10:%.*]] = phi ptr [ [[INCDEC_PTR_I34_I_I:%.*]], [[IF_END31_I_I_I]] ], [ [[INCDEC_PTR_I_I]], [[IF_THEN5_I_I]] ]
+// NCRDIV-NEXT:    [[TMP6:%.*]] = add i8 [[TMP5]], -48
+// NCRDIV-NEXT:    [[OR_COND_I32_I_I:%.*]] = icmp ult i8 [[TMP6]], 10
+// NCRDIV-NEXT:    br i1 [[OR_COND_I32_I_I]], label [[IF_END31_I_I_I]], label [[IF_ELSE_I_I_I:%.*]]
 // NCRDIV:       if.else.i.i.i:
-// NCRDIV-NEXT:    [[TMP4:%.*]] = add i8 [[TMP2]], -97
-// NCRDIV-NEXT:    [[OR_COND33_I_I_I:%.*]] = icmp ult i8 [[TMP4]], 6
+// NCRDIV-NEXT:    [[TMP7:%.*]] = add i8 [[TMP5]], -97
+// NCRDIV-NEXT:    [[OR_COND33_I_I_I:%.*]] = icmp ult i8 [[TMP7]], 6
 // NCRDIV-NEXT:    br i1 [[OR_COND33_I_I_I]], label [[IF_END31_I_I_I]], label [[IF_ELSE17_I_I_I:%.*]]
 // NCRDIV:       if.else17.i.i.i:
-// NCRDIV-NEXT:    [[TMP5:%.*]] = add i8 [[TMP2]], -65
-// NCRDIV-NEXT:    [[OR_COND34_I_I_I:%.*]] = icmp ult i8 [[TMP5]], 6
-// NCRDIV-NEXT:    br i1 [[OR_COND34_I_I_I]], label [[IF_END31_I_I_I]], label [[CLEANUP_I36_I_I]]
+// NCRDIV-NEXT:    [[TMP8:%.*]] = add i8 [[TMP5]], -65
+// NCRDIV-NEXT:    [[OR_COND34_I_I_I:%.*]] = icmp ult i8 [[TMP8]], 6
+// NCRDIV-NEXT:    br i1 [[OR_COND34_I_I_I]], label [[IF_END31_I_I_I]], label [[_ZL3NANPKC_EXIT]]
 // NCRDIV:       if.end31.i.i.i:
-// NCRDIV-NEXT:    [[DOTSINK:%.*]] = phi i64 [ -48, [[WHILE_BODY_I34_I_I]] ], [ -87, [[IF_ELSE_I_I_I]] ], [ -55, [[IF_ELSE17_I_I_I]] ]
-// NCRDIV-NEXT:    [[MUL24_I_I_I:%.*]] = shl i64 [[__R_0_I32_I_I]], 4
-// NCRDIV-NEXT:    [[CONV25_I_I_I:%.*]] = zext nneg i8 [[TMP2]] to i64
+// NCRDIV-NEXT:    [[DOTSINK:%.*]] = phi i64 [ -48, [[WHILE_BODY_I31_I_I]] ], [ -87, [[IF_ELSE_I_I_I]] ], [ -55, [[IF_ELSE17_I_I_I]] ]
+// NCRDIV-NEXT:    [[MUL24_I_I_I:%.*]] = shl i64 [[__R_0_I29_I_I11]], 4
+// NCRDIV-NEXT:    [[CONV25_I_I_I:%.*]] = zext nneg i8 [[TMP5]] to i64
 // NCRDIV-NEXT:    [[ADD26_I_I_I:%.*]] = add i64 [[MUL24_I_I_I]], [[DOTSINK]]
-// NCRDIV-NEXT:    [[ADD28_I_I_I:%.*]] = add i64 [[ADD26_I_I_I]], [[CONV25_I_I_I]]
-// NCRDIV-NEXT:    [[INCDEC_PTR_I40_I_I:%.*]] = getelementptr inbounds nuw i8, ptr [[__TAGP_ADDR_0_I31_I_I]], i64 1
-// NCRDIV-NEXT:    br label [[CLEANUP_I36_I_I]]
-// NCRDIV:       cleanup.i36.i.i:
-// NCRDIV-NEXT:    [[__TAGP_ADDR_1_I37_I_I]] = phi ptr [ [[INCDEC_PTR_I40_I_I]], [[IF_END31_I_I_I]] ], [ [[__TAGP_ADDR_0_I31_I_I]], [[IF_ELSE17_I_I_I]] ]
-// NCRDIV-NEXT:    [[__R_2_I_I_I]] = phi i64 [ [[ADD28_I_I_I]], [[IF_END31_I_I_I]] ], [ [[__R_0_I32_I_I]], [[IF_ELSE17_I_I_I]] ]
-// NCRDIV-NEXT:    [[COND_I_I_I:%.*]] = phi i1 [ true, [[IF_END31_I_I_I]] ], [ false, [[IF_ELSE17_I_I_I]] ]
-// NCRDIV-NEXT:    br i1 [[COND_I_I_I]], label [[WHILE_COND_I30_I_I]], label [[_ZL3NANPKC_EXIT]], !llvm.loop [[LOOP11]]
-// NCRDIV:       while.cond.i.i.i:
-// NCRDIV-NEXT:    [[__TAGP_ADDR_0_I_I_I:%.*]] = phi ptr [ [[__TAGP_ADDR_1_I_I_I:%.*]], [[CLEANUP_I_I_I:%.*]] ], [ [[INCDEC_PTR_I_I]], [[IF_THEN_I_I]] ]
-// NCRDIV-NEXT:    [[__R_0_I_I_I:%.*]] = phi i64 [ [[__R_1_I_I_I:%.*]], [[CLEANUP_I_I_I]] ], [ 0, [[IF_THEN_I_I]] ]
-// NCRDIV-NEXT:    [[TMP6:%.*]] = load i8, ptr [[__TAGP_ADDR_0_I_I_I]], align 1, !tbaa [[TBAA4]]
-// NCRDIV-NEXT:    [[CMP_NOT_I_I_I:%.*]] = icmp eq i8 [[TMP6]], 0
-// NCRDIV-NEXT:    br i1 [[CMP_NOT_I_I_I]], label [[_ZL3NANPKC_EXIT]], label [[WHILE_BODY_I_I_I:%.*]]
+// NCRDIV-NEXT:    [[ADD28_I_I_I]] = add i64 [[ADD26_I_I_I]], [[CONV25_I_I_I]]
+// NCRDIV-NEXT:    [[INCDEC_PTR_I34_I_I]] = getelementptr inbounds nuw i8, ptr [[__TAGP_ADDR_0_I28_I_I10]], i64 1
+// NCRDIV-NEXT:    [[TMP9]] = load i8, ptr [[INCDEC_PTR_I34_I_I]], align 1, !tbaa [[TBAA4]]
+// NCRDIV-NEXT:    [[CMP_NOT_I30_I_I:%.*]] = icmp eq i8 [[TMP9]], 0
+// NCRDIV-NEXT:    br i1 [[CMP_NOT_I30_I_I]], label [[_ZL3NANPKC_EXIT]], label [[WHILE_BODY_I31_I_I]], !llvm.loop [[LOOP11]]
 // NCRDIV:       while.body.i.i.i:
-// NCRDIV-NEXT:    [[TMP7:%.*]] = and i8 [[TMP6]], -8
-// NCRDIV-NEXT:    [[OR_COND_I_I_I:%.*]] = icmp eq i8 [[TMP7]], 48
-// NCRDIV-NEXT:    br i1 [[OR_COND_I_I_I]], label [[IF_THEN_I_I_I:%.*]], label [[CLEANUP_I_I_I]]
+// NCRDIV-NEXT:    [[TMP10:%.*]] = phi i8 [ [[TMP12:%.*]], [[IF_THEN_I_I_I:%.*]] ], [ [[TMP3]], [[WHILE_COND_I_I_I_PREHEADER]] ]
+// NCRDIV-NEXT:    [[__R_0_I_I_I16:%.*]] = phi i64 [ [[SUB_I_I_I:%.*]], [[IF_THEN_I_I_I]] ], [ 0, [[WHILE_COND_I_I_I_PREHEADER]] ]
+// NCRDIV-NEXT:    [[__TAGP_ADDR_0_I_I_I15:%.*]] = phi ptr [ [[INCDEC_PTR_I_I_I:%.*]], [[IF_THEN_I_I_I]] ], [ [[INCDEC_PTR_I_I]], [[WHILE_COND_I_I_I_PREHEADER]] ]
+// NCRDIV-NEXT:    [[TMP11:%.*]] = and i8 [[TMP10]], -8
+// NCRDIV-NEXT:    [[OR_COND_I_I_I:%.*]] = icmp eq i8 [[TMP11]], 48
+// NCRDIV-NEXT:    br i1 [[OR_COND_I_I_I]], label [[IF_THEN_I_I_I]], label [[_ZL3NANPKC_EXIT]]
 // NCRDIV:       if.then.i.i.i:
-// NCRDIV-NEXT:    [[MUL_I_I_I:%.*]] = shl i64 [[__R_0_I_I_I]], 3
-// NCRDIV-NEXT:    [[CONV5_I_I_I:%.*]] = zext nneg i8 [[TMP6]] to i64
+// NCRDIV-NEXT:    [[MUL_I_I_I:%.*]] = shl i64 [[__R_0_I_I_I16]], 3
+// NCRDIV-NEXT:    [[CONV5_I_I_I:%.*]] = zext nneg i8 [[TMP10]] to i64
 // NCRDIV-NEXT:    [[ADD_I_I_I:%.*]] = add i64 [[MUL_I_I_I]], -48
-// NCRDIV-NEXT:    [[SUB_I_I_I:%.*]] = add i64 [[ADD_I_I_I]], [[CONV5_I_I_I]]
-// NCRDIV-NEXT:    [[INCDEC_PTR_I_I_I:%.*]] = getelementptr inbounds nuw i8, ptr [[__TAGP_ADDR_0_I_I_I]], i64 1
-// NCRDIV-NEXT:    br label [[CLEANUP_I_I_I]]
-// NCRDIV:       cleanup.i.i.i:
-// NCRDIV-NEXT:    [[__TAGP_ADDR_1_I_I_I]] = phi ptr [ [[INCDEC_PTR_I_I_I]], [[IF_THEN_I_I_I]] ], [ [[__TAGP_ADDR_0_I_I_I]], [[WHILE_BODY_I_I_I]] ]
-// NCRDIV-NEXT:    [[__R_1_I_I_I]] = phi i64 [ [[SUB_I_I_I]], [[IF_THEN_I_I_I]] ], [ [[__R_0_I_I_I]], [[WHILE_BODY_I_I_I]] ]
-// NCRDIV-NEXT:    br i1 [[OR_COND_I_I_I]], label [[WHILE_COND_I_I_I]], label [[_ZL3NANPKC_EXIT]], !llvm.loop [[LOOP7]]
-// NCRDIV:       while.cond.i14.i.i:
-// NCRDIV-NEXT:    [[__TAGP_ADDR_0_I15_I_I:%.*]] = phi ptr [ [[__TAGP_ADDR_1_I21_I_I:%.*]], [[CLEANUP_I20_I_I:%.*]] ], [ [[TAG]], [[ENTRY:%.*]] ]
-// NCRDIV-NEXT:    [[__R_0_I16_I_I:%.*]] = phi i64 [ [[__R_1_I22_I_I:%.*]], [[CLEANUP_I20_I_I]] ], [ 0, [[ENTRY]] ]
-// NCRDIV-NEXT:    [[TMP8:%.*]] = load i8, ptr [[__TAGP_ADDR_0_I15_I_I]], align 1, !tbaa [[TBAA4]]
-// NCRDIV-NEXT:    [[CMP_NOT_I17_I_I:%.*]] = icmp eq i8 [[TMP8]], 0
-// NCRDIV-NEXT:    br i1 [[CMP_NOT_I17_I_I]], label [[_ZL3NANPKC_EXIT]], label [[WHILE_BODY_I18_I_I:%.*]]
+// NCRDIV-NEXT:    [[SUB_I_I_I]] = add i64 [[ADD_I_I_I]], [[CONV5_I_I_I]]
+// NCRDIV-NEXT:    [[INCDEC_PTR_I_I_I]] = getelementptr inbounds nuw i8, ptr [[__TAGP_ADDR_0_I_I_I15]], i64 1
+// NCRDIV-NEXT:    [[TMP12]] = load i8, ptr [[INCDEC_PTR_I_I_I]], align 1, !tbaa [[TBAA4]]
+// NCRDIV-NEXT:    [[CMP_NOT_I_I_I:%.*]] = icmp eq i8 [[TMP12]], 0
+// NCRDIV-NEXT:    br i1 [[CMP_NOT_I_I_I]], label [[_ZL3NANPKC_EXIT]], label [[WHILE_BODY_I_I_I]], !llvm.loop [[LOOP7]]
 // NCRDIV:       while.body.i18.i.i:
-// NCRDIV-NEXT:    [[TMP9:%.*]] = add i8 [[TMP8]], -48
-// NCRDIV-NEXT:    [[OR_COND_I19_I_I:%.*]] = icmp ult i8 [[TMP9]], 10
-// NCRDIV-NEXT:    br i1 [[OR_COND_I19_I_I]], label [[IF_THEN_I24_I_I:%.*]], label [[CLEANUP_I20_I_I]]
-// NCRDIV:       if.then.i24.i.i:
-// NCRDIV-NEXT:    [[MUL_I25_I_I:%.*]] = mul i64 [[__R_0_I16_I_I]], 10
-// NCRDIV-NEXT:    [[CONV5_I26_I_I:%.*]] = zext nneg i8 [[TMP8]] to i64
-// NCRDIV-NEXT:    [[ADD_I27_I_I:%.*]] = add i64 [[MUL_I25_I_I]], -48
-// NCRDIV-NEXT:    [[SUB_I28_I_I:%.*]] = add i64 [[ADD_I27_I_I]], [[CONV5_I26_I_I]]
-// NCRDIV-NEXT:    [[INCDEC_PTR_I29_I_I:%.*]] = getelementptr inbounds nuw i8, ptr [[__TAGP_ADDR_0_I15_I_I]], i64 1
-// NCRDIV-NEXT:    br label [[CLEANUP_I20_I_I]]
-// NCRDIV:       cleanup.i20.i.i:
-// NCRDIV-NEXT:    [[__TAGP_ADDR_1_I21_I_I]] = phi ptr [ [[INCDEC_PTR_I29_I_I]], [[IF_THEN_I24_I_I]] ], [ [[__TAGP_ADDR_0_I15_I_I]], [[WHILE_BODY_I18_I_I]] ]
-// NCRDIV-NEXT:    [[__R_1_I22_I_I]] = phi i64 [ [[SUB_I28_I_I]], [[IF_THEN_I24_I_I]] ], [ [[__R_0_I16_I_I]], [[WHILE_BODY_I18_I_I]] ]
-// NCRDIV-NEXT:    br i1 [[OR_COND_I19_I_I]], label [[WHILE_COND_I14_I_I]], label [[_ZL3NANPKC_EXIT]], !llvm.loop [[LOOP10]]
+// NCRDIV-NEXT:    [[TMP13:%.*]] = phi i8 [ [[TMP15:%.*]], [[IF_THEN_I21_I_I:%.*]] ], [ [[TMP1]], [[WHILE_COND_I14_I_I_PREHEADER]] ]
+// NCRDIV-NEXT:    [[__R_0_I16_I_I7:%.*]] = phi i64 [ [[SUB_I25_I_I:%.*]], [[IF_THEN_I21_I_I]] ], [ 0, [[WHILE_COND_I14_I_I_PREHEADER]] ]
+// NCRDIV-NEXT:    [[__TAGP_ADDR_0_I15_I_I6:%.*]] = phi ptr [ [[INCDEC_PTR_I26_I_I:%.*]], [[IF_THEN_I21_I_I]] ], [ [[TAG]], [[WHILE_COND_I14_I_I_PREHEADER]] ]
+// NCRDIV-NEXT:    [[TMP14:%.*]] = add i8 [[TMP13]], -48
+// NCRDIV-NEXT:    [[OR_COND_I19_I_I:%.*]] = icmp ult i8 [[TMP14]], 10
+// NCRDIV-NEXT:    br i1 [[OR_COND_I19_I_I]], label [[IF_THEN_I21_I_I]], label [[_ZL3NANPKC_EXIT]]
+// NCRDIV:       if.then.i21.i.i:
+// NCRDIV-NEXT:    [[MUL_I22_I_I:%.*]] = mul i64 [[__R_0_I16_I_I7]], 10
+// NCRDIV-NEXT:    [[CONV5_I23_I_I:%.*]] = zext nneg i8 [[TMP13]] to i64
+// NCRDIV-NEXT:    [[ADD_I24_I_I:%.*]] = add i64 [[MUL_I22_I_I]], -48
+// NCRDIV-NEXT:    [[SUB_I25_I_I]] = add i64 [[ADD_I24_I_I]], [[CONV5_I23_I_I]]
+// NCRDIV-NEXT:    [[INCDEC_PTR_I26_I_I]] = getelementptr inbounds nuw i8, ptr [[__TAGP_ADDR_0_I15_I_I6]], i64 1
+// NCRDIV-NEXT:    [[TMP15]] = load i8, ptr [[INCDEC_PTR_I26_I_I]], align 1, !tbaa [[TBAA4]]
+// NCRDIV-NEXT:    [[CMP_NOT_I17_I_I:%.*]] = icmp eq i8 [[TMP15]], 0
+// NCRDIV-NEXT:    br i1 [[CMP_NOT_I17_I_I]], label [[_ZL3NANPKC_EXIT]], label [[WHILE_BODY_I18_I_I]], !llvm.loop [[LOOP10]]
 // NCRDIV:       _ZL3nanPKc.exit:
-// NCRDIV-NEXT:    [[RETVAL_0_I_I:%.*]] = phi i64 [ 0, [[CLEANUP_I_I_I]] ], [ [[__R_0_I_I_I]], [[WHILE_COND_I_I_I]] ], [ 0, [[CLEANUP_I36_I_I]] ], [ [[__R_0_I32_I_I]], [[WHILE_COND_I30_I_I]] ], [ 0, [[CLEANUP_I20_I_I]] ], [ [[__R_0_I16_I_I]], [[WHILE_COND_I14_I_I]] ]
+// NCRDIV-NEXT:    [[RETVAL_0_I_I:%.*]] = phi i64 [ 0, [[WHILE_COND_I_I_I_PREHEADER]] ], [ 0, [[IF_THEN5_I_I]] ], [ 0, [[WHILE_COND_I14_I_I_PREHEADER]] ], [ [[SUB_I_I_I]], [[IF_THEN_I_I_I]] ], [ 0, [[WHILE_BODY_I_I_I]] ], [ [[ADD28_I_I_I]], [[IF_END31_I_I_I]] ], [ 0, [[IF_ELSE17_I_I_I]] ], [ [[SUB_I25_I_I]], [[IF_THEN_I21_I_I]] ], [ 0, [[WHILE_BODY_I18_I_I]] ]
 // NCRDIV-NEXT:    [[BF_VALUE_I:%.*]] = and i64 [[RETVAL_0_I_I]], 2251799813685247
 // NCRDIV-NEXT:    [[BF_SET9_I:%.*]] = or disjoint i64 [[BF_VALUE_I]], 9221120237041090560
-// NCRDIV-NEXT:    [[TMP10:%.*]] = bitcast i64 [[BF_SET9_I]] to double
-// NCRDIV-NEXT:    ret double [[TMP10]]
+// NCRDIV-NEXT:    [[TMP16:%.*]] = bitcast i64 [[BF_SET9_I]] to double
+// NCRDIV-NEXT:    ret double [[TMP16]]
 //
 // AMDGCNSPIRV-LABEL: @test_nan(
 // AMDGCNSPIRV-NEXT:  entry:
@@ -4442,53 +4378,49 @@ extern "C" __device__ float test_nanf(const char *tag) {
 // AMDGCNSPIRV-NEXT:    [[INCDEC_PTR_I_I:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[TAG]], i64 1
 // AMDGCNSPIRV-NEXT:    [[TMP1:%.*]] = load i8, ptr addrspace(4) [[INCDEC_PTR_I_I]], align 1, !tbaa [[TBAA5]]
 // AMDGCNSPIRV-NEXT:    switch i8 [[TMP1]], label [[WHILE_COND_I_I_I:%.*]] [
-// AMDGCNSPIRV-NEXT:      i8 120, label [[WHILE_COND_I28_I_I_PREHEADER:%.*]]
-// AMDGCNSPIRV-NEXT:      i8 88, label [[WHILE_COND_I28_I_I_PREHEADER]]
+// AMDGCNSPIRV-NEXT:      i8 120, label [[IF_THEN5_I_I:%.*]]
+// AMDGCNSPIRV-NEXT:      i8 88, label [[IF_THEN5_I_I]]
 // AMDGCNSPIRV-NEXT:    ]
-// AMDGCNSPIRV:       while.cond.i28.i.i.preheader:
-// AMDGCNSPIRV-NEXT:    br label [[WHILE_COND_I28_I_I:%.*]]
-// AMDGCNSPIRV:       while.cond.i28.i.i:
-// AMDGCNSPIRV-NEXT:    [[__TAGP_ADDR_0_I29_I_I:%.*]] = phi ptr addrspace(4) [ [[__TAGP_ADDR_1_I34_I_I:%.*]], [[CLEANUP_I_I_I:%.*]] ], [ [[INCDEC_PTR_I_I]], [[WHILE_COND_I28_I_I_PREHEADER]] ]
-// AMDGCNSPIRV-NEXT:    [[__R_0_I30_I_I:%.*]] = phi i64 [ [[__R_2_I_I_I:%.*]], [[CLEANUP_I_I_I]] ], [ 0, [[WHILE_COND_I28_I_I_PREHEADER]] ]
-// AMDGCNSPIRV-NEXT:    [[TMP2:%.*]] = load i8, ptr addrspace(4) [[__TAGP_ADDR_0_I29_I_I]], align 1, !tbaa [[TBAA5]]
-// AMDGCNSPIRV-NEXT:    [[CMP_NOT_I31_I_I:%.*]] = icmp eq i8 [[TMP2]], 0
-// AMDGCNSPIRV-NEXT:    br i1 [[CMP_NOT_I31_I_I]], label [[_ZL3NANPKC_EXIT:%.*]], label [[WHILE_BODY_I32_I_I:%.*]]
+// AMDGCNSPIRV:       if.then5.i.i:
+// AMDGCNSPIRV-NEXT:    [[TMP2:%.*]] = load i8, ptr addrspace(4) [[INCDEC_PTR_I_I]], align 1, !tbaa [[TBAA5]]
+// AMDGCNSPIRV-NEXT:    [[CMP_NOT_I31_I_I5:%.*]] = icmp eq i8 [[TMP2]], 0
+// AMDGCNSPIRV-NEXT:    br i1 [[CMP_NOT_I31_I_I5]], label [[_ZL3NANPKC_EXIT:%.*]], label [[WHILE_BODY_I32_I_I:%.*]]
 // AMDGCNSPIRV:       while.body.i32.i.i:
-// AMDGCNSPIRV-NEXT:    [[TMP3:%.*]] = add i8 [[TMP2]], -48
-// AMDGCNSPIRV-NEXT:    [[OR_COND_I33_I_I:%.*]] = icmp ult i8 [[TMP3]], 10
-// AMDGCNSPIRV-NEXT:    br i1 [[OR_COND_I33_I_I]], label [[IF_END31_I_I_I:%.*]], label [[IF_ELSE_I_I_I:%.*]]
+// AMDGCNSPIRV-NEXT:    [[TMP3:%.*]] = phi i8 [ [[TMP7:%.*]], [[IF_END31_I_I_I:%.*]] ], [ [[TMP2]], [[IF_THEN5_I_I]] ]
+// AMDGCNSPIRV-NEXT:    [[__R_0_I30_I_I7:%.*]] = phi i64 [ [[ADD28_I_I_I:%.*]], [[IF_END31_I_I_I]] ], [ 0, [[IF_THEN5_I_I]] ]
+// AMDGCNSPIRV-NEXT:    [[__TAGP_ADDR_0_I29_I_I6:%.*]] = phi ptr addrspace(4) [ [[INCDEC_PTR_I36_I_I:%.*]], [[IF_END31_I_I_I]] ], [ [[INCDEC_PTR_I_I]], [[IF_THEN5_I_I]] ]
+// AMDGCNSPIRV-NEXT:    [[TMP4:%.*]] = add i8 [[TMP3]], -48
+// AMDGCNSPIRV-NEXT:    [[OR_COND_I33_I_I:%.*]] = icmp ult i8 [[TMP4]], 10
+// AMDGCNSPIRV-NEXT:    br i1 [[OR_COND_I33_I_I]], label [[IF_END31_I_I_I]], label [[IF_ELSE_I_I_I:%.*]]
 // AMDGCNSPIRV:       if.else.i.i.i:
-// AMDGCNSPIRV-NEXT:    [[TMP4:%.*]] = add i8 [[TMP2]], -97
-// AMDGCNSPIRV-NEXT:    [[OR_COND33_I_I_I:%.*]] = icmp ult i8 [[TMP4]], 6
+// AMDGCNSPIRV-NEXT:    [[TMP5:%.*]] = add i8 [[TMP3]], -97
+// AMDGCNSPIRV-NEXT:    [[OR_COND33_I_I_I:%.*]] = icmp ult i8 [[TMP5]], 6
 // AMDGCNSPIRV-NEXT:    br i1 [[OR_COND33_I_I_I]], label [[IF_END31_I_I_I]], label [[IF_ELSE17_I_I_I:%.*]]
 // AMDGCNSPIRV:       if.else17.i.i.i:
-// AMDGCNSPIRV-NEXT:    [[TMP5:%.*]] = add i8 [[TMP2]], -65
-// AMDGCNSPIRV-NEXT:    [[OR_COND34_I_I_I:%.*]] = icmp ult i8 [[TMP5]], 6
-// AMDGCNSPIRV-NEXT:    br i1 [[OR_COND34_I_I_I]], label [[IF_END31_I_I_I]], label [[CLEANUP_I_I_I]]
+// AMDGCNSPIRV-NEXT:    [[TMP6:%.*]] = add i8 [[TMP3]], -65
+// AMDGCNSPIRV-NEXT:    [[OR_COND34_I_I_I:%.*]] = icmp ult i8 [[TMP6]], 6
+// AMDGCNSPIRV-NEXT:    br i1 [[OR_COND34_I_I_I]], label [[IF_END31_I_I_I]], label [[_ZL3NANPKC_EXIT]]
 // AMDGCNSPIRV:       if.end31.i.i.i:
 // AMDGCNSPIRV-NEXT:    [[DOTSINK:%.*]] = phi i64 [ -48, [[WHILE_BODY_I32_I_I]] ], [ -87, [[IF_ELSE_I_I_I]] ], [ -55, [[IF_ELSE17_I_I_I]] ]
-// AMDGCNSPIRV-NEXT:    [[MUL24_I_I_I:%.*]] = shl i64 [[__R_0_I30_I_I]], 4
-// AMDGCNSPIRV-NEXT:    [[CONV25_I_I_I:%.*]] = zext nneg i8 [[TMP2]] to i64
+// AMDGCNSPIRV-NEXT:    [[MUL24_I_I_I:%.*]] = shl i64 [[__R_0_I30_I_I7]], 4
+// AMDGCNSPIRV-NEXT:    [[CONV25_I_I_I:%.*]] = zext nneg i8 [[TMP3]] to i64
 // AMDGCNSPIRV-NEXT:    [[ADD26_I_I_I:%.*]] = add i64 [[MUL24_I_I_I]], [[DOTSINK]]
-// AMDGCNSPIRV-NEXT:    [[ADD28_I_I_I:%.*]] = add i64 [[ADD26_I_I_I]], [[CONV25_I_I_I]]
-// AMDGCNSPIRV-NEXT:    [[INCDEC_PTR_I37_I_I:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[__TAGP_ADDR_0_I29_I_I]], i64 1
-// AMDGCNSPIRV-NEXT:    br label [[CLEANUP_I_I_I]]
-// AMDGCNSPIRV:       cleanup.i.i.i:
-// AMDGCNSPIRV-NEXT:    [[__TAGP_ADDR_1_I34_I_I]] = phi ptr addrspace(4) [ [[INCDEC_PTR_I37_I_I]], [[IF_END31_I_I_I]] ], [ [[__TAGP_ADDR_0_I29_I_I]], [[IF_ELSE17_I_I_I]] ]
-// AMDGCNSPIRV-NEXT:    [[__R_2_I_I_I]] = phi i64 [ [[ADD28_I_I_I]], [[IF_END31_I_I_I]] ], [ [[__R_0_I30_I_I]], [[IF_ELSE17_I_I_I]] ]
-// AMDGCNSPIRV-NEXT:    [[COND_I_I_I:%.*]] = phi i1 [ true, [[IF_END31_I_I_I]] ], [ false, [[IF_ELSE17_I_I_I]] ]
-// AMDGCNSPIRV-NEXT:    br i1 [[COND_I_I_I]], label [[WHILE_COND_I28_I_I]], label [[_ZL3NANPKC_EXIT]], !llvm.loop [[LOOP12]]
+// AMDGCNSPIRV-NEXT:    [[ADD28_I_I_I]] = add i64 [[ADD26_I_I_I]], [[CONV25_I_I_I]]
+// AMDGCNSPIRV-NEXT:    [[INCDEC_PTR_I36_I_I]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[__TAGP_ADDR_0_I29_I_I6]], i64 1
+// AMDGCNSPIRV-NEXT:    [[TMP7]] = load i8, ptr addrspace(4) [[INCDEC_PTR_I36_I_I]], align 1, !tbaa [[TBAA5]]
+// AMDGCNSPIRV-NEXT:    [[CMP_NOT_I31_I_I:%.*]] = icmp eq i8 [[TMP7]], 0
+// AMDGCNSPIRV-NEXT:    br i1 [[CMP_NOT_I31_I_I]], label [[_ZL3NANPKC_EXIT]], label [[WHILE_BODY_I32_I_I]], !llvm.loop [[LOOP12]]
 // AMDGCNSPIRV:       while.cond.i.i.i:
 // AMDGCNSPIRV-NEXT:    [[__TAGP_ADDR_0_I_I_I:%.*]] = phi ptr addrspace(4) [ [[__TAGP_ADDR_1_I_I_I:%.*]], [[WHILE_BODY_I_I_I:%.*]] ], [ [[INCDEC_PTR_I_I]], [[IF_THEN_I_I]] ]
 // AMDGCNSPIRV-NEXT:    [[__R_0_I_I_I:%.*]] = phi i64 [ [[__R_1_I_I_I:%.*]], [[WHILE_BODY_I_I_I]] ], [ 0, [[IF_THEN_I_I]] ]
-// AMDGCNSPIRV-NEXT:    [[TMP6:%.*]] = load i8, ptr addrspace(4) [[__TAGP_ADDR_0_I_I_I]], align 1, !tbaa [[TBAA5]]
-// AMDGCNSPIRV-NEXT:    [[CMP_NOT_I_I_I:%.*]] = icmp eq i8 [[TMP6]], 0
+// AMDGCNSPIRV-NEXT:    [[TMP8:%.*]] = load i8, ptr addrspace(4) [[__TAGP_ADDR_0_I_I_I]], align 1, !tbaa [[TBAA5]]
+// AMDGCNSPIRV-NEXT:    [[CMP_NOT_I_I_I:%.*]] = icmp eq i8 [[TMP8]], 0
 // AMDGCNSPIRV-NEXT:    br i1 [[CMP_NOT_I_I_I]], label [[_ZL3NANPKC_EXIT]], label [[WHILE_BODY_I_I_I]]
 // AMDGCNSPIRV:       while.body.i.i.i:
-// AMDGCNSPIRV-NEXT:    [[TMP7:%.*]] = and i8 [[TMP6]], -8
-// AMDGCNSPIRV-NEXT:    [[OR_COND_I_I_I:%.*]] = icmp eq i8 [[TMP7]], 48
+// AMDGCNSPIRV-NEXT:    [[TMP9:%.*]] = and i8 [[TMP8]], -8
+// AMDGCNSPIRV-NEXT:    [[OR_COND_I_I_I:%.*]] = icmp eq i8 [[TMP9]], 48
 // AMDGCNSPIRV-NEXT:    [[MUL_I_I_I:%.*]] = shl i64 [[__R_0_I_I_I]], 3
-// AMDGCNSPIRV-NEXT:    [[CONV5_I_I_I:%.*]] = zext nneg i8 [[TMP6]] to i64
+// AMDGCNSPIRV-NEXT:    [[CONV5_I_I_I:%.*]] = zext nneg i8 [[TMP8]] to i64
 // AMDGCNSPIRV-NEXT:    [[ADD_I_I_I:%.*]] = add i64 [[MUL_I_I_I]], -48
 // AMDGCNSPIRV-NEXT:    [[SUB_I_I_I:%.*]] = add i64 [[ADD_I_I_I]], [[CONV5_I_I_I]]
 // AMDGCNSPIRV-NEXT:    [[__TAGP_ADDR_1_I_I_I_IDX:%.*]] = zext i1 [[OR_COND_I_I_I]] to i64
@@ -4498,14 +4430,14 @@ extern "C" __device__ float test_nanf(const char *tag) {
 // AMDGCNSPIRV:       while.cond.i14.i.i:
 // AMDGCNSPIRV-NEXT:    [[__TAGP_ADDR_0_I15_I_I:%.*]] = phi ptr addrspace(4) [ [[__TAGP_ADDR_1_I25_I_I:%.*]], [[WHILE_BODY_I18_I_I:%.*]] ], [ [[TAG]], [[ENTRY:%.*]] ]
 // AMDGCNSPIRV-NEXT:    [[__R_0_I16_I_I:%.*]] = phi i64 [ [[__R_1_I26_I_I:%.*]], [[WHILE_BODY_I18_I_I]] ], [ 0, [[ENTRY]] ]
-// AMDGCNSPIRV-NEXT:    [[TMP8:%.*]] = load i8, ptr addrspace(4) [[__TAGP_ADDR_0_I15_I_I]], align 1, !tbaa [[TBAA5]]
-// AMDGCNSPIRV-NEXT:    [[CMP_NOT_I17_I_I:%.*]] = icmp eq i8 [[TMP8]], 0
+// AMDGCNSPIRV-NEXT:    [[TMP10:%.*]] = load i8, ptr addrspace(4) [[__TAGP_ADDR_0_I15_I_I]], align 1, !tbaa [[TBAA5]]
+// AMDGCNSPIRV-NEXT:    [[CMP_NOT_I17_I_I:%.*]] = icmp eq i8 [[TMP10]], 0
 // AMDGCNSPIRV-NEXT:    br i1 [[CMP_NOT_I17_I_I]], label [[_ZL3NANPKC_EXIT]], label [[WHILE_BODY_I18_I_I]]
 // AMDGCNSPIRV:       while.body.i18.i.i:
-// AMDGCNSPIRV-NEXT:    [[TMP9:%.*]] = add i8 [[TMP8]], -48
-// AMDGCNSPIRV-NEXT:    [[OR_COND_I19_I_I:%.*]] = icmp ult i8 [[TMP9]], 10
+// AMDGCNSPIRV-NEXT:    [[TMP11:%.*]] = add i8 [[TMP10]], -48
+// AMDGCNSPIRV-NEXT:    [[OR_COND_I19_I_I:%.*]] = icmp ult i8 [[TMP11]], 10
 // AMDGCNSPIRV-NEXT:    [[MUL_I20_I_I:%.*]] = mul i64 [[__R_0_I16_I_I]], 10
-// AMDGCNSPIRV-NEXT:    [[CONV5_I21_I_I:%.*]] = zext nneg i8 [[TMP8]] to i64
+// AMDGCNSPIRV-NEXT:    [[CONV5_I21_I_I:%.*]] = zext nneg i8 [[TMP10]] to i64
 // AMDGCNSPIRV-NEXT:    [[ADD_I22_I_I:%.*]] = add i64 [[MUL_I20_I_I]], -48
 // AMDGCNSPIRV-NEXT:    [[SUB_I23_I_I:%.*]] = add i64 [[ADD_I22_I_I]], [[CONV5_I21_I_I]]
 // AMDGCNSPIRV-NEXT:    [[__TAGP_ADDR_1_I25_I_I_IDX:%.*]] = zext i1 [[OR_COND_I19_I_I]] to i64
@@ -4513,11 +4445,11 @@ extern "C" __device__ float test_nanf(const char *tag) {
 // AMDGCNSPIRV-NEXT:    [[__R_1_I26_I_I]] = select i1 [[OR_COND_I19_I_I]], i64 [[SUB_I23_I_I]], i64 [[__R_0_I16_I_I]]
 // AMDGCNSPIRV-NEXT:    br i1 [[OR_COND_I19_I_I]], label [[WHILE_COND_I14_I_I]], label [[_ZL3NANPKC_EXIT]], !llvm.loop [[LOOP11]]
 // AMDGCNSPIRV:       _ZL3nanPKc.exit:
-// AMDGCNSPIRV-NEXT:    [[RETVAL_0_I_I:%.*]] = phi i64 [ 0, [[WHILE_BODY_I_I_I]] ], [ [[__R_0_I_I_I]], [[WHILE_COND_I_I_I]] ], [ 0, [[CLEANUP_I_I_I]] ], [ [[__R_0_I30_I_I]], [[WHILE_COND_I28_I_I]] ], [ 0, [[WHILE_BODY_I18_I_I]] ], [ [[__R_0_I16_I_I]], [[WHILE_COND_I14_I_I]] ]
+// AMDGCNSPIRV-NEXT:    [[RETVAL_0_I_I:%.*]] = phi i64 [ 0, [[IF_THEN5_I_I]] ], [ 0, [[WHILE_BODY_I_I_I]] ], [ [[__R_0_I_I_I]], [[WHILE_COND_I_I_I]] ], [ [[ADD28_I_I_I]], [[IF_END31_I_I_I]] ], [ 0, [[IF_ELSE17_I_I_I]] ], [ 0, [[WHILE_BODY_I18_I_I]] ], [ [[__R_0_I16_I_I]], [[WHILE_COND_I14_I_I]] ]
 // AMDGCNSPIRV-NEXT:    [[BF_VALUE_I:%.*]] = and i64 [[RETVAL_0_I_I]], 2251799813685247
 // AMDGCNSPIRV-NEXT:    [[BF_SET9_I:%.*]] = or disjoint i64 [[BF_VALUE_I]], 9221120237041090560
-// AMDGCNSPIRV-NEXT:    [[TMP10:%.*]] = bitcast i64 [[BF_SET9_I]] to double
-// AMDGCNSPIRV-NEXT:    ret double [[TMP10]]
+// AMDGCNSPIRV-NEXT:    [[TMP12:%.*]] = bitcast i64 [[BF_SET9_I]] to double
+// AMDGCNSPIRV-NEXT:    ret double [[TMP12]]
 //
 extern "C" __device__ double test_nan(const char *tag) {
   return nan(tag);
@@ -4679,22 +4611,22 @@ extern "C" __device__ double test_nearbyint(double x) {
 
 // DEFAULT-LABEL: @test_nextafterf(
 // DEFAULT-NEXT:  entry:
-// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_nextafter_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]]) #[[ATTR12]]
+// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_nextafter_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]]) #[[ATTR14]]
 // DEFAULT-NEXT:    ret float [[CALL_I]]
 //
 // FINITEONLY-LABEL: @test_nextafterf(
 // FINITEONLY-NEXT:  entry:
-// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_nextafter_f32(float noundef nofpclass(nan inf) [[X:%.*]], float noundef nofpclass(nan inf) [[Y:%.*]]) #[[ATTR12]]
+// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_nextafter_f32(float noundef nofpclass(nan inf) [[X:%.*]], float noundef nofpclass(nan inf) [[Y:%.*]]) #[[ATTR14]]
 // FINITEONLY-NEXT:    ret float [[CALL_I]]
 //
 // APPROX-LABEL: @test_nextafterf(
 // APPROX-NEXT:  entry:
-// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_nextafter_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]]) #[[ATTR12]]
+// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_nextafter_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]]) #[[ATTR14]]
 // APPROX-NEXT:    ret float [[CALL_I]]
 //
 // NCRDIV-LABEL: @test_nextafterf(
 // NCRDIV-NEXT:  entry:
-// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_nextafter_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]]) #[[ATTR12]]
+// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_nextafter_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]]) #[[ATTR14]]
 // NCRDIV-NEXT:    ret float [[CALL_I]]
 //
 // AMDGCNSPIRV-LABEL: @test_nextafterf(
@@ -4708,22 +4640,22 @@ extern "C" __device__ float test_nextafterf(float x, float y) {
 
 // DEFAULT-LABEL: @test_nextafter(
 // DEFAULT-NEXT:  entry:
-// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_nextafter_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]]) #[[ATTR12]]
+// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_nextafter_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]]) #[[ATTR14]]
 // DEFAULT-NEXT:    ret double [[CALL_I]]
 //
 // FINITEONLY-LABEL: @test_nextafter(
 // FINITEONLY-NEXT:  entry:
-// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_nextafter_f64(double noundef nofpclass(nan inf) [[X:%.*]], double noundef nofpclass(nan inf) [[Y:%.*]]) #[[ATTR12]]
+// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_nextafter_f64(double noundef nofpclass(nan inf) [[X:%.*]], double noundef nofpclass(nan inf) [[Y:%.*]]) #[[ATTR14]]
 // FINITEONLY-NEXT:    ret double [[CALL_I]]
 //
 // APPROX-LABEL: @test_nextafter(
 // APPROX-NEXT:  entry:
-// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_nextafter_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]]) #[[ATTR12]]
+// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_nextafter_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]]) #[[ATTR14]]
 // APPROX-NEXT:    ret double [[CALL_I]]
 //
 // NCRDIV-LABEL: @test_nextafter(
 // NCRDIV-NEXT:  entry:
-// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_nextafter_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]]) #[[ATTR12]]
+// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_nextafter_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]]) #[[ATTR14]]
 // NCRDIV-NEXT:    ret double [[CALL_I]]
 //
 // AMDGCNSPIRV-LABEL: @test_nextafter(
@@ -4737,22 +4669,22 @@ extern "C" __device__ double test_nextafter(double x, double y) {
 
 // DEFAULT-LABEL: @test_norm3df(
 // DEFAULT-NEXT:  entry:
-// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_len3_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]], float noundef [[Z:%.*]]) #[[ATTR12]]
+// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_len3_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]], float noundef [[Z:%.*]]) #[[ATTR14]]
 // DEFAULT-NEXT:    ret float [[CALL_I]]
 //
 // FINITEONLY-LABEL: @test_norm3df(
 // FINITEONLY-NEXT:  entry:
-// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_len3_f32(float noundef nofpclass(nan inf) [[X:%.*]], float noundef nofpclass(nan inf) [[Y:%.*]], float noundef nofpclass(nan inf) [[Z:%.*]]) #[[ATTR12]]
+// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_len3_f32(float noundef nofpclass(nan inf) [[X:%.*]], float noundef nofpclass(nan inf) [[Y:%.*]], float noundef nofpclass(nan inf) [[Z:%.*]]) #[[ATTR14]]
 // FINITEONLY-NEXT:    ret float [[CALL_I]]
 //
 // APPROX-LABEL: @test_norm3df(
 // APPROX-NEXT:  entry:
-// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_len3_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]], float noundef [[Z:%.*]]) #[[ATTR12]]
+// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_len3_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]], float noundef [[Z:%.*]]) #[[ATTR14]]
 // APPROX-NEXT:    ret float [[CALL_I]]
 //
 // NCRDIV-LABEL: @test_norm3df(
 // NCRDIV-NEXT:  entry:
-// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_len3_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]], float noundef [[Z:%.*]]) #[[ATTR12]]
+// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_len3_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]], float noundef [[Z:%.*]]) #[[ATTR14]]
 // NCRDIV-NEXT:    ret float [[CALL_I]]
 //
 // AMDGCNSPIRV-LABEL: @test_norm3df(
@@ -4766,22 +4698,22 @@ extern "C" __device__ float test_norm3df(float x, float y, float z) {
 
 // DEFAULT-LABEL: @test_norm3d(
 // DEFAULT-NEXT:  entry:
-// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_len3_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]], double noundef [[Z:%.*]]) #[[ATTR12]]
+// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_len3_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]], double noundef [[Z:%.*]]) #[[ATTR14]]
 // DEFAULT-NEXT:    ret double [[CALL_I]]
 //
 // FINITEONLY-LABEL: @test_norm3d(
 // FINITEONLY-NEXT:  entry:
-// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_len3_f64(double noundef nofpclass(nan inf) [[X:%.*]], double noundef nofpclass(nan inf) [[Y:%.*]], double noundef nofpclass(nan inf) [[Z:%.*]]) #[[ATTR12]]
+// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_len3_f64(double noundef nofpclass(nan inf) [[X:%.*]], double noundef nofpclass(nan inf) [[Y:%.*]], double noundef nofpclass(nan inf) [[Z:%.*]]) #[[ATTR14]]
 // FINITEONLY-NEXT:    ret double [[CALL_I]]
 //
 // APPROX-LABEL: @test_norm3d(
 // APPROX-NEXT:  entry:
-// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_len3_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]], double noundef [[Z:%.*]]) #[[ATTR12]]
+// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_len3_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]], double noundef [[Z:%.*]]) #[[ATTR14]]
 // APPROX-NEXT:    ret double [[CALL_I]]
 //
 // NCRDIV-LABEL: @test_norm3d(
 // NCRDIV-NEXT:  entry:
-// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_len3_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]], double noundef [[Z:%.*]]) #[[ATTR12]]
+// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_len3_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]], double noundef [[Z:%.*]]) #[[ATTR14]]
 // NCRDIV-NEXT:    ret double [[CALL_I]]
 //
 // AMDGCNSPIRV-LABEL: @test_norm3d(
@@ -4795,22 +4727,22 @@ extern "C" __device__ double test_norm3d(double x, double y, double z) {
 
 // DEFAULT-LABEL: @test_norm4df(
 // DEFAULT-NEXT:  entry:
-// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_len4_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]], float noundef [[Z:%.*]], float noundef [[W:%.*]]) #[[ATTR12]]
+// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_len4_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]], float noundef [[Z:%.*]], float noundef [[W:%.*]]) #[[ATTR14]]
 // DEFAULT-NEXT:    ret float [[CALL_I]]
 //
 // FINITEONLY-LABEL: @test_norm4df(
 // FINITEONLY-NEXT:  entry:
-// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_len4_f32(float noundef nofpclass(nan inf) [[X:%.*]], float noundef nofpclass(nan inf) [[Y:%.*]], float noundef nofpclass(nan inf) [[Z:%.*]], float noundef nofpclass(nan inf) [[W:%.*]]) #[[ATTR12]]
+// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_len4_f32(float noundef nofpclass(nan inf) [[X:%.*]], float noundef nofpclass(nan inf) [[Y:%.*]], float noundef nofpclass(nan inf) [[Z:%.*]], float noundef nofpclass(nan inf) [[W:%.*]]) #[[ATTR14]]
 // FINITEONLY-NEXT:    ret float [[CALL_I]]
 //
 // APPROX-LABEL: @test_norm4df(
 // APPROX-NEXT:  entry:
-// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_len4_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]], float noundef [[Z:%.*]], float noundef [[W:%.*]]) #[[ATTR12]]
+// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_len4_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]], float noundef [[Z:%.*]], float noundef [[W:%.*]]) #[[ATTR14]]
 // APPROX-NEXT:    ret float [[CALL_I]]
 //
 // NCRDIV-LABEL: @test_norm4df(
 // NCRDIV-NEXT:  entry:
-// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_len4_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]], float noundef [[Z:%.*]], float noundef [[W:%.*]]) #[[ATTR12]]
+// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_len4_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]], float noundef [[Z:%.*]], float noundef [[W:%.*]]) #[[ATTR14]]
 // NCRDIV-NEXT:    ret float [[CALL_I]]
 //
 // AMDGCNSPIRV-LABEL: @test_norm4df(
@@ -4824,22 +4756,22 @@ extern "C" __device__ float test_norm4df(float x, float y, float z, float w) {
 
 // DEFAULT-LABEL: @test_norm4d(
 // DEFAULT-NEXT:  entry:
-// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_len4_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]], double noundef [[Z:%.*]], double noundef [[W:%.*]]) #[[ATTR12]]
+// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_len4_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]], double noundef [[Z:%.*]], double noundef [[W:%.*]]) #[[ATTR14]]
 // DEFAULT-NEXT:    ret double [[CALL_I]]
 //
 // FINITEONLY-LABEL: @test_norm4d(
 // FINITEONLY-NEXT:  entry:
-// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_len4_f64(double noundef nofpclass(nan inf) [[X:%.*]], double noundef nofpclass(nan inf) [[Y:%.*]], double noundef nofpclass(nan inf) [[Z:%.*]], double noundef nofpclass(nan inf) [[W:%.*]]) #[[ATTR12]]
+// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_len4_f64(double noundef nofpclass(nan inf) [[X:%.*]], double noundef nofpclass(nan inf) [[Y:%.*]], double noundef nofpclass(nan inf) [[Z:%.*]], double noundef nofpclass(nan inf) [[W:%.*]]) #[[ATTR14]]
 // FINITEONLY-NEXT:    ret double [[CALL_I]]
 //
 // APPROX-LABEL: @test_norm4d(
 // APPROX-NEXT:  entry:
-// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_len4_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]], double noundef [[Z:%.*]], double noundef [[W:%.*]]) #[[ATTR12]]
+// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_len4_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]], double noundef [[Z:%.*]], double noundef [[W:%.*]]) #[[ATTR14]]
 // APPROX-NEXT:    ret double [[CALL_I]]
 //
 // NCRDIV-LABEL: @test_norm4d(
 // NCRDIV-NEXT:  entry:
-// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_len4_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]], double noundef [[Z:%.*]], double noundef [[W:%.*]]) #[[ATTR12]]
+// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_len4_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]], double noundef [[Z:%.*]], double noundef [[W:%.*]]) #[[ATTR14]]
 // NCRDIV-NEXT:    ret double [[CALL_I]]
 //
 // AMDGCNSPIRV-LABEL: @test_norm4d(
@@ -4853,22 +4785,22 @@ extern "C" __device__ double test_norm4d(double x, double y, double z, double w)
 
 // DEFAULT-LABEL: @test_normcdff(
 // DEFAULT-NEXT:  entry:
-// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_ncdf_f32(float noundef [[X:%.*]]) #[[ATTR13]]
+// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_ncdf_f32(float noundef [[X:%.*]]) #[[ATTR15]]
 // DEFAULT-NEXT:    ret float [[CALL_I]]
 //
 // FINITEONLY-LABEL: @test_normcdff(
 // FINITEONLY-NEXT:  entry:
-// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_ncdf_f32(float noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR13]]
+// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_ncdf_f32(float noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR15]]
 // FINITEONLY-NEXT:    ret float [[CALL_I]]
 //
 // APPROX-LABEL: @test_normcdff(
 // APPROX-NEXT:  entry:
-// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_ncdf_f32(float noundef [[X:%.*]]) #[[ATTR13]]
+// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_ncdf_f32(float noundef [[X:%.*]]) #[[ATTR15]]
 // APPROX-NEXT:    ret float [[CALL_I]]
 //
 // NCRDIV-LABEL: @test_normcdff(
 // NCRDIV-NEXT:  entry:
-// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_ncdf_f32(float noundef [[X:%.*]]) #[[ATTR13]]
+// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_ncdf_f32(float noundef [[X:%.*]]) #[[ATTR15]]
 // NCRDIV-NEXT:    ret float [[CALL_I]]
 //
 // AMDGCNSPIRV-LABEL: @test_normcdff(
@@ -4882,22 +4814,22 @@ extern "C" __device__ float test_normcdff(float x) {
 
 // DEFAULT-LABEL: @test_normcdf(
 // DEFAULT-NEXT:  entry:
-// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_ncdf_f64(double noundef [[X:%.*]]) #[[ATTR13]]
+// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_ncdf_f64(double noundef [[X:%.*]]) #[[ATTR15]]
 // DEFAULT-NEXT:    ret double [[CALL_I]]
 //
 // FINITEONLY-LABEL: @test_normcdf(
 // FINITEONLY-NEXT:  entry:
-// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_ncdf_f64(double noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR13]]
+// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_ncdf_f64(double noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR15]]
 // FINITEONLY-NEXT:    ret double [[CALL_I]]
 //
 // APPROX-LABEL: @test_normcdf(
 // APPROX-NEXT:  entry:
-// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_ncdf_f64(double noundef [[X:%.*]]) #[[ATTR13]]
+// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_ncdf_f64(double noundef [[X:%.*]]) #[[ATTR15]]
 // APPROX-NEXT:    ret double [[CALL_I]]
 //
 // NCRDIV-LABEL: @test_normcdf(
 // NCRDIV-NEXT:  entry:
-// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_ncdf_f64(double noundef [[X:%.*]]) #[[ATTR13]]
+// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_ncdf_f64(double noundef [[X:%.*]]) #[[ATTR15]]
 // NCRDIV-NEXT:    ret double [[CALL_I]]
 //
 // AMDGCNSPIRV-LABEL: @test_normcdf(
@@ -4911,22 +4843,22 @@ extern "C" __device__ double test_normcdf(double x) {
 
 // DEFAULT-LABEL: @test_normcdfinvf(
 // DEFAULT-NEXT:  entry:
-// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_ncdfinv_f32(float noundef [[X:%.*]]) #[[ATTR13]]
+// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_ncdfinv_f32(float noundef [[X:%.*]]) #[[ATTR15]]
 // DEFAULT-NEXT:    ret float [[CALL_I]]
 //
 // FINITEONLY-LABEL: @test_normcdfinvf(
 // FINITEONLY-NEXT:  entry:
-// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_ncdfinv_f32(float noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR13]]
+// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_ncdfinv_f32(float noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR15]]
 // FINITEONLY-NEXT:    ret float [[CALL_I]]
 //
 // APPROX-LABEL: @test_normcdfinvf(
 // APPROX-NEXT:  entry:
-// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_ncdfinv_f32(float noundef [[X:%.*]]) #[[ATTR13]]
+// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_ncdfinv_f32(float noundef [[X:%.*]]) #[[ATTR15]]
 // APPROX-NEXT:    ret float [[CALL_I]]
 //
 // NCRDIV-LABEL: @test_normcdfinvf(
 // NCRDIV-NEXT:  entry:
-// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_ncdfinv_f32(float noundef [[X:%.*]]) #[[ATTR13]]
+// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_ncdfinv_f32(float noundef [[X:%.*]]) #[[ATTR15]]
 // NCRDIV-NEXT:    ret float [[CALL_I]]
 //
 // AMDGCNSPIRV-LABEL: @test_normcdfinvf(
@@ -4940,22 +4872,22 @@ extern "C" __device__ float test_normcdfinvf(float x) {
 
 // DEFAULT-LABEL: @test_normcdfinv(
 // DEFAULT-NEXT:  entry:
-// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_ncdfinv_f64(double noundef [[X:%.*]]) #[[ATTR13]]
+// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_ncdfinv_f64(double noundef [[X:%.*]]) #[[ATTR15]]
 // DEFAULT-NEXT:    ret double [[CALL_I]]
 //
 // FINITEONLY-LABEL: @test_normcdfinv(
 // FINITEONLY-NEXT:  entry:
-// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_ncdfinv_f64(double noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR13]]
+// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_ncdfinv_f64(double noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR15]]
 // FINITEONLY-NEXT:    ret double [[CALL_I]]
 //
 // APPROX-LABEL: @test_normcdfinv(
 // APPROX-NEXT:  entry:
-// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_ncdfinv_f64(double noundef [[X:%.*]]) #[[ATTR13]]
+// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_ncdfinv_f64(double noundef [[X:%.*]]) #[[ATTR15]]
 // APPROX-NEXT:    ret double [[CALL_I]]
 //
 // NCRDIV-LABEL: @test_normcdfinv(
 // NCRDIV-NEXT:  entry:
-// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_ncdfinv_f64(double noundef [[X:%.*]]) #[[ATTR13]]
+// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_ncdfinv_f64(double noundef [[X:%.*]]) #[[ATTR15]]
 // NCRDIV-NEXT:    ret double [[CALL_I]]
 //
 // AMDGCNSPIRV-LABEL: @test_normcdfinv(
@@ -4981,11 +4913,13 @@ extern "C" __device__ double test_normcdfinv(double x) {
 // DEFAULT-NEXT:    [[ADD_I]] = fadd contract float [[__R_0_I4]], [[MUL_I]]
 // DEFAULT-NEXT:    [[INCDEC_PTR_I]] = getelementptr inbounds nuw i8, ptr [[__A_ADDR_0_I3]], i64 4
 // DEFAULT-NEXT:    [[TOBOOL_NOT_I:%.*]] = icmp eq i32 [[DEC_I]], 0
-// DEFAULT-NEXT:    br i1 [[TOBOOL_NOT_I]], label [[_ZL5NORMFIPKF_EXIT]], label [[WHILE_BODY_I]], !llvm.loop [[LOOP20:![0-9]+]]
+// DEFAULT-NEXT:    br i1 [[TOBOOL_NOT_I]], label [[_ZL5NORMFIPKF_EXIT_LOOPEXIT:%.*]], label [[WHILE_BODY_I]], !llvm.loop [[LOOP20:![0-9]+]]
+// DEFAULT:       _ZL5normfiPKf.exit.loopexit:
+// DEFAULT-NEXT:    [[TMP1:%.*]] = tail call contract float @llvm.sqrt.f32(float [[ADD_I]])
+// DEFAULT-NEXT:    br label [[_ZL5NORMFIPKF_EXIT]]
 // DEFAULT:       _ZL5normfiPKf.exit:
-// DEFAULT-NEXT:    [[__R_0_I_LCSSA:%.*]] = phi float [ 0.000000e+00, [[ENTRY]] ], [ [[ADD_I]], [[WHILE_BODY_I]] ]
-// DEFAULT-NEXT:    [[TMP1:%.*]] = tail call contract noundef float @llvm.sqrt.f32(float [[__R_0_I_LCSSA]])
-// DEFAULT-NEXT:    ret float [[TMP1]]
+// DEFAULT-NEXT:    [[__R_0_I_LCSSA:%.*]] = phi float [ 0.000000e+00, [[ENTRY]] ], [ [[TMP1]], [[_ZL5NORMFIPKF_EXIT_LOOPEXIT]] ]
+// DEFAULT-NEXT:    ret float [[__R_0_I_LCSSA]]
 //
 // FINITEONLY-LABEL: @test_normf(
 // FINITEONLY-NEXT:  entry:
@@ -5001,11 +4935,13 @@ extern "C" __device__ double test_normcdfinv(double x) {
 // FINITEONLY-NEXT:    [[ADD_I]] = fadd nnan ninf contract float [[__R_0_I4]], [[MUL_I]]
 // FINITEONLY-NEXT:    [[INCDEC_PTR_I]] = getelementptr inbounds nuw i8, ptr [[__A_ADDR_0_I3]], i64 4
 // FINITEONLY-NEXT:    [[TOBOOL_NOT_I:%.*]] = icmp eq i32 [[DEC_I]], 0
-// FINITEONLY-NEXT:    br i1 [[TOBOOL_NOT_I]], label [[_ZL5NORMFIPKF_EXIT]], label [[WHILE_BODY_I]], !llvm.loop [[LOOP20:![0-9]+]]
+// FINITEONLY-NEXT:    br i1 [[TOBOOL_NOT_I]], label [[_ZL5NORMFIPKF_EXIT_LOOPEXIT:%.*]], label [[WHILE_BODY_I]], !llvm.loop [[LOOP20:![0-9]+]]
+// FINITEONLY:       _ZL5normfiPKf.exit.loopexit:
+// FINITEONLY-NEXT:    [[TMP1:%.*]] = tail call nnan ninf contract float @llvm.sqrt.f32(float [[ADD_I]])
+// FINITEONLY-NEXT:    br label [[_ZL5NORMFIPKF_EXIT]]
 // FINITEONLY:       _ZL5normfiPKf.exit:
-// FINITEONLY-NEXT:    [[__R_0_I_LCSSA:%.*]] = phi float [ 0.000000e+00, [[ENTRY]] ], [ [[ADD_I]], [[WHILE_BODY_I]] ]
-// FINITEONLY-NEXT:    [[TMP1:%.*]] = tail call nnan ninf contract noundef float @llvm.sqrt.f32(float [[__R_0_I_LCSSA]])
-// FINITEONLY-NEXT:    ret float [[TMP1]]
+// FINITEONLY-NEXT:    [[__R_0_I_LCSSA:%.*]] = phi float [ 0.000000e+00, [[ENTRY]] ], [ [[TMP1]], [[_ZL5NORMFIPKF_EXIT_LOOPEXIT]] ]
+// FINITEONLY-NEXT:    ret float [[__R_0_I_LCSSA]]
 //
 // APPROX-LABEL: @test_normf(
 // APPROX-NEXT:  entry:
@@ -5021,11 +4957,13 @@ extern "C" __device__ double test_normcdfinv(double x) {
 // APPROX-NEXT:    [[ADD_I]] = fadd contract float [[__R_0_I4]], [[MUL_I]]
 // APPROX-NEXT:    [[INCDEC_PTR_I]] = getelementptr inbounds nuw i8, ptr [[__A_ADDR_0_I3]], i64 4
 // APPROX-NEXT:    [[TOBOOL_NOT_I:%.*]] = icmp eq i32 [[DEC_I]], 0
-// APPROX-NEXT:    br i1 [[TOBOOL_NOT_I]], label [[_ZL5NORMFIPKF_EXIT]], label [[WHILE_BODY_I]], !llvm.loop [[LOOP20:![0-9]+]]
+// APPROX-NEXT:    br i1 [[TOBOOL_NOT_I]], label [[_ZL5NORMFIPKF_EXIT_LOOPEXIT:%.*]], label [[WHILE_BODY_I]], !llvm.loop [[LOOP20:![0-9]+]]
+// APPROX:       _ZL5normfiPKf.exit.loopexit:
+// APPROX-NEXT:    [[TMP1:%.*]] = tail call contract float @llvm.sqrt.f32(float [[ADD_I]])
+// APPROX-NEXT:    br label [[_ZL5NORMFIPKF_EXIT]]
 // APPROX:       _ZL5normfiPKf.exit:
-// APPROX-NEXT:    [[__R_0_I_LCSSA:%.*]] = phi float [ 0.000000e+00, [[ENTRY]] ], [ [[ADD_I]], [[WHILE_BODY_I]] ]
-// APPROX-NEXT:    [[TMP1:%.*]] = tail call contract noundef float @llvm.sqrt.f32(float [[__R_0_I_LCSSA]])
-// APPROX-NEXT:    ret float [[TMP1]]
+// APPROX-NEXT:    [[__R_0_I_LCSSA:%.*]] = phi float [ 0.000000e+00, [[ENTRY]] ], [ [[TMP1]], [[_ZL5NORMFIPKF_EXIT_LOOPEXIT]] ]
+// APPROX-NEXT:    ret float [[__R_0_I_LCSSA]]
 //
 // NCRDIV-LABEL: @test_normf(
 // NCRDIV-NEXT:  entry:
@@ -5041,11 +4979,13 @@ extern "C" __device__ double test_normcdfinv(double x) {
 // NCRDIV-NEXT:    [[ADD_I]] = fadd contract float [[__R_0_I4]], [[MUL_I]]
 // NCRDIV-NEXT:    [[INCDEC_PTR_I]] = getelementptr inbounds nuw i8, ptr [[__A_ADDR_0_I3]], i64 4
 // NCRDIV-NEXT:    [[TOBOOL_NOT_I:%.*]] = icmp eq i32 [[DEC_I]], 0
-// NCRDIV-NEXT:    br i1 [[TOBOOL_NOT_I]], label [[_ZL5NORMFIPKF_EXIT]], label [[WHILE_BODY_I]], !llvm.loop [[LOOP21:![0-9]+]]
+// NCRDIV-NEXT:    br i1 [[TOBOOL_NOT_I]], label [[_ZL5NORMFIPKF_EXIT_LOOPEXIT:%.*]], label [[WHILE_BODY_I]], !llvm.loop [[LOOP21:![0-9]+]]
+// NCRDIV:       _ZL5normfiPKf.exit.loopexit:
+// NCRDIV-NEXT:    [[TMP1:%.*]] = tail call contract float @llvm.sqrt.f32(float [[ADD_I]])
+// NCRDIV-NEXT:    br label [[_ZL5NORMFIPKF_EXIT]]
 // NCRDIV:       _ZL5normfiPKf.exit:
-// NCRDIV-NEXT:    [[__R_0_I_LCSSA:%.*]] = phi float [ 0.000000e+00, [[ENTRY]] ], [ [[ADD_I]], [[WHILE_BODY_I]] ]
-// NCRDIV-NEXT:    [[TMP1:%.*]] = tail call contract noundef float @llvm.sqrt.f32(float [[__R_0_I_LCSSA]]), !fpmath [[META22:![0-9]+]]
-// NCRDIV-NEXT:    ret float [[TMP1]]
+// NCRDIV-NEXT:    [[__R_0_I_LCSSA:%.*]] = phi float [ 0.000000e+00, [[ENTRY]] ], [ [[TMP1]], [[_ZL5NORMFIPKF_EXIT_LOOPEXIT]] ]
+// NCRDIV-NEXT:    ret float [[__R_0_I_LCSSA]]
 //
 // AMDGCNSPIRV-LABEL: @test_normf(
 // AMDGCNSPIRV-NEXT:  entry:
@@ -5061,11 +5001,13 @@ extern "C" __device__ double test_normcdfinv(double x) {
 // AMDGCNSPIRV-NEXT:    [[ADD_I]] = fadd contract float [[__R_0_I4]], [[MUL_I]]
 // AMDGCNSPIRV-NEXT:    [[INCDEC_PTR_I]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[__A_ADDR_0_I3]], i64 4
 // AMDGCNSPIRV-NEXT:    [[TOBOOL_NOT_I:%.*]] = icmp eq i32 [[DEC_I]], 0
-// AMDGCNSPIRV-NEXT:    br i1 [[TOBOOL_NOT_I]], label [[_ZL5NORMFIPKF_EXIT]], label [[WHILE_BODY_I]], !llvm.loop [[LOOP21:![0-9]+]]
+// AMDGCNSPIRV-NEXT:    br i1 [[TOBOOL_NOT_I]], label [[_ZL5NORMFIPKF_EXIT_LOOPEXIT:%.*]], label [[WHILE_BODY_I]], !llvm.loop [[LOOP21:![0-9]+]]
+// AMDGCNSPIRV:       _ZL5normfiPKf.exit.loopexit:
+// AMDGCNSPIRV-NEXT:    [[TMP1:%.*]] = tail call contract addrspace(4) float @llvm.sqrt.f32(float [[ADD_I]])
+// AMDGCNSPIRV-NEXT:    br label [[_ZL5NORMFIPKF_EXIT]]
 // AMDGCNSPIRV:       _ZL5normfiPKf.exit:
-// AMDGCNSPIRV-NEXT:    [[__R_0_I_LCSSA:%.*]] = phi float [ 0.000000e+00, [[ENTRY]] ], [ [[ADD_I]], [[WHILE_BODY_I]] ]
-// AMDGCNSPIRV-NEXT:    [[TMP1:%.*]] = tail call contract noundef addrspace(4) float @llvm.sqrt.f32(float [[__R_0_I_LCSSA]])
-// AMDGCNSPIRV-NEXT:    ret float [[TMP1]]
+// AMDGCNSPIRV-NEXT:    [[__R_0_I_LCSSA:%.*]] = phi float [ 0.000000e+00, [[ENTRY]] ], [ [[TMP1]], [[_ZL5NORMFIPKF_EXIT_LOOPEXIT]] ]
+// AMDGCNSPIRV-NEXT:    ret float [[__R_0_I_LCSSA]]
 //
 extern "C" __device__ float test_normf(int x, const float *y) {
   return normf(x, y);
@@ -5085,11 +5027,13 @@ extern "C" __device__ float test_normf(int x, const float *y) {
 // DEFAULT-NEXT:    [[ADD_I]] = fadd contract double [[__R_0_I4]], [[MUL_I]]
 // DEFAULT-NEXT:    [[INCDEC_PTR_I]] = getelementptr inbounds nuw i8, ptr [[__A_ADDR_0_I3]], i64 8
 // DEFAULT-NEXT:    [[TOBOOL_NOT_I:%.*]] = icmp eq i32 [[DEC_I]], 0
-// DEFAULT-NEXT:    br i1 [[TOBOOL_NOT_I]], label [[_ZL4NORMIPKD_EXIT]], label [[WHILE_BODY_I]], !llvm.loop [[LOOP21:![0-9]+]]
+// DEFAULT-NEXT:    br i1 [[TOBOOL_NOT_I]], label [[_ZL4NORMIPKD_EXIT_LOOPEXIT:%.*]], label [[WHILE_BODY_I]], !llvm.loop [[LOOP21:![0-9]+]]
+// DEFAULT:       _ZL4normiPKd.exit.loopexit:
+// DEFAULT-NEXT:    [[TMP1:%.*]] = tail call contract double @llvm.sqrt.f64(double [[ADD_I]])
+// DEFAULT-NEXT:    br label [[_ZL4NORMIPKD_EXIT]]
 // DEFAULT:       _ZL4normiPKd.exit:
-// DEFAULT-NEXT:    [[__R_0_I_LCSSA:%.*]] = phi double [ 0.000000e+00, [[ENTRY]] ], [ [[ADD_I]], [[WHILE_BODY_I]] ]
-// DEFAULT-NEXT:    [[TMP1:%.*]] = tail call contract noundef double @llvm.sqrt.f64(double [[__R_0_I_LCSSA]])
-// DEFAULT-NEXT:    ret double [[TMP1]]
+// DEFAULT-NEXT:    [[__R_0_I_LCSSA:%.*]] = phi double [ 0.000000e+00, [[ENTRY]] ], [ [[TMP1]], [[_ZL4NORMIPKD_EXIT_LOOPEXIT]] ]
+// DEFAULT-NEXT:    ret double [[__R_0_I_LCSSA]]
 //
 // FINITEONLY-LABEL: @test_norm(
 // FINITEONLY-NEXT:  entry:
@@ -5105,11 +5049,13 @@ extern "C" __device__ float test_normf(int x, const float *y) {
 // FINITEONLY-NEXT:    [[ADD_I]] = fadd nnan ninf contract double [[__R_0_I4]], [[MUL_I]]
 // FINITEONLY-NEXT:    [[INCDEC_PTR_I]] = getelementptr inbounds nuw i8, ptr [[__A_ADDR_0_I3]], i64 8
 // FINITEONLY-NEXT:    [[TOBOOL_NOT_I:%.*]] = icmp eq i32 [[DEC_I]], 0
-// FINITEONLY-NEXT:    br i1 [[TOBOOL_NOT_I]], label [[_ZL4NORMIPKD_EXIT]], label [[WHILE_BODY_I]], !llvm.loop [[LOOP21:![0-9]+]]
+// FINITEONLY-NEXT:    br i1 [[TOBOOL_NOT_I]], label [[_ZL4NORMIPKD_EXIT_LOOPEXIT:%.*]], label [[WHILE_BODY_I]], !llvm.loop [[LOOP21:![0-9]+]]
+// FINITEONLY:       _ZL4normiPKd.exit.loopexit:
+// FINITEONLY-NEXT:    [[TMP1:%.*]] = tail call nnan ninf contract double @llvm.sqrt.f64(double [[ADD_I]])
+// FINITEONLY-NEXT:    br label [[_ZL4NORMIPKD_EXIT]]
 // FINITEONLY:       _ZL4normiPKd.exit:
-// FINITEONLY-NEXT:    [[__R_0_I_LCSSA:%.*]] = phi double [ 0.000000e+00, [[ENTRY]] ], [ [[ADD_I]], [[WHILE_BODY_I]] ]
-// FINITEONLY-NEXT:    [[TMP1:%.*]] = tail call nnan ninf contract noundef double @llvm.sqrt.f64(double [[__R_0_I_LCSSA]])
-// FINITEONLY-NEXT:    ret double [[TMP1]]
+// FINITEONLY-NEXT:    [[__R_0_I_LCSSA:%.*]] = phi double [ 0.000000e+00, [[ENTRY]] ], [ [[TMP1]], [[_ZL4NORMIPKD_EXIT_LOOPEXIT]] ]
+// FINITEONLY-NEXT:    ret double [[__R_0_I_LCSSA]]
 //
 // APPROX-LABEL: @test_norm(
 // APPROX-NEXT:  entry:
@@ -5125,11 +5071,13 @@ extern "C" __device__ float test_normf(int x, const float *y) {
 // APPROX-NEXT:    [[ADD_I]] = fadd contract double [[__R_0_I4]], [[MUL_I]]
 // APPROX-NEXT:    [[INCDEC_PTR_I]] = getelementptr inbounds nuw i8, ptr [[__A_ADDR_0_I3]], i64 8
 // APPROX-NEXT:    [[TOBOOL_NOT_I:%.*]] = icmp eq i32 [[DEC_I]], 0
-// APPROX-NEXT:    br i1 [[TOBOOL_NOT_I]], label [[_ZL4NORMIPKD_EXIT]], label [[WHILE_BODY_I]], !llvm.loop [[LOOP21:![0-9]+]]
+// APPROX-NEXT:    br i1 [[TOBOOL_NOT_I]], label [[_ZL4NORMIPKD_EXIT_LOOPEXIT:%.*]], label [[WHILE_BODY_I]], !llvm.loop [[LOOP21:![0-9]+]]
+// APPROX:       _ZL4normiPKd.exit.loopexit:
+// APPROX-NEXT:    [[TMP1:%.*]] = tail call contract double @llvm.sqrt.f64(double [[ADD_I]])
+// APPROX-NEXT:    br label [[_ZL4NORMIPKD_EXIT]]
 // APPROX:       _ZL4normiPKd.exit:
-// APPROX-NEXT:    [[__R_0_I_LCSSA:%.*]] = phi double [ 0.000000e+00, [[ENTRY]] ], [ [[ADD_I]], [[WHILE_BODY_I]] ]
-// APPROX-NEXT:    [[TMP1:%.*]] = tail call contract noundef double @llvm.sqrt.f64(double [[__R_0_I_LCSSA]])
-// APPROX-NEXT:    ret double [[TMP1]]
+// APPROX-NEXT:    [[__R_0_I_LCSSA:%.*]] = phi double [ 0.000000e+00, [[ENTRY]] ], [ [[TMP1]], [[_ZL4NORMIPKD_EXIT_LOOPEXIT]] ]
+// APPROX-NEXT:    ret double [[__R_0_I_LCSSA]]
 //
 // NCRDIV-LABEL: @test_norm(
 // NCRDIV-NEXT:  entry:
@@ -5145,11 +5093,13 @@ extern "C" __device__ float test_normf(int x, const float *y) {
 // NCRDIV-NEXT:    [[ADD_I]] = fadd contract double [[__R_0_I4]], [[MUL_I]]
 // NCRDIV-NEXT:    [[INCDEC_PTR_I]] = getelementptr inbounds nuw i8, ptr [[__A_ADDR_0_I3]], i64 8
 // NCRDIV-NEXT:    [[TOBOOL_NOT_I:%.*]] = icmp eq i32 [[DEC_I]], 0
-// NCRDIV-NEXT:    br i1 [[TOBOOL_NOT_I]], label [[_ZL4NORMIPKD_EXIT]], label [[WHILE_BODY_I]], !llvm.loop [[LOOP23:![0-9]+]]
+// NCRDIV-NEXT:    br i1 [[TOBOOL_NOT_I]], label [[_ZL4NORMIPKD_EXIT_LOOPEXIT:%.*]], label [[WHILE_BODY_I]], !llvm.loop [[LOOP22:![0-9]+]]
+// NCRDIV:       _ZL4normiPKd.exit.loopexit:
+// NCRDIV-NEXT:    [[TMP1:%.*]] = tail call contract double @llvm.sqrt.f64(double [[ADD_I]])
+// NCRDIV-NEXT:    br label [[_ZL4NORMIPKD_EXIT]]
 // NCRDIV:       _ZL4normiPKd.exit:
-// NCRDIV-NEXT:    [[__R_0_I_LCSSA:%.*]] = phi double [ 0.000000e+00, [[ENTRY]] ], [ [[ADD_I]], [[WHILE_BODY_I]] ]
-// NCRDIV-NEXT:    [[TMP1:%.*]] = tail call contract noundef double @llvm.sqrt.f64(double [[__R_0_I_LCSSA]])
-// NCRDIV-NEXT:    ret double [[TMP1]]
+// NCRDIV-NEXT:    [[__R_0_I_LCSSA:%.*]] = phi double [ 0.000000e+00, [[ENTRY]] ], [ [[TMP1]], [[_ZL4NORMIPKD_EXIT_LOOPEXIT]] ]
+// NCRDIV-NEXT:    ret double [[__R_0_I_LCSSA]]
 //
 // AMDGCNSPIRV-LABEL: @test_norm(
 // AMDGCNSPIRV-NEXT:  entry:
@@ -5165,11 +5115,13 @@ extern "C" __device__ float test_normf(int x, const float *y) {
 // AMDGCNSPIRV-NEXT:    [[ADD_I]] = fadd contract double [[__R_0_I4]], [[MUL_I]]
 // AMDGCNSPIRV-NEXT:    [[INCDEC_PTR_I]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[__A_ADDR_0_I3]], i64 8
 // AMDGCNSPIRV-NEXT:    [[TOBOOL_NOT_I:%.*]] = icmp eq i32 [[DEC_I]], 0
-// AMDGCNSPIRV-NEXT:    br i1 [[TOBOOL_NOT_I]], label [[_ZL4NORMIPKD_EXIT]], label [[WHILE_BODY_I]], !llvm.loop [[LOOP22:![0-9]+]]
+// AMDGCNSPIRV-NEXT:    br i1 [[TOBOOL_NOT_I]], label [[_ZL4NORMIPKD_EXIT_LOOPEXIT:%.*]], label [[WHILE_BODY_I]], !llvm.loop [[LOOP22:![0-9]+]]
+// AMDGCNSPIRV:       _ZL4normiPKd.exit.loopexit:
+// AMDGCNSPIRV-NEXT:    [[TMP1:%.*]] = tail call contract addrspace(4) double @llvm.sqrt.f64(double [[ADD_I]])
+// AMDGCNSPIRV-NEXT:    br label [[_ZL4NORMIPKD_EXIT]]
 // AMDGCNSPIRV:       _ZL4normiPKd.exit:
-// AMDGCNSPIRV-NEXT:    [[__R_0_I_LCSSA:%.*]] = phi double [ 0.000000e+00, [[ENTRY]] ], [ [[ADD_I]], [[WHILE_BODY_I]] ]
-// AMDGCNSPIRV-NEXT:    [[TMP1:%.*]] = tail call contract noundef addrspace(4) double @llvm.sqrt.f64(double [[__R_0_I_LCSSA]])
-// AMDGCNSPIRV-NEXT:    ret double [[TMP1]]
+// AMDGCNSPIRV-NEXT:    [[__R_0_I_LCSSA:%.*]] = phi double [ 0.000000e+00, [[ENTRY]] ], [ [[TMP1]], [[_ZL4NORMIPKD_EXIT_LOOPEXIT]] ]
+// AMDGCNSPIRV-NEXT:    ret double [[__R_0_I_LCSSA]]
 //
 extern "C" __device__ double test_norm(int x, const double *y) {
   return norm(x, y);
@@ -5177,22 +5129,22 @@ extern "C" __device__ double test_norm(int x, const double *y) {
 
 // DEFAULT-LABEL: @test_powf(
 // DEFAULT-NEXT:  entry:
-// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_pow_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]]) #[[ATTR13]]
+// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_pow_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]]) #[[ATTR15]]
 // DEFAULT-NEXT:    ret float [[CALL_I]]
 //
 // FINITEONLY-LABEL: @test_powf(
 // FINITEONLY-NEXT:  entry:
-// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_pow_f32(float noundef nofpclass(nan inf) [[X:%.*]], float noundef nofpclass(nan inf) [[Y:%.*]]) #[[ATTR13]]
+// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_pow_f32(float noundef nofpclass(nan inf) [[X:%.*]], float noundef nofpclass(nan inf) [[Y:%.*]]) #[[ATTR15]]
 // FINITEONLY-NEXT:    ret float [[CALL_I]]
 //
 // APPROX-LABEL: @test_powf(
 // APPROX-NEXT:  entry:
-// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_pow_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]]) #[[ATTR13]]
+// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_pow_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]]) #[[ATTR15]]
 // APPROX-NEXT:    ret float [[CALL_I]]
 //
 // NCRDIV-LABEL: @test_powf(
 // NCRDIV-NEXT:  entry:
-// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_pow_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]]) #[[ATTR13]]
+// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_pow_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]]) #[[ATTR15]]
 // NCRDIV-NEXT:    ret float [[CALL_I]]
 //
 // AMDGCNSPIRV-LABEL: @test_powf(
@@ -5206,22 +5158,22 @@ extern "C" __device__ float test_powf(float x, float y) {
 
 // DEFAULT-LABEL: @test_pow(
 // DEFAULT-NEXT:  entry:
-// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_pow_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]]) #[[ATTR13]]
+// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_pow_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]]) #[[ATTR15]]
 // DEFAULT-NEXT:    ret double [[CALL_I]]
 //
 // FINITEONLY-LABEL: @test_pow(
 // FINITEONLY-NEXT:  entry:
-// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_pow_f64(double noundef nofpclass(nan inf) [[X:%.*]], double noundef nofpclass(nan inf) [[Y:%.*]]) #[[ATTR13]]
+// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_pow_f64(double noundef nofpclass(nan inf) [[X:%.*]], double noundef nofpclass(nan inf) [[Y:%.*]]) #[[ATTR15]]
 // FINITEONLY-NEXT:    ret double [[CALL_I]]
 //
 // APPROX-LABEL: @test_pow(
 // APPROX-NEXT:  entry:
-// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_pow_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]]) #[[ATTR13]]
+// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_pow_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]]) #[[ATTR15]]
 // APPROX-NEXT:    ret double [[CALL_I]]
 //
 // NCRDIV-LABEL: @test_pow(
 // NCRDIV-NEXT:  entry:
-// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_pow_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]]) #[[ATTR13]]
+// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_pow_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]]) #[[ATTR15]]
 // NCRDIV-NEXT:    ret double [[CALL_I]]
 //
 // AMDGCNSPIRV-LABEL: @test_pow(
@@ -5235,22 +5187,22 @@ extern "C" __device__ double test_pow(double x, double y) {
 
 // DEFAULT-LABEL: @test_powif(
 // DEFAULT-NEXT:  entry:
-// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_pown_f32(float noundef [[X:%.*]], i32 noundef [[Y:%.*]]) #[[ATTR13]]
+// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_pown_f32(float noundef [[X:%.*]], i32 noundef [[Y:%.*]]) #[[ATTR15]]
 // DEFAULT-NEXT:    ret float [[CALL_I]]
 //
 // FINITEONLY-LABEL: @test_powif(
 // FINITEONLY-NEXT:  entry:
-// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_pown_f32(float noundef nofpclass(nan inf) [[X:%.*]], i32 noundef [[Y:%.*]]) #[[ATTR13]]
+// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_pown_f32(float noundef nofpclass(nan inf) [[X:%.*]], i32 noundef [[Y:%.*]]) #[[ATTR15]]
 // FINITEONLY-NEXT:    ret float [[CALL_I]]
 //
 // APPROX-LABEL: @test_powif(
 // APPROX-NEXT:  entry:
-// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_pown_f32(float noundef [[X:%.*]], i32 noundef [[Y:%.*]]) #[[ATTR13]]
+// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_pown_f32(float noundef [[X:%.*]], i32 noundef [[Y:%.*]]) #[[ATTR15]]
 // APPROX-NEXT:    ret float [[CALL_I]]
 //
 // NCRDIV-LABEL: @test_powif(
 // NCRDIV-NEXT:  entry:
-// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_pown_f32(float noundef [[X:%.*]], i32 noundef [[Y:%.*]]) #[[ATTR13]]
+// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_pown_f32(float noundef [[X:%.*]], i32 noundef [[Y:%.*]]) #[[ATTR15]]
 // NCRDIV-NEXT:    ret float [[CALL_I]]
 //
 // AMDGCNSPIRV-LABEL: @test_powif(
@@ -5264,22 +5216,22 @@ extern "C" __device__ float test_powif(float x, int y) {
 
 // DEFAULT-LABEL: @test_powi(
 // DEFAULT-NEXT:  entry:
-// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_pown_f64(double noundef [[X:%.*]], i32 noundef [[Y:%.*]]) #[[ATTR13]]
+// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_pown_f64(double noundef [[X:%.*]], i32 noundef [[Y:%.*]]) #[[ATTR15]]
 // DEFAULT-NEXT:    ret double [[CALL_I]]
 //
 // FINITEONLY-LABEL: @test_powi(
 // FINITEONLY-NEXT:  entry:
-// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_pown_f64(double noundef nofpclass(nan inf) [[X:%.*]], i32 noundef [[Y:%.*]]) #[[ATTR13]]
+// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_pown_f64(double noundef nofpclass(nan inf) [[X:%.*]], i32 noundef [[Y:%.*]]) #[[ATTR15]]
 // FINITEONLY-NEXT:    ret double [[CALL_I]]
 //
 // APPROX-LABEL: @test_powi(
 // APPROX-NEXT:  entry:
-// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_pown_f64(double noundef [[X:%.*]], i32 noundef [[Y:%.*]]) #[[ATTR13]]
+// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_pown_f64(double noundef [[X:%.*]], i32 noundef [[Y:%.*]]) #[[ATTR15]]
 // APPROX-NEXT:    ret double [[CALL_I]]
 //
 // NCRDIV-LABEL: @test_powi(
 // NCRDIV-NEXT:  entry:
-// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_pown_f64(double noundef [[X:%.*]], i32 noundef [[Y:%.*]]) #[[ATTR13]]
+// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_pown_f64(double noundef [[X:%.*]], i32 noundef [[Y:%.*]]) #[[ATTR15]]
 // NCRDIV-NEXT:    ret double [[CALL_I]]
 //
 // AMDGCNSPIRV-LABEL: @test_powi(
@@ -5293,22 +5245,22 @@ extern "C" __device__ double test_powi(double x, int y) {
 
 // DEFAULT-LABEL: @test_rcbrtf(
 // DEFAULT-NEXT:  entry:
-// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_rcbrt_f32(float noundef [[X:%.*]]) #[[ATTR13]]
+// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_rcbrt_f32(float noundef [[X:%.*]]) #[[ATTR15]]
 // DEFAULT-NEXT:    ret float [[CALL_I]]
 //
 // FINITEONLY-LABEL: @test_rcbrtf(
 // FINITEONLY-NEXT:  entry:
-// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_rcbrt_f32(float noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR13]]
+// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_rcbrt_f32(float noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR15]]
 // FINITEONLY-NEXT:    ret float [[CALL_I]]
 //
 // APPROX-LABEL: @test_rcbrtf(
 // APPROX-NEXT:  entry:
-// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_rcbrt_f32(float noundef [[X:%.*]]) #[[ATTR13]]
+// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_rcbrt_f32(float noundef [[X:%.*]]) #[[ATTR15]]
 // APPROX-NEXT:    ret float [[CALL_I]]
 //
 // NCRDIV-LABEL: @test_rcbrtf(
 // NCRDIV-NEXT:  entry:
-// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_rcbrt_f32(float noundef [[X:%.*]]) #[[ATTR13]]
+// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_rcbrt_f32(float noundef [[X:%.*]]) #[[ATTR15]]
 // NCRDIV-NEXT:    ret float [[CALL_I]]
 //
 // AMDGCNSPIRV-LABEL: @test_rcbrtf(
@@ -5322,22 +5274,22 @@ extern "C" __device__ float test_rcbrtf(float x) {
 
 // DEFAULT-LABEL: @test_rcbrt(
 // DEFAULT-NEXT:  entry:
-// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_rcbrt_f64(double noundef [[X:%.*]]) #[[ATTR13]]
+// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_rcbrt_f64(double noundef [[X:%.*]]) #[[ATTR15]]
 // DEFAULT-NEXT:    ret double [[CALL_I]]
 //
 // FINITEONLY-LABEL: @test_rcbrt(
 // FINITEONLY-NEXT:  entry:
-// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_rcbrt_f64(double noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR13]]
+// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_rcbrt_f64(double noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR15]]
 // FINITEONLY-NEXT:    ret double [[CALL_I]]
 //
 // APPROX-LABEL: @test_rcbrt(
 // APPROX-NEXT:  entry:
-// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_rcbrt_f64(double noundef [[X:%.*]]) #[[ATTR13]]
+// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_rcbrt_f64(double noundef [[X:%.*]]) #[[ATTR15]]
 // APPROX-NEXT:    ret double [[CALL_I]]
 //
 // NCRDIV-LABEL: @test_rcbrt(
 // NCRDIV-NEXT:  entry:
-// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_rcbrt_f64(double noundef [[X:%.*]]) #[[ATTR13]]
+// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_rcbrt_f64(double noundef [[X:%.*]]) #[[ATTR15]]
 // NCRDIV-NEXT:    ret double [[CALL_I]]
 //
 // AMDGCNSPIRV-LABEL: @test_rcbrt(
@@ -5351,22 +5303,22 @@ extern "C" __device__ double test_rcbrt(double x) {
 
 // DEFAULT-LABEL: @test_remainderf(
 // DEFAULT-NEXT:  entry:
-// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_remainder_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]]) #[[ATTR12]]
+// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_remainder_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]]) #[[ATTR14]]
 // DEFAULT-NEXT:    ret float [[CALL_I]]
 //
 // FINITEONLY-LABEL: @test_remainderf(
 // FINITEONLY-NEXT:  entry:
-// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_remainder_f32(float noundef nofpclass(nan inf) [[X:%.*]], float noundef nofpclass(nan inf) [[Y:%.*]]) #[[ATTR12]]
+// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_remainder_f32(float noundef nofpclass(nan inf) [[X:%.*]], float noundef nofpclass(nan inf) [[Y:%.*]]) #[[ATTR14]]
 // FINITEONLY-NEXT:    ret float [[CALL_I]]
 //
 // APPROX-LABEL: @test_remainderf(
 // APPROX-NEXT:  entry:
-// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_remainder_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]]) #[[ATTR12]]
+// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_remainder_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]]) #[[ATTR14]]
 // APPROX-NEXT:    ret float [[CALL_I]]
 //
 // NCRDIV-LABEL: @test_remainderf(
 // NCRDIV-NEXT:  entry:
-// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_remainder_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]]) #[[ATTR12]]
+// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_remainder_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]]) #[[ATTR14]]
 // NCRDIV-NEXT:    ret float [[CALL_I]]
 //
 // AMDGCNSPIRV-LABEL: @test_remainderf(
@@ -5380,22 +5332,22 @@ extern "C" __device__ float test_remainderf(float x, float y) {
 
 // DEFAULT-LABEL: @test_remainder(
 // DEFAULT-NEXT:  entry:
-// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_remainder_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]]) #[[ATTR12]]
+// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_remainder_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]]) #[[ATTR14]]
 // DEFAULT-NEXT:    ret double [[CALL_I]]
 //
 // FINITEONLY-LABEL: @test_remainder(
 // FINITEONLY-NEXT:  entry:
-// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_remainder_f64(double noundef nofpclass(nan inf) [[X:%.*]], double noundef nofpclass(nan inf) [[Y:%.*]]) #[[ATTR12]]
+// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_remainder_f64(double noundef nofpclass(nan inf) [[X:%.*]], double noundef nofpclass(nan inf) [[Y:%.*]]) #[[ATTR14]]
 // FINITEONLY-NEXT:    ret double [[CALL_I]]
 //
 // APPROX-LABEL: @test_remainder(
 // APPROX-NEXT:  entry:
-// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_remainder_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]]) #[[ATTR12]]
+// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_remainder_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]]) #[[ATTR14]]
 // APPROX-NEXT:    ret double [[CALL_I]]
 //
 // NCRDIV-LABEL: @test_remainder(
 // NCRDIV-NEXT:  entry:
-// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_remainder_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]]) #[[ATTR12]]
+// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_remainder_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]]) #[[ATTR14]]
 // NCRDIV-NEXT:    ret double [[CALL_I]]
 //
 // AMDGCNSPIRV-LABEL: @test_remainder(
@@ -5410,41 +5362,41 @@ extern "C" __device__ double test_remainder(double x, double y) {
 // DEFAULT-LABEL: @test_remquof(
 // DEFAULT-NEXT:  entry:
 // DEFAULT-NEXT:    [[__TMP_I:%.*]] = alloca i32, align 4, addrspace(5)
-// DEFAULT-NEXT:    call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[__TMP_I]]) #[[ATTR15]]
-// DEFAULT-NEXT:    [[CALL_I:%.*]] = call contract noundef float @__ocml_remquo_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR14]]
+// DEFAULT-NEXT:    call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]]
+// DEFAULT-NEXT:    [[CALL_I:%.*]] = call contract noundef float @__ocml_remquo_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]]
 // DEFAULT-NEXT:    [[TMP0:%.*]] = load i32, ptr addrspace(5) [[__TMP_I]], align 4, !tbaa [[TBAA12]]
 // DEFAULT-NEXT:    store i32 [[TMP0]], ptr [[Z:%.*]], align 4, !tbaa [[TBAA12]]
-// DEFAULT-NEXT:    call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[__TMP_I]]) #[[ATTR15]]
+// DEFAULT-NEXT:    call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]]
 // DEFAULT-NEXT:    ret float [[CALL_I]]
 //
 // FINITEONLY-LABEL: @test_remquof(
 // FINITEONLY-NEXT:  entry:
 // FINITEONLY-NEXT:    [[__TMP_I:%.*]] = alloca i32, align 4, addrspace(5)
-// FINITEONLY-NEXT:    call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[__TMP_I]]) #[[ATTR15]]
-// FINITEONLY-NEXT:    [[CALL_I:%.*]] = call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_remquo_f32(float noundef nofpclass(nan inf) [[X:%.*]], float noundef nofpclass(nan inf) [[Y:%.*]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR14]]
+// FINITEONLY-NEXT:    call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]]
+// FINITEONLY-NEXT:    [[CALL_I:%.*]] = call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_remquo_f32(float noundef nofpclass(nan inf) [[X:%.*]], float noundef nofpclass(nan inf) [[Y:%.*]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]]
 // FINITEONLY-NEXT:    [[TMP0:%.*]] = load i32, ptr addrspace(5) [[__TMP_I]], align 4, !tbaa [[TBAA12]]
 // FINITEONLY-NEXT:    store i32 [[TMP0]], ptr [[Z:%.*]], align 4, !tbaa [[TBAA12]]
-// FINITEONLY-NEXT:    call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[__TMP_I]]) #[[ATTR15]]
+// FINITEONLY-NEXT:    call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]]
 // FINITEONLY-NEXT:    ret float [[CALL_I]]
 //
 // APPROX-LABEL: @test_remquof(
 // APPROX-NEXT:  entry:
 // APPROX-NEXT:    [[__TMP_I:%.*]] = alloca i32, align 4, addrspace(5)
-// APPROX-NEXT:    call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[__TMP_I]]) #[[ATTR15]]
-// APPROX-NEXT:    [[CALL_I:%.*]] = call contract noundef float @__ocml_remquo_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR14]]
+// APPROX-NEXT:    call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]]
+// APPROX-NEXT:    [[CALL_I:%.*]] = call contract noundef float @__ocml_remquo_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]]
 // APPROX-NEXT:    [[TMP0:%.*]] = load i32, ptr addrspace(5) [[__TMP_I]], align 4, !tbaa [[TBAA12]]
 // APPROX-NEXT:    store i32 [[TMP0]], ptr [[Z:%.*]], align 4, !tbaa [[TBAA12]]
-// APPROX-NEXT:    call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[__TMP_I]]) #[[ATTR15]]
+// APPROX-NEXT:    call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]]
 // APPROX-NEXT:    ret float [[CALL_I]]
 //
 // NCRDIV-LABEL: @test_remquof(
 // NCRDIV-NEXT:  entry:
 // NCRDIV-NEXT:    [[__TMP_I:%.*]] = alloca i32, align 4, addrspace(5)
-// NCRDIV-NEXT:    call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[__TMP_I]]) #[[ATTR15]]
-// NCRDIV-NEXT:    [[CALL_I:%.*]] = call contract noundef float @__ocml_remquo_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR14]]
+// NCRDIV-NEXT:    call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]]
+// NCRDIV-NEXT:    [[CALL_I:%.*]] = call contract noundef float @__ocml_remquo_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]]
 // NCRDIV-NEXT:    [[TMP0:%.*]] = load i32, ptr addrspace(5) [[__TMP_I]], align 4, !tbaa [[TBAA13]]
 // NCRDIV-NEXT:    store i32 [[TMP0]], ptr [[Z:%.*]], align 4, !tbaa [[TBAA13]]
-// NCRDIV-NEXT:    call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[__TMP_I]]) #[[ATTR15]]
+// NCRDIV-NEXT:    call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]]
 // NCRDIV-NEXT:    ret float [[CALL_I]]
 //
 // AMDGCNSPIRV-LABEL: @test_remquof(
@@ -5465,41 +5417,41 @@ extern "C" __device__ float test_remquof(float x, float y, int* z) {
 // DEFAULT-LABEL: @test_remquo(
 // DEFAULT-NEXT:  entry:
 // DEFAULT-NEXT:    [[__TMP_I:%.*]] = alloca i32, align 4, addrspace(5)
-// DEFAULT-NEXT:    call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[__TMP_I]]) #[[ATTR15]]
-// DEFAULT-NEXT:    [[CALL_I:%.*]] = call contract noundef double @__ocml_remquo_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR14]]
+// DEFAULT-NEXT:    call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]]
+// DEFAULT-NEXT:    [[CALL_I:%.*]] = call contract noundef double @__ocml_remquo_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]]
 // DEFAULT-NEXT:    [[TMP0:%.*]] = load i32, ptr addrspace(5) [[__TMP_I]], align 4, !tbaa [[TBAA12]]
 // DEFAULT-NEXT:    store i32 [[TMP0]], ptr [[Z:%.*]], align 4, !tbaa [[TBAA12]]
-// DEFAULT-NEXT:    call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[__TMP_I]]) #[[ATTR15]]
+// DEFAULT-NEXT:    call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]]
 // DEFAULT-NEXT:    ret double [[CALL_I]]
 //
 // FINITEONLY-LABEL: @test_remquo(
 // FINITEONLY-NEXT:  entry:
 // FINITEONLY-NEXT:    [[__TMP_I:%.*]] = alloca i32, align 4, addrspace(5)
-// FINITEONLY-NEXT:    call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[__TMP_I]]) #[[ATTR15]]
-// FINITEONLY-NEXT:    [[CALL_I:%.*]] = call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_remquo_f64(double noundef nofpclass(nan inf) [[X:%.*]], double noundef nofpclass(nan inf) [[Y:%.*]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR14]]
+// FINITEONLY-NEXT:    call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]]
+// FINITEONLY-NEXT:    [[CALL_I:%.*]] = call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_remquo_f64(double noundef nofpclass(nan inf) [[X:%.*]], double noundef nofpclass(nan inf) [[Y:%.*]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]]
 // FINITEONLY-NEXT:    [[TMP0:%.*]] = load i32, ptr addrspace(5) [[__TMP_I]], align 4, !tbaa [[TBAA12]]
 // FINITEONLY-NEXT:    store i32 [[TMP0]], ptr [[Z:%.*]], align 4, !tbaa [[TBAA12]]
-// FINITEONLY-NEXT:    call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[__TMP_I]]) #[[ATTR15]]
+// FINITEONLY-NEXT:    call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]]
 // FINITEONLY-NEXT:    ret double [[CALL_I]]
 //
 // APPROX-LABEL: @test_remquo(
 // APPROX-NEXT:  entry:
 // APPROX-NEXT:    [[__TMP_I:%.*]] = alloca i32, align 4, addrspace(5)
-// APPROX-NEXT:    call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[__TMP_I]]) #[[ATTR15]]
-// APPROX-NEXT:    [[CALL_I:%.*]] = call contract noundef double @__ocml_remquo_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR14]]
+// APPROX-NEXT:    call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]]
+// APPROX-NEXT:    [[CALL_I:%.*]] = call contract noundef double @__ocml_remquo_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]]
 // APPROX-NEXT:    [[TMP0:%.*]] = load i32, ptr addrspace(5) [[__TMP_I]], align 4, !tbaa [[TBAA12]]
 // APPROX-NEXT:    store i32 [[TMP0]], ptr [[Z:%.*]], align 4, !tbaa [[TBAA12]]
-// APPROX-NEXT:    call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[__TMP_I]]) #[[ATTR15]]
+// APPROX-NEXT:    call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]]
 // APPROX-NEXT:    ret double [[CALL_I]]
 //
 // NCRDIV-LABEL: @test_remquo(
 // NCRDIV-NEXT:  entry:
 // NCRDIV-NEXT:    [[__TMP_I:%.*]] = alloca i32, align 4, addrspace(5)
-// NCRDIV-NEXT:    call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[__TMP_I]]) #[[ATTR15]]
-// NCRDIV-NEXT:    [[CALL_I:%.*]] = call contract noundef double @__ocml_remquo_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR14]]
+// NCRDIV-NEXT:    call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]]
+// NCRDIV-NEXT:    [[CALL_I:%.*]] = call contract noundef double @__ocml_remquo_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]]
 // NCRDIV-NEXT:    [[TMP0:%.*]] = load i32, ptr addrspace(5) [[__TMP_I]], align 4, !tbaa [[TBAA13]]
 // NCRDIV-NEXT:    store i32 [[TMP0]], ptr [[Z:%.*]], align 4, !tbaa [[TBAA13]]
-// NCRDIV-NEXT:    call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[__TMP_I]]) #[[ATTR15]]
+// NCRDIV-NEXT:    call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]]
 // NCRDIV-NEXT:    ret double [[CALL_I]]
 //
 // AMDGCNSPIRV-LABEL: @test_remquo(
@@ -5519,22 +5471,22 @@ extern "C" __device__ double test_remquo(double x, double y, int* z) {
 
 // DEFAULT-LABEL: @test_rhypotf(
 // DEFAULT-NEXT:  entry:
-// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_rhypot_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]]) #[[ATTR12]]
+// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_rhypot_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]]) #[[ATTR14]]
 // DEFAULT-NEXT:    ret float [[CALL_I]]
 //
 // FINITEONLY-LABEL: @test_rhypotf(
 // FINITEONLY-NEXT:  entry:
-// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_rhypot_f32(float noundef nofpclass(nan inf) [[X:%.*]], float noundef nofpclass(nan inf) [[Y:%.*]]) #[[ATTR12]]
+// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_rhypot_f32(float noundef nofpclass(nan inf) [[X:%.*]], float noundef nofpclass(nan inf) [[Y:%.*]]) #[[ATTR14]]
 // FINITEONLY-NEXT:    ret float [[CALL_I]]
 //
 // APPROX-LABEL: @test_rhypotf(
 // APPROX-NEXT:  entry:
-// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_rhypot_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]]) #[[ATTR12]]
+// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_rhypot_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]]) #[[ATTR14]]
 // APPROX-NEXT:    ret float [[CALL_I]]
 //
 // NCRDIV-LABEL: @test_rhypotf(
 // NCRDIV-NEXT:  entry:
-// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_rhypot_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]]) #[[ATTR12]]
+// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_rhypot_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]]) #[[ATTR14]]
 // NCRDIV-NEXT:    ret float [[CALL_I]]
 //
 // AMDGCNSPIRV-LABEL: @test_rhypotf(
@@ -5548,22 +5500,22 @@ extern "C" __device__ float test_rhypotf(float x, float y) {
 
 // DEFAULT-LABEL: @test_rhypot(
 // DEFAULT-NEXT:  entry:
-// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_rhypot_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]]) #[[ATTR12]]
+// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_rhypot_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]]) #[[ATTR14]]
 // DEFAULT-NEXT:    ret double [[CALL_I]]
 //
 // FINITEONLY-LABEL: @test_rhypot(
 // FINITEONLY-NEXT:  entry:
-// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_rhypot_f64(double noundef nofpclass(nan inf) [[X:%.*]], double noundef nofpclass(nan inf) [[Y:%.*]]) #[[ATTR12]]
+// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_rhypot_f64(double noundef nofpclass(nan inf) [[X:%.*]], double noundef nofpclass(nan inf) [[Y:%.*]]) #[[ATTR14]]
 // FINITEONLY-NEXT:    ret double [[CALL_I]]
 //
 // APPROX-LABEL: @test_rhypot(
 // APPROX-NEXT:  entry:
-// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_rhypot_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]]) #[[ATTR12]]
+// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_rhypot_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]]) #[[ATTR14]]
 // APPROX-NEXT:    ret double [[CALL_I]]
 //
 // NCRDIV-LABEL: @test_rhypot(
 // NCRDIV-NEXT:  entry:
-// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_rhypot_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]]) #[[ATTR12]]
+// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_rhypot_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]]) #[[ATTR14]]
 // NCRDIV-NEXT:    ret double [[CALL_I]]
 //
 // AMDGCNSPIRV-LABEL: @test_rhypot(
@@ -5650,7 +5602,7 @@ extern "C" __device__ double test_rint(double x) {
 // DEFAULT-NEXT:    br i1 [[TOBOOL_NOT_I]], label [[_ZL6RNORMFIPKF_EXIT]], label [[WHILE_BODY_I]], !llvm.loop [[LOOP22:![0-9]+]]
 // DEFAULT:       _ZL6rnormfiPKf.exit:
 // DEFAULT-NEXT:    [[__R_0_I_LCSSA:%.*]] = phi float [ 0.000000e+00, [[ENTRY]] ], [ [[ADD_I]], [[WHILE_BODY_I]] ]
-// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_rsqrt_f32(float noundef [[__R_0_I_LCSSA]]) #[[ATTR13]]
+// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_rsqrt_f32(float noundef [[__R_0_I_LCSSA]]) #[[ATTR15]]
 // DEFAULT-NEXT:    ret float [[CALL_I]]
 //
 // FINITEONLY-LABEL: @test_rnormf(
@@ -5670,7 +5622,7 @@ extern "C" __device__ double test_rint(double x) {
 // FINITEONLY-NEXT:    br i1 [[TOBOOL_NOT_I]], label [[_ZL6RNORMFIPKF_EXIT]], label [[WHILE_BODY_I]], !llvm.loop [[LOOP22:![0-9]+]]
 // FINITEONLY:       _ZL6rnormfiPKf.exit:
 // FINITEONLY-NEXT:    [[__R_0_I_LCSSA:%.*]] = phi float [ 0.000000e+00, [[ENTRY]] ], [ [[ADD_I]], [[WHILE_BODY_I]] ]
-// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_rsqrt_f32(float noundef nofpclass(nan inf) [[__R_0_I_LCSSA]]) #[[ATTR13]]
+// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_rsqrt_f32(float noundef nofpclass(nan inf) [[__R_0_I_LCSSA]]) #[[ATTR15]]
 // FINITEONLY-NEXT:    ret float [[CALL_I]]
 //
 // APPROX-LABEL: @test_rnormf(
@@ -5690,7 +5642,7 @@ extern "C" __device__ double test_rint(double x) {
 // APPROX-NEXT:    br i1 [[TOBOOL_NOT_I]], label [[_ZL6RNORMFIPKF_EXIT]], label [[WHILE_BODY_I]], !llvm.loop [[LOOP22:![0-9]+]]
 // APPROX:       _ZL6rnormfiPKf.exit:
 // APPROX-NEXT:    [[__R_0_I_LCSSA:%.*]] = phi float [ 0.000000e+00, [[ENTRY]] ], [ [[ADD_I]], [[WHILE_BODY_I]] ]
-// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_rsqrt_f32(float noundef [[__R_0_I_LCSSA]]) #[[ATTR13]]
+// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_rsqrt_f32(float noundef [[__R_0_I_LCSSA]]) #[[ATTR15]]
 // APPROX-NEXT:    ret float [[CALL_I]]
 //
 // NCRDIV-LABEL: @test_rnormf(
@@ -5707,10 +5659,10 @@ extern "C" __device__ double test_rint(double x) {
 // NCRDIV-NEXT:    [[ADD_I]] = fadd contract float [[__R_0_I4]], [[MUL_I]]
 // NCRDIV-NEXT:    [[INCDEC_PTR_I]] = getelementptr inbounds nuw i8, ptr [[__A_ADDR_0_I3]], i64 4
 // NCRDIV-NEXT:    [[TOBOOL_NOT_I:%.*]] = icmp eq i32 [[DEC_I]], 0
-// NCRDIV-NEXT:    br i1 [[TOBOOL_NOT_I]], label [[_ZL6RNORMFIPKF_EXIT]], label [[WHILE_BODY_I]], !llvm.loop [[LOOP24:![0-9]+]]
+// NCRDIV-NEXT:    br i1 [[TOBOOL_NOT_I]], label [[_ZL6RNORMFIPKF_EXIT]], label [[WHILE_BODY_I]], !llvm.loop [[LOOP23:![0-9]+]]
 // NCRDIV:       _ZL6rnormfiPKf.exit:
 // NCRDIV-NEXT:    [[__R_0_I_LCSSA:%.*]] = phi float [ 0.000000e+00, [[ENTRY]] ], [ [[ADD_I]], [[WHILE_BODY_I]] ]
-// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_rsqrt_f32(float noundef [[__R_0_I_LCSSA]]) #[[ATTR13]]
+// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_rsqrt_f32(float noundef [[__R_0_I_LCSSA]]) #[[ATTR15]]
 // NCRDIV-NEXT:    ret float [[CALL_I]]
 //
 // AMDGCNSPIRV-LABEL: @test_rnormf(
@@ -5754,7 +5706,7 @@ extern "C" __device__ float test_rnormf(int x, const float* y) {
 // DEFAULT-NEXT:    br i1 [[TOBOOL_NOT_I]], label [[_ZL5RNORMIPKD_EXIT]], label [[WHILE_BODY_I]], !llvm.loop [[LOOP23:![0-9]+]]
 // DEFAULT:       _ZL5rnormiPKd.exit:
 // DEFAULT-NEXT:    [[__R_0_I_LCSSA:%.*]] = phi double [ 0.000000e+00, [[ENTRY]] ], [ [[ADD_I]], [[WHILE_BODY_I]] ]
-// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_rsqrt_f64(double noundef [[__R_0_I_LCSSA]]) #[[ATTR13]]
+// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_rsqrt_f64(double noundef [[__R_0_I_LCSSA]]) #[[ATTR15]]
 // DEFAULT-NEXT:    ret double [[CALL_I]]
 //
 // FINITEONLY-LABEL: @test_rnorm(
@@ -5774,7 +5726,7 @@ extern "C" __device__ float test_rnormf(int x, const float* y) {
 // FINITEONLY-NEXT:    br i1 [[TOBOOL_NOT_I]], label [[_ZL5RNORMIPKD_EXIT]], label [[WHILE_BODY_I]], !llvm.loop [[LOOP23:![0-9]+]]
 // FINITEONLY:       _ZL5rnormiPKd.exit:
 // FINITEONLY-NEXT:    [[__R_0_I_LCSSA:%.*]] = phi double [ 0.000000e+00, [[ENTRY]] ], [ [[ADD_I]], [[WHILE_BODY_I]] ]
-// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_rsqrt_f64(double noundef nofpclass(nan inf) [[__R_0_I_LCSSA]]) #[[ATTR13]]
+// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_rsqrt_f64(double noundef nofpclass(nan inf) [[__R_0_I_LCSSA]]) #[[ATTR15]]
 // FINITEONLY-NEXT:    ret double [[CALL_I]]
 //
 // APPROX-LABEL: @test_rnorm(
@@ -5794,7 +5746,7 @@ extern "C" __device__ float test_rnormf(int x, const float* y) {
 // APPROX-NEXT:    br i1 [[TOBOOL_NOT_I]], label [[_ZL5RNORMIPKD_EXIT]], label [[WHILE_BODY_I]], !llvm.loop [[LOOP23:![0-9]+]]
 // APPROX:       _ZL5rnormiPKd.exit:
 // APPROX-NEXT:    [[__R_0_I_LCSSA:%.*]] = phi double [ 0.000000e+00, [[ENTRY]] ], [ [[ADD_I]], [[WHILE_BODY_I]] ]
-// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_rsqrt_f64(double noundef [[__R_0_I_LCSSA]]) #[[ATTR13]]
+// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_rsqrt_f64(double noundef [[__R_0_I_LCSSA]]) #[[ATTR15]]
 // APPROX-NEXT:    ret double [[CALL_I]]
 //
 // NCRDIV-LABEL: @test_rnorm(
@@ -5811,10 +5763,10 @@ extern "C" __device__ float test_rnormf(int x, const float* y) {
 // NCRDIV-NEXT:    [[ADD_I]] = fadd contract double [[__R_0_I4]], [[MUL_I]]
 // NCRDIV-NEXT:    [[INCDEC_PTR_I]] = getelementptr inbounds nuw i8, ptr [[__A_ADDR_0_I3]], i64 8
 // NCRDIV-NEXT:    [[TOBOOL_NOT_I:%.*]] = icmp eq i32 [[DEC_I]], 0
-// NCRDIV-NEXT:    br i1 [[TOBOOL_NOT_I]], label [[_ZL5RNORMIPKD_EXIT]], label [[WHILE_BODY_I]], !llvm.loop [[LOOP25:![0-9]+]]
+// NCRDIV-NEXT:    br i1 [[TOBOOL_NOT_I]], label [[_ZL5RNORMIPKD_EXIT]], label [[WHILE_BODY_I]], !llvm.loop [[LOOP24:![0-9]+]]
 // NCRDIV:       _ZL5rnormiPKd.exit:
 // NCRDIV-NEXT:    [[__R_0_I_LCSSA:%.*]] = phi double [ 0.000000e+00, [[ENTRY]] ], [ [[ADD_I]], [[WHILE_BODY_I]] ]
-// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_rsqrt_f64(double noundef [[__R_0_I_LCSSA]]) #[[ATTR13]]
+// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_rsqrt_f64(double noundef [[__R_0_I_LCSSA]]) #[[ATTR15]]
 // NCRDIV-NEXT:    ret double [[CALL_I]]
 //
 // AMDGCNSPIRV-LABEL: @test_rnorm(
@@ -5843,22 +5795,22 @@ extern "C" __device__ double test_rnorm(int x, const double* y) {
 
 // DEFAULT-LABEL: @test_rnorm3df(
 // DEFAULT-NEXT:  entry:
-// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_rlen3_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]], float noundef [[Z:%.*]]) #[[ATTR12]]
+// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_rlen3_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]], float noundef [[Z:%.*]]) #[[ATTR14]]
 // DEFAULT-NEXT:    ret float [[CALL_I]]
 //
 // FINITEONLY-LABEL: @test_rnorm3df(
 // FINITEONLY-NEXT:  entry:
-// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_rlen3_f32(float noundef nofpclass(nan inf) [[X:%.*]], float noundef nofpclass(nan inf) [[Y:%.*]], float noundef nofpclass(nan inf) [[Z:%.*]]) #[[ATTR12]]
+// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_rlen3_f32(float noundef nofpclass(nan inf) [[X:%.*]], float noundef nofpclass(nan inf) [[Y:%.*]], float noundef nofpclass(nan inf) [[Z:%.*]]) #[[ATTR14]]
 // FINITEONLY-NEXT:    ret float [[CALL_I]]
 //
 // APPROX-LABEL: @test_rnorm3df(
 // APPROX-NEXT:  entry:
-// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_rlen3_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]], float noundef [[Z:%.*]]) #[[ATTR12]]
+// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_rlen3_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]], float noundef [[Z:%.*]]) #[[ATTR14]]
 // APPROX-NEXT:    ret float [[CALL_I]]
 //
 // NCRDIV-LABEL: @test_rnorm3df(
 // NCRDIV-NEXT:  entry:
-// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_rlen3_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]], float noundef [[Z:%.*]]) #[[ATTR12]]
+// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_rlen3_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]], float noundef [[Z:%.*]]) #[[ATTR14]]
 // NCRDIV-NEXT:    ret float [[CALL_I]]
 //
 // AMDGCNSPIRV-LABEL: @test_rnorm3df(
@@ -5872,22 +5824,22 @@ extern "C" __device__ float test_rnorm3df(float x, float y, float z) {
 
 // DEFAULT-LABEL: @test_rnorm3d(
 // DEFAULT-NEXT:  entry:
-// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_rlen3_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]], double noundef [[Z:%.*]]) #[[ATTR12]]
+// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_rlen3_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]], double noundef [[Z:%.*]]) #[[ATTR14]]
 // DEFAULT-NEXT:    ret double [[CALL_I]]
 //
 // FINITEONLY-LABEL: @test_rnorm3d(
 // FINITEONLY-NEXT:  entry:
-// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_rlen3_f64(double noundef nofpclass(nan inf) [[X:%.*]], double noundef nofpclass(nan inf) [[Y:%.*]], double noundef nofpclass(nan inf) [[Z:%.*]]) #[[ATTR12]]
+// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_rlen3_f64(double noundef nofpclass(nan inf) [[X:%.*]], double noundef nofpclass(nan inf) [[Y:%.*]], double noundef nofpclass(nan inf) [[Z:%.*]]) #[[ATTR14]]
 // FINITEONLY-NEXT:    ret double [[CALL_I]]
 //
 // APPROX-LABEL: @test_rnorm3d(
 // APPROX-NEXT:  entry:
-// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_rlen3_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]], double noundef [[Z:%.*]]) #[[ATTR12]]
+// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_rlen3_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]], double noundef [[Z:%.*]]) #[[ATTR14]]
 // APPROX-NEXT:    ret double [[CALL_I]]
 //
 // NCRDIV-LABEL: @test_rnorm3d(
 // NCRDIV-NEXT:  entry:
-// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_rlen3_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]], double noundef [[Z:%.*]]) #[[ATTR12]]
+// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_rlen3_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]], double noundef [[Z:%.*]]) #[[ATTR14]]
 // NCRDIV-NEXT:    ret double [[CALL_I]]
 //
 // AMDGCNSPIRV-LABEL: @test_rnorm3d(
@@ -5901,22 +5853,22 @@ extern "C" __device__ double test_rnorm3d(double x, double y, double z) {
 
 // DEFAULT-LABEL: @test_rnorm4df(
 // DEFAULT-NEXT:  entry:
-// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_rlen4_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]], float noundef [[Z:%.*]], float noundef [[W:%.*]]) #[[ATTR12]]
+// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_rlen4_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]], float noundef [[Z:%.*]], float noundef [[W:%.*]]) #[[ATTR14]]
 // DEFAULT-NEXT:    ret float [[CALL_I]]
 //
 // FINITEONLY-LABEL: @test_rnorm4df(
 // FINITEONLY-NEXT:  entry:
-// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_rlen4_f32(float noundef nofpclass(nan inf) [[X:%.*]], float noundef nofpclass(nan inf) [[Y:%.*]], float noundef nofpclass(nan inf) [[Z:%.*]], float noundef nofpclass(nan inf) [[W:%.*]]) #[[ATTR12]]
+// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_rlen4_f32(float noundef nofpclass(nan inf) [[X:%.*]], float noundef nofpclass(nan inf) [[Y:%.*]], float noundef nofpclass(nan inf) [[Z:%.*]], float noundef nofpclass(nan inf) [[W:%.*]]) #[[ATTR14]]
 // FINITEONLY-NEXT:    ret float [[CALL_I]]
 //
 // APPROX-LABEL: @test_rnorm4df(
 // APPROX-NEXT:  entry:
-// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_rlen4_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]], float noundef [[Z:%.*]], float noundef [[W:%.*]]) #[[ATTR12]]
+// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_rlen4_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]], float noundef [[Z:%.*]], float noundef [[W:%.*]]) #[[ATTR14]]
 // APPROX-NEXT:    ret float [[CALL_I]]
 //
 // NCRDIV-LABEL: @test_rnorm4df(
 // NCRDIV-NEXT:  entry:
-// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_rlen4_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]], float noundef [[Z:%.*]], float noundef [[W:%.*]]) #[[ATTR12]]
+// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_rlen4_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]], float noundef [[Z:%.*]], float noundef [[W:%.*]]) #[[ATTR14]]
 // NCRDIV-NEXT:    ret float [[CALL_I]]
 //
 // AMDGCNSPIRV-LABEL: @test_rnorm4df(
@@ -5930,22 +5882,22 @@ extern "C" __device__ float test_rnorm4df(float x, float y, float z, float w) {
 
 // DEFAULT-LABEL: @test_rnorm4d(
 // DEFAULT-NEXT:  entry:
-// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_rlen4_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]], double noundef [[Z:%.*]], double noundef [[W:%.*]]) #[[ATTR12]]
+// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_rlen4_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]], double noundef [[Z:%.*]], double noundef [[W:%.*]]) #[[ATTR14]]
 // DEFAULT-NEXT:    ret double [[CALL_I]]
 //
 // FINITEONLY-LABEL: @test_rnorm4d(
 // FINITEONLY-NEXT:  entry:
-// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_rlen4_f64(double noundef nofpclass(nan inf) [[X:%.*]], double noundef nofpclass(nan inf) [[Y:%.*]], double noundef nofpclass(nan inf) [[Z:%.*]], double noundef nofpclass(nan inf) [[W:%.*]]) #[[ATTR12]]
+// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_rlen4_f64(double noundef nofpclass(nan inf) [[X:%.*]], double noundef nofpclass(nan inf) [[Y:%.*]], double noundef nofpclass(nan inf) [[Z:%.*]], double noundef nofpclass(nan inf) [[W:%.*]]) #[[ATTR14]]
 // FINITEONLY-NEXT:    ret double [[CALL_I]]
 //
 // APPROX-LABEL: @test_rnorm4d(
 // APPROX-NEXT:  entry:
-// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_rlen4_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]], double noundef [[Z:%.*]], double noundef [[W:%.*]]) #[[ATTR12]]
+// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_rlen4_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]], double noundef [[Z:%.*]], double noundef [[W:%.*]]) #[[ATTR14]]
 // APPROX-NEXT:    ret double [[CALL_I]]
 //
 // NCRDIV-LABEL: @test_rnorm4d(
 // NCRDIV-NEXT:  entry:
-// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_rlen4_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]], double noundef [[Z:%.*]], double noundef [[W:%.*]]) #[[ATTR12]]
+// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_rlen4_f64(double noundef [[X:%.*]], double noundef [[Y:%.*]], double noundef [[Z:%.*]], double noundef [[W:%.*]]) #[[ATTR14]]
 // NCRDIV-NEXT:    ret double [[CALL_I]]
 //
 // AMDGCNSPIRV-LABEL: @test_rnorm4d(
@@ -6017,22 +5969,22 @@ extern "C" __device__ double test_round(double x) {
 
 // DEFAULT-LABEL: @test_rsqrtf(
 // DEFAULT-NEXT:  entry:
-// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_rsqrt_f32(float noundef [[X:%.*]]) #[[ATTR13]]
+// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_rsqrt_f32(float noundef [[X:%.*]]) #[[ATTR15]]
 // DEFAULT-NEXT:    ret float [[CALL_I]]
 //
 // FINITEONLY-LABEL: @test_rsqrtf(
 // FINITEONLY-NEXT:  entry:
-// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_rsqrt_f32(float noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR13]]
+// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_rsqrt_f32(float noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR15]]
 // FINITEONLY-NEXT:    ret float [[CALL_I]]
 //
 // APPROX-LABEL: @test_rsqrtf(
 // APPROX-NEXT:  entry:
-// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_rsqrt_f32(float noundef [[X:%.*]]) #[[ATTR13]]
+// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_rsqrt_f32(float noundef [[X:%.*]]) #[[ATTR15]]
 // APPROX-NEXT:    ret float [[CALL_I]]
 //
 // NCRDIV-LABEL: @test_rsqrtf(
 // NCRDIV-NEXT:  entry:
-// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_rsqrt_f32(float noundef [[X:%.*]]) #[[ATTR13]]
+// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_rsqrt_f32(float noundef [[X:%.*]]) #[[ATTR15]]
 // NCRDIV-NEXT:    ret float [[CALL_I]]
 //
 // AMDGCNSPIRV-LABEL: @test_rsqrtf(
@@ -6046,22 +5998,22 @@ extern "C" __device__ float test_rsqrtf(float x) {
 
 // DEFAULT-LABEL: @test_rsqrt(
 // DEFAULT-NEXT:  entry:
-// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_rsqrt_f64(double noundef [[X:%.*]]) #[[ATTR13]]
+// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_rsqrt_f64(double noundef [[X:%.*]]) #[[ATTR15]]
 // DEFAULT-NEXT:    ret double [[CALL_I]]
 //
 // FINITEONLY-LABEL: @test_rsqrt(
 // FINITEONLY-NEXT:  entry:
-// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_rsqrt_f64(double noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR13]]
+// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_rsqrt_f64(double noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR15]]
 // FINITEONLY-NEXT:    ret double [[CALL_I]]
 //
 // APPROX-LABEL: @test_rsqrt(
 // APPROX-NEXT:  entry:
-// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_rsqrt_f64(double noundef [[X:%.*]]) #[[ATTR13]]
+// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_rsqrt_f64(double noundef [[X:%.*]]) #[[ATTR15]]
 // APPROX-NEXT:    ret double [[CALL_I]]
 //
 // NCRDIV-LABEL: @test_rsqrt(
 // NCRDIV-NEXT:  entry:
-// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_rsqrt_f64(double noundef [[X:%.*]]) #[[ATTR13]]
+// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_rsqrt_f64(double noundef [[X:%.*]]) #[[ATTR15]]
 // NCRDIV-NEXT:    ret double [[CALL_I]]
 //
 // AMDGCNSPIRV-LABEL: @test_rsqrt(
@@ -6246,45 +6198,45 @@ extern "C" __device__ BOOL_TYPE test___signbit(double x) {
 // DEFAULT-LABEL: @test_sincosf(
 // DEFAULT-NEXT:  entry:
 // DEFAULT-NEXT:    [[__TMP_I:%.*]] = alloca float, align 4, addrspace(5)
-// DEFAULT-NEXT:    call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[__TMP_I]]) #[[ATTR15]]
-// DEFAULT-NEXT:    [[CALL_I:%.*]] = call contract float @__ocml_sincos_f32(float noundef [[X:%.*]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR14]]
+// DEFAULT-NEXT:    call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]]
+// DEFAULT-NEXT:    [[CALL_I:%.*]] = call contract float @__ocml_sincos_f32(float noundef [[X:%.*]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]]
 // DEFAULT-NEXT:    store float [[CALL_I]], ptr [[Y:%.*]], align 4, !tbaa [[TBAA16]]
 // DEFAULT-NEXT:    [[TMP0:%.*]] = load float, ptr addrspace(5) [[__TMP_I]], align 4, !tbaa [[TBAA16]]
 // DEFAULT-NEXT:    store float [[TMP0]], ptr [[Z:%.*]], align 4, !tbaa [[TBAA16]]
-// DEFAULT-NEXT:    call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[__TMP_I]]) #[[ATTR15]]
+// DEFAULT-NEXT:    call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]]
 // DEFAULT-NEXT:    ret void
 //
 // FINITEONLY-LABEL: @test_sincosf(
 // FINITEONLY-NEXT:  entry:
 // FINITEONLY-NEXT:    [[__TMP_I:%.*]] = alloca float, align 4, addrspace(5)
-// FINITEONLY-NEXT:    call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[__TMP_I]]) #[[ATTR15]]
-// FINITEONLY-NEXT:    [[CALL_I:%.*]] = call nnan ninf contract nofpclass(nan inf) float @__ocml_sincos_f32(float noundef nofpclass(nan inf) [[X:%.*]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR14]]
+// FINITEONLY-NEXT:    call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]]
+// FINITEONLY-NEXT:    [[CALL_I:%.*]] = call nnan ninf contract nofpclass(nan inf) float @__ocml_sincos_f32(float noundef nofpclass(nan inf) [[X:%.*]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]]
 // FINITEONLY-NEXT:    store float [[CALL_I]], ptr [[Y:%.*]], align 4, !tbaa [[TBAA16]]
 // FINITEONLY-NEXT:    [[TMP0:%.*]] = load float, ptr addrspace(5) [[__TMP_I]], align 4, !tbaa [[TBAA16]]
 // FINITEONLY-NEXT:    store float [[TMP0]], ptr [[Z:%.*]], align 4, !tbaa [[TBAA16]]
-// FINITEONLY-NEXT:    call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[__TMP_I]]) #[[ATTR15]]
+// FINITEONLY-NEXT:    call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]]
 // FINITEONLY-NEXT:    ret void
 //
 // APPROX-LABEL: @test_sincosf(
 // APPROX-NEXT:  entry:
 // APPROX-NEXT:    [[__TMP_I:%.*]] = alloca float, align 4, addrspace(5)
-// APPROX-NEXT:    call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[__TMP_I]]) #[[ATTR15]]
-// APPROX-NEXT:    [[CALL_I:%.*]] = call contract float @__ocml_sincos_f32(float noundef [[X:%.*]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR14]]
+// APPROX-NEXT:    call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]]
+// APPROX-NEXT:    [[CALL_I:%.*]] = call contract float @__ocml_sincos_f32(float noundef [[X:%.*]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]]
 // APPROX-NEXT:    store float [[CALL_I]], ptr [[Y:%.*]], align 4, !tbaa [[TBAA16]]
 // APPROX-NEXT:    [[TMP0:%.*]] = load float, ptr addrspace(5) [[__TMP_I]], align 4, !tbaa [[TBAA16]]
 // APPROX-NEXT:    store float [[TMP0]], ptr [[Z:%.*]], align 4, !tbaa [[TBAA16]]
-// APPROX-NEXT:    call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[__TMP_I]]) #[[ATTR15]]
+// APPROX-NEXT:    call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]]
 // APPROX-NEXT:    ret void
 //
 // NCRDIV-LABEL: @test_sincosf(
 // NCRDIV-NEXT:  entry:
 // NCRDIV-NEXT:    [[__TMP_I:%.*]] = alloca float, align 4, addrspace(5)
-// NCRDIV-NEXT:    call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[__TMP_I]]) #[[ATTR15]]
-// NCRDIV-NEXT:    [[CALL_I:%.*]] = call contract float @__ocml_sincos_f32(float noundef [[X:%.*]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR14]]
+// NCRDIV-NEXT:    call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]]
+// NCRDIV-NEXT:    [[CALL_I:%.*]] = call contract float @__ocml_sincos_f32(float noundef [[X:%.*]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]]
 // NCRDIV-NEXT:    store float [[CALL_I]], ptr [[Y:%.*]], align 4, !tbaa [[TBAA17]]
 // NCRDIV-NEXT:    [[TMP0:%.*]] = load float, ptr addrspace(5) [[__TMP_I]], align 4, !tbaa [[TBAA17]]
 // NCRDIV-NEXT:    store float [[TMP0]], ptr [[Z:%.*]], align 4, !tbaa [[TBAA17]]
-// NCRDIV-NEXT:    call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[__TMP_I]]) #[[ATTR15]]
+// NCRDIV-NEXT:    call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]]
 // NCRDIV-NEXT:    ret void
 //
 // AMDGCNSPIRV-LABEL: @test_sincosf(
@@ -6306,45 +6258,45 @@ extern "C" __device__ void test_sincosf(float x, float *y, float *z) {
 // DEFAULT-LABEL: @test_sincos(
 // DEFAULT-NEXT:  entry:
 // DEFAULT-NEXT:    [[__TMP_I:%.*]] = alloca double, align 8, addrspace(5)
-// DEFAULT-NEXT:    call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[__TMP_I]]) #[[ATTR15]]
-// DEFAULT-NEXT:    [[CALL_I:%.*]] = call contract double @__ocml_sincos_f64(double noundef [[X:%.*]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR14]]
+// DEFAULT-NEXT:    call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]]
+// DEFAULT-NEXT:    [[CALL_I:%.*]] = call contract double @__ocml_sincos_f64(double noundef [[X:%.*]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]]
 // DEFAULT-NEXT:    store double [[CALL_I]], ptr [[Y:%.*]], align 8, !tbaa [[TBAA18]]
 // DEFAULT-NEXT:    [[TMP0:%.*]] = load double, ptr addrspace(5) [[__TMP_I]], align 8, !tbaa [[TBAA18]]
 // DEFAULT-NEXT:    store double [[TMP0]], ptr [[Z:%.*]], align 8, !tbaa [[TBAA18]]
-// DEFAULT-NEXT:    call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[__TMP_I]]) #[[ATTR15]]
+// DEFAULT-NEXT:    call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]]
 // DEFAULT-NEXT:    ret void
 //
 // FINITEONLY-LABEL: @test_sincos(
 // FINITEONLY-NEXT:  entry:
 // FINITEONLY-NEXT:    [[__TMP_I:%.*]] = alloca double, align 8, addrspace(5)
-// FINITEONLY-NEXT:    call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[__TMP_I]]) #[[ATTR15]]
-// FINITEONLY-NEXT:    [[CALL_I:%.*]] = call nnan ninf contract nofpclass(nan inf) double @__ocml_sincos_f64(double noundef nofpclass(nan inf) [[X:%.*]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR14]]
+// FINITEONLY-NEXT:    call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]]
+// FINITEONLY-NEXT:    [[CALL_I:%.*]] = call nnan ninf contract nofpclass(nan inf) double @__ocml_sincos_f64(double noundef nofpclass(nan inf) [[X:%.*]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]]
 // FINITEONLY-NEXT:    store double [[CALL_I]], ptr [[Y:%.*]], align 8, !tbaa [[TBAA18]]
 // FINITEONLY-NEXT:    [[TMP0:%.*]] = load double, ptr addrspace(5) [[__TMP_I]], align 8, !tbaa [[TBAA18]]
 // FINITEONLY-NEXT:    store double [[TMP0]], ptr [[Z:%.*]], align 8, !tbaa [[TBAA18]]
-// FINITEONLY-NEXT:    call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[__TMP_I]]) #[[ATTR15]]
+// FINITEONLY-NEXT:    call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]]
 // FINITEONLY-NEXT:    ret void
 //
 // APPROX-LABEL: @test_sincos(
 // APPROX-NEXT:  entry:
 // APPROX-NEXT:    [[__TMP_I:%.*]] = alloca double, align 8, addrspace(5)
-// APPROX-NEXT:    call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[__TMP_I]]) #[[ATTR15]]
-// APPROX-NEXT:    [[CALL_I:%.*]] = call contract double @__ocml_sincos_f64(double noundef [[X:%.*]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR14]]
+// APPROX-NEXT:    call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]]
+// APPROX-NEXT:    [[CALL_I:%.*]] = call contract double @__ocml_sincos_f64(double noundef [[X:%.*]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]]
 // APPROX-NEXT:    store double [[CALL_I]], ptr [[Y:%.*]], align 8, !tbaa [[TBAA18]]
 // APPROX-NEXT:    [[TMP0:%.*]] = load double, ptr addrspace(5) [[__TMP_I]], align 8, !tbaa [[TBAA18]]
 // APPROX-NEXT:    store double [[TMP0]], ptr [[Z:%.*]], align 8, !tbaa [[TBAA18]]
-// APPROX-NEXT:    call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[__TMP_I]]) #[[ATTR15]]
+// APPROX-NEXT:    call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]]
 // APPROX-NEXT:    ret void
 //
 // NCRDIV-LABEL: @test_sincos(
 // NCRDIV-NEXT:  entry:
 // NCRDIV-NEXT:    [[__TMP_I:%.*]] = alloca double, align 8, addrspace(5)
-// NCRDIV-NEXT:    call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[__TMP_I]]) #[[ATTR15]]
-// NCRDIV-NEXT:    [[CALL_I:%.*]] = call contract double @__ocml_sincos_f64(double noundef [[X:%.*]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR14]]
+// NCRDIV-NEXT:    call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]]
+// NCRDIV-NEXT:    [[CALL_I:%.*]] = call contract double @__ocml_sincos_f64(double noundef [[X:%.*]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]]
 // NCRDIV-NEXT:    store double [[CALL_I]], ptr [[Y:%.*]], align 8, !tbaa [[TBAA19]]
 // NCRDIV-NEXT:    [[TMP0:%.*]] = load double, ptr addrspace(5) [[__TMP_I]], align 8, !tbaa [[TBAA19]]
 // NCRDIV-NEXT:    store double [[TMP0]], ptr [[Z:%.*]], align 8, !tbaa [[TBAA19]]
-// NCRDIV-NEXT:    call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[__TMP_I]]) #[[ATTR15]]
+// NCRDIV-NEXT:    call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]]
 // NCRDIV-NEXT:    ret void
 //
 // AMDGCNSPIRV-LABEL: @test_sincos(
@@ -6366,45 +6318,45 @@ extern "C" __device__ void test_sincos(double x, double *y, double *z) {
 // DEFAULT-LABEL: @test_sincospif(
 // DEFAULT-NEXT:  entry:
 // DEFAULT-NEXT:    [[__TMP_I:%.*]] = alloca float, align 4, addrspace(5)
-// DEFAULT-NEXT:    call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[__TMP_I]]) #[[ATTR15]]
-// DEFAULT-NEXT:    [[CALL_I:%.*]] = call contract float @__ocml_sincospi_f32(float noundef [[X:%.*]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR14]]
+// DEFAULT-NEXT:    call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]]
+// DEFAULT-NEXT:    [[CALL_I:%.*]] = call contract float @__ocml_sincospi_f32(float noundef [[X:%.*]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]]
 // DEFAULT-NEXT:    store float [[CALL_I]], ptr [[Y:%.*]], align 4, !tbaa [[TBAA16]]
 // DEFAULT-NEXT:    [[TMP0:%.*]] = load float, ptr addrspace(5) [[__TMP_I]], align 4, !tbaa [[TBAA16]]
 // DEFAULT-NEXT:    store float [[TMP0]], ptr [[Z:%.*]], align 4, !tbaa [[TBAA16]]
-// DEFAULT-NEXT:    call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[__TMP_I]]) #[[ATTR15]]
+// DEFAULT-NEXT:    call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]]
 // DEFAULT-NEXT:    ret void
 //
 // FINITEONLY-LABEL: @test_sincospif(
 // FINITEONLY-NEXT:  entry:
 // FINITEONLY-NEXT:    [[__TMP_I:%.*]] = alloca float, align 4, addrspace(5)
-// FINITEONLY-NEXT:    call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[__TMP_I]]) #[[ATTR15]]
-// FINITEONLY-NEXT:    [[CALL_I:%.*]] = call nnan ninf contract nofpclass(nan inf) float @__ocml_sincospi_f32(float noundef nofpclass(nan inf) [[X:%.*]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR14]]
+// FINITEONLY-NEXT:    call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]]
+// FINITEONLY-NEXT:    [[CALL_I:%.*]] = call nnan ninf contract nofpclass(nan inf) float @__ocml_sincospi_f32(float noundef nofpclass(nan inf) [[X:%.*]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]]
 // FINITEONLY-NEXT:    store float [[CALL_I]], ptr [[Y:%.*]], align 4, !tbaa [[TBAA16]]
 // FINITEONLY-NEXT:    [[TMP0:%.*]] = load float, ptr addrspace(5) [[__TMP_I]], align 4, !tbaa [[TBAA16]]
 // FINITEONLY-NEXT:    store float [[TMP0]], ptr [[Z:%.*]], align 4, !tbaa [[TBAA16]]
-// FINITEONLY-NEXT:    call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[__TMP_I]]) #[[ATTR15]]
+// FINITEONLY-NEXT:    call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]]
 // FINITEONLY-NEXT:    ret void
 //
 // APPROX-LABEL: @test_sincospif(
 // APPROX-NEXT:  entry:
 // APPROX-NEXT:    [[__TMP_I:%.*]] = alloca float, align 4, addrspace(5)
-// APPROX-NEXT:    call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[__TMP_I]]) #[[ATTR15]]
-// APPROX-NEXT:    [[CALL_I:%.*]] = call contract float @__ocml_sincospi_f32(float noundef [[X:%.*]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR14]]
+// APPROX-NEXT:    call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]]
+// APPROX-NEXT:    [[CALL_I:%.*]] = call contract float @__ocml_sincospi_f32(float noundef [[X:%.*]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]]
 // APPROX-NEXT:    store float [[CALL_I]], ptr [[Y:%.*]], align 4, !tbaa [[TBAA16]]
 // APPROX-NEXT:    [[TMP0:%.*]] = load float, ptr addrspace(5) [[__TMP_I]], align 4, !tbaa [[TBAA16]]
 // APPROX-NEXT:    store float [[TMP0]], ptr [[Z:%.*]], align 4, !tbaa [[TBAA16]]
-// APPROX-NEXT:    call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[__TMP_I]]) #[[ATTR15]]
+// APPROX-NEXT:    call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]]
 // APPROX-NEXT:    ret void
 //
 // NCRDIV-LABEL: @test_sincospif(
 // NCRDIV-NEXT:  entry:
 // NCRDIV-NEXT:    [[__TMP_I:%.*]] = alloca float, align 4, addrspace(5)
-// NCRDIV-NEXT:    call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[__TMP_I]]) #[[ATTR15]]
-// NCRDIV-NEXT:    [[CALL_I:%.*]] = call contract float @__ocml_sincospi_f32(float noundef [[X:%.*]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR14]]
+// NCRDIV-NEXT:    call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]]
+// NCRDIV-NEXT:    [[CALL_I:%.*]] = call contract float @__ocml_sincospi_f32(float noundef [[X:%.*]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]]
 // NCRDIV-NEXT:    store float [[CALL_I]], ptr [[Y:%.*]], align 4, !tbaa [[TBAA17]]
 // NCRDIV-NEXT:    [[TMP0:%.*]] = load float, ptr addrspace(5) [[__TMP_I]], align 4, !tbaa [[TBAA17]]
 // NCRDIV-NEXT:    store float [[TMP0]], ptr [[Z:%.*]], align 4, !tbaa [[TBAA17]]
-// NCRDIV-NEXT:    call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[__TMP_I]]) #[[ATTR15]]
+// NCRDIV-NEXT:    call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]]
 // NCRDIV-NEXT:    ret void
 //
 // AMDGCNSPIRV-LABEL: @test_sincospif(
@@ -6426,45 +6378,45 @@ extern "C" __device__ void test_sincospif(float x, float *y, float *z) {
 // DEFAULT-LABEL: @test_sincospi(
 // DEFAULT-NEXT:  entry:
 // DEFAULT-NEXT:    [[__TMP_I:%.*]] = alloca double, align 8, addrspace(5)
-// DEFAULT-NEXT:    call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[__TMP_I]]) #[[ATTR15]]
-// DEFAULT-NEXT:    [[CALL_I:%.*]] = call contract double @__ocml_sincospi_f64(double noundef [[X:%.*]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR14]]
+// DEFAULT-NEXT:    call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]]
+// DEFAULT-NEXT:    [[CALL_I:%.*]] = call contract double @__ocml_sincospi_f64(double noundef [[X:%.*]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]]
 // DEFAULT-NEXT:    store double [[CALL_I]], ptr [[Y:%.*]], align 8, !tbaa [[TBAA18]]
 // DEFAULT-NEXT:    [[TMP0:%.*]] = load double, ptr addrspace(5) [[__TMP_I]], align 8, !tbaa [[TBAA18]]
 // DEFAULT-NEXT:    store double [[TMP0]], ptr [[Z:%.*]], align 8, !tbaa [[TBAA18]]
-// DEFAULT-NEXT:    call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[__TMP_I]]) #[[ATTR15]]
+// DEFAULT-NEXT:    call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]]
 // DEFAULT-NEXT:    ret void
 //
 // FINITEONLY-LABEL: @test_sincospi(
 // FINITEONLY-NEXT:  entry:
 // FINITEONLY-NEXT:    [[__TMP_I:%.*]] = alloca double, align 8, addrspace(5)
-// FINITEONLY-NEXT:    call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[__TMP_I]]) #[[ATTR15]]
-// FINITEONLY-NEXT:    [[CALL_I:%.*]] = call nnan ninf contract nofpclass(nan inf) double @__ocml_sincospi_f64(double noundef nofpclass(nan inf) [[X:%.*]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR14]]
+// FINITEONLY-NEXT:    call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]]
+// FINITEONLY-NEXT:    [[CALL_I:%.*]] = call nnan ninf contract nofpclass(nan inf) double @__ocml_sincospi_f64(double noundef nofpclass(nan inf) [[X:%.*]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]]
 // FINITEONLY-NEXT:    store double [[CALL_I]], ptr [[Y:%.*]], align 8, !tbaa [[TBAA18]]
 // FINITEONLY-NEXT:    [[TMP0:%.*]] = load double, ptr addrspace(5) [[__TMP_I]], align 8, !tbaa [[TBAA18]]
 // FINITEONLY-NEXT:    store double [[TMP0]], ptr [[Z:%.*]], align 8, !tbaa [[TBAA18]]
-// FINITEONLY-NEXT:    call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[__TMP_I]]) #[[ATTR15]]
+// FINITEONLY-NEXT:    call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]]
 // FINITEONLY-NEXT:    ret void
 //
 // APPROX-LABEL: @test_sincospi(
 // APPROX-NEXT:  entry:
 // APPROX-NEXT:    [[__TMP_I:%.*]] = alloca double, align 8, addrspace(5)
-// APPROX-NEXT:    call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[__TMP_I]]) #[[ATTR15]]
-// APPROX-NEXT:    [[CALL_I:%.*]] = call contract double @__ocml_sincospi_f64(double noundef [[X:%.*]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR14]]
+// APPROX-NEXT:    call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]]
+// APPROX-NEXT:    [[CALL_I:%.*]] = call contract double @__ocml_sincospi_f64(double noundef [[X:%.*]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]]
 // APPROX-NEXT:    store double [[CALL_I]], ptr [[Y:%.*]], align 8, !tbaa [[TBAA18]]
 // APPROX-NEXT:    [[TMP0:%.*]] = load double, ptr addrspace(5) [[__TMP_I]], align 8, !tbaa [[TBAA18]]
 // APPROX-NEXT:    store double [[TMP0]], ptr [[Z:%.*]], align 8, !tbaa [[TBAA18]]
-// APPROX-NEXT:    call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[__TMP_I]]) #[[ATTR15]]
+// APPROX-NEXT:    call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]]
 // APPROX-NEXT:    ret void
 //
 // NCRDIV-LABEL: @test_sincospi(
 // NCRDIV-NEXT:  entry:
 // NCRDIV-NEXT:    [[__TMP_I:%.*]] = alloca double, align 8, addrspace(5)
-// NCRDIV-NEXT:    call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[__TMP_I]]) #[[ATTR15]]
-// NCRDIV-NEXT:    [[CALL_I:%.*]] = call contract double @__ocml_sincospi_f64(double noundef [[X:%.*]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR14]]
+// NCRDIV-NEXT:    call void @llvm.lifetime.start.p5(i64 8, ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]]
+// NCRDIV-NEXT:    [[CALL_I:%.*]] = call contract double @__ocml_sincospi_f64(double noundef [[X:%.*]], ptr addrspace(5) noundef [[__TMP_I]]) #[[ATTR16]]
 // NCRDIV-NEXT:    store double [[CALL_I]], ptr [[Y:%.*]], align 8, !tbaa [[TBAA19]]
 // NCRDIV-NEXT:    [[TMP0:%.*]] = load double, ptr addrspace(5) [[__TMP_I]], align 8, !tbaa [[TBAA19]]
 // NCRDIV-NEXT:    store double [[TMP0]], ptr [[Z:%.*]], align 8, !tbaa [[TBAA19]]
-// NCRDIV-NEXT:    call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[__TMP_I]]) #[[ATTR15]]
+// NCRDIV-NEXT:    call void @llvm.lifetime.end.p5(i64 8, ptr addrspace(5) [[__TMP_I]]) #[[ATTR17]]
 // NCRDIV-NEXT:    ret void
 //
 // AMDGCNSPIRV-LABEL: @test_sincospi(
@@ -6485,22 +6437,22 @@ extern "C" __device__ void test_sincospi(double x, double *y, double *z) {
 
 // DEFAULT-LABEL: @test_sinf(
 // DEFAULT-NEXT:  entry:
-// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_sin_f32(float noundef [[X:%.*]]) #[[ATTR14]]
+// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_sin_f32(float noundef [[X:%.*]]) #[[ATTR16]]
 // DEFAULT-NEXT:    ret float [[CALL_I]]
 //
 // FINITEONLY-LABEL: @test_sinf(
 // FINITEONLY-NEXT:  entry:
-// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_sin_f32(float noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR14]]
+// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_sin_f32(float noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR16]]
 // FINITEONLY-NEXT:    ret float [[CALL_I]]
 //
 // APPROX-LABEL: @test_sinf(
 // APPROX-NEXT:  entry:
-// APPROX-NEXT:    [[CALL_I1:%.*]] = tail call contract noundef float @__ocml_native_sin_f32(float noundef [[X:%.*]]) #[[ATTR14]]
+// APPROX-NEXT:    [[CALL_I1:%.*]] = tail call contract noundef float @__ocml_native_sin_f32(float noundef [[X:%.*]]) #[[ATTR16]]
 // APPROX-NEXT:    ret float [[CALL_I1]]
 //
 // NCRDIV-LABEL: @test_sinf(
 // NCRDIV-NEXT:  entry:
-// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_sin_f32(float noundef [[X:%.*]]) #[[ATTR14]]
+// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_sin_f32(float noundef [[X:%.*]]) #[[ATTR16]]
 // NCRDIV-NEXT:    ret float [[CALL_I]]
 //
 // AMDGCNSPIRV-LABEL: @test_sinf(
@@ -6514,22 +6466,22 @@ extern "C" __device__ float test_sinf(float x) {
 
 // DEFAULT-LABEL: @test_sin(
 // DEFAULT-NEXT:  entry:
-// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_sin_f64(double noundef [[X:%.*]]) #[[ATTR14]]
+// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_sin_f64(double noundef [[X:%.*]]) #[[ATTR16]]
 // DEFAULT-NEXT:    ret double [[CALL_I]]
 //
 // FINITEONLY-LABEL: @test_sin(
 // FINITEONLY-NEXT:  entry:
-// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_sin_f64(double noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR14]]
+// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_sin_f64(double noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR16]]
 // FINITEONLY-NEXT:    ret double [[CALL_I]]
 //
 // APPROX-LABEL: @test_sin(
 // APPROX-NEXT:  entry:
-// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_sin_f64(double noundef [[X:%.*]]) #[[ATTR14]]
+// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_sin_f64(double noundef [[X:%.*]]) #[[ATTR16]]
 // APPROX-NEXT:    ret double [[CALL_I]]
 //
 // NCRDIV-LABEL: @test_sin(
 // NCRDIV-NEXT:  entry:
-// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_sin_f64(double noundef [[X:%.*]]) #[[ATTR14]]
+// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_sin_f64(double noundef [[X:%.*]]) #[[ATTR16]]
 // NCRDIV-NEXT:    ret double [[CALL_I]]
 //
 // AMDGCNSPIRV-LABEL: @test_sin(
@@ -6543,22 +6495,22 @@ extern "C" __device__ double test_sin(double x) {
 
 // DEFAULT-LABEL: @test_sinpif(
 // DEFAULT-NEXT:  entry:
-// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_sinpi_f32(float noundef [[X:%.*]]) #[[ATTR14]]
+// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_sinpi_f32(float noundef [[X:%.*]]) #[[ATTR16]]
 // DEFAULT-NEXT:    ret float [[CALL_I]]
 //
 // FINITEONLY-LABEL: @test_sinpif(
 // FINITEONLY-NEXT:  entry:
-// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_sinpi_f32(float noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR14]]
+// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_sinpi_f32(float noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR16]]
 // FINITEONLY-NEXT:    ret float [[CALL_I]]
 //
 // APPROX-LABEL: @test_sinpif(
 // APPROX-NEXT:  entry:
-// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_sinpi_f32(float noundef [[X:%.*]]) #[[ATTR14]]
+// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_sinpi_f32(float noundef [[X:%.*]]) #[[ATTR16]]
 // APPROX-NEXT:    ret float [[CALL_I]]
 //
 // NCRDIV-LABEL: @test_sinpif(
 // NCRDIV-NEXT:  entry:
-// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_sinpi_f32(float noundef [[X:%.*]]) #[[ATTR14]]
+// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_sinpi_f32(float noundef [[X:%.*]]) #[[ATTR16]]
 // NCRDIV-NEXT:    ret float [[CALL_I]]
 //
 // AMDGCNSPIRV-LABEL: @test_sinpif(
@@ -6572,22 +6524,22 @@ extern "C" __device__ float test_sinpif(float x) {
 
 // DEFAULT-LABEL: @test_sinpi(
 // DEFAULT-NEXT:  entry:
-// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_sinpi_f64(double noundef [[X:%.*]]) #[[ATTR14]]
+// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_sinpi_f64(double noundef [[X:%.*]]) #[[ATTR16]]
 // DEFAULT-NEXT:    ret double [[CALL_I]]
 //
 // FINITEONLY-LABEL: @test_sinpi(
 // FINITEONLY-NEXT:  entry:
-// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_sinpi_f64(double noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR14]]
+// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_sinpi_f64(double noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR16]]
 // FINITEONLY-NEXT:    ret double [[CALL_I]]
 //
 // APPROX-LABEL: @test_sinpi(
 // APPROX-NEXT:  entry:
-// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_sinpi_f64(double noundef [[X:%.*]]) #[[ATTR14]]
+// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_sinpi_f64(double noundef [[X:%.*]]) #[[ATTR16]]
 // APPROX-NEXT:    ret double [[CALL_I]]
 //
 // NCRDIV-LABEL: @test_sinpi(
 // NCRDIV-NEXT:  entry:
-// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_sinpi_f64(double noundef [[X:%.*]]) #[[ATTR14]]
+// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_sinpi_f64(double noundef [[X:%.*]]) #[[ATTR16]]
 // NCRDIV-NEXT:    ret double [[CALL_I]]
 //
 // AMDGCNSPIRV-LABEL: @test_sinpi(
@@ -6616,7 +6568,7 @@ extern "C" __device__ double test_sinpi(double x) {
 //
 // NCRDIV-LABEL: @test_sqrtf(
 // NCRDIV-NEXT:  entry:
-// NCRDIV-NEXT:    [[TMP0:%.*]] = tail call contract noundef float @llvm.sqrt.f32(float [[X:%.*]]), !fpmath [[META22]]
+// NCRDIV-NEXT:    [[TMP0:%.*]] = tail call contract noundef float @llvm.sqrt.f32(float [[X:%.*]]), !fpmath [[META25:![0-9]+]]
 // NCRDIV-NEXT:    ret float [[TMP0]]
 //
 // AMDGCNSPIRV-LABEL: @test_sqrtf(
@@ -6659,22 +6611,22 @@ extern "C" __device__ double test_sqrt(double x) {
 
 // DEFAULT-LABEL: @test_tanf(
 // DEFAULT-NEXT:  entry:
-// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_tan_f32(float noundef [[X:%.*]]) #[[ATTR14]]
+// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_tan_f32(float noundef [[X:%.*]]) #[[ATTR16]]
 // DEFAULT-NEXT:    ret float [[CALL_I]]
 //
 // FINITEONLY-LABEL: @test_tanf(
 // FINITEONLY-NEXT:  entry:
-// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_tan_f32(float noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR14]]
+// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_tan_f32(float noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR16]]
 // FINITEONLY-NEXT:    ret float [[CALL_I]]
 //
 // APPROX-LABEL: @test_tanf(
 // APPROX-NEXT:  entry:
-// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_tan_f32(float noundef [[X:%.*]]) #[[ATTR14]]
+// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_tan_f32(float noundef [[X:%.*]]) #[[ATTR16]]
 // APPROX-NEXT:    ret float [[CALL_I]]
 //
 // NCRDIV-LABEL: @test_tanf(
 // NCRDIV-NEXT:  entry:
-// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_tan_f32(float noundef [[X:%.*]]) #[[ATTR14]]
+// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_tan_f32(float noundef [[X:%.*]]) #[[ATTR16]]
 // NCRDIV-NEXT:    ret float [[CALL_I]]
 //
 // AMDGCNSPIRV-LABEL: @test_tanf(
@@ -6688,22 +6640,22 @@ extern "C" __device__ float test_tanf(float x) {
 
 // DEFAULT-LABEL: @test_tan(
 // DEFAULT-NEXT:  entry:
-// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_tan_f64(double noundef [[X:%.*]]) #[[ATTR14]]
+// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_tan_f64(double noundef [[X:%.*]]) #[[ATTR16]]
 // DEFAULT-NEXT:    ret double [[CALL_I]]
 //
 // FINITEONLY-LABEL: @test_tan(
 // FINITEONLY-NEXT:  entry:
-// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_tan_f64(double noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR14]]
+// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_tan_f64(double noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR16]]
 // FINITEONLY-NEXT:    ret double [[CALL_I]]
 //
 // APPROX-LABEL: @test_tan(
 // APPROX-NEXT:  entry:
-// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_tan_f64(double noundef [[X:%.*]]) #[[ATTR14]]
+// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_tan_f64(double noundef [[X:%.*]]) #[[ATTR16]]
 // APPROX-NEXT:    ret double [[CALL_I]]
 //
 // NCRDIV-LABEL: @test_tan(
 // NCRDIV-NEXT:  entry:
-// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_tan_f64(double noundef [[X:%.*]]) #[[ATTR14]]
+// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_tan_f64(double noundef [[X:%.*]]) #[[ATTR16]]
 // NCRDIV-NEXT:    ret double [[CALL_I]]
 //
 // AMDGCNSPIRV-LABEL: @test_tan(
@@ -6717,22 +6669,22 @@ extern "C" __device__ double test_tan(double x) {
 
 // DEFAULT-LABEL: @test_tanhf(
 // DEFAULT-NEXT:  entry:
-// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_tanh_f32(float noundef [[X:%.*]]) #[[ATTR13]]
+// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_tanh_f32(float noundef [[X:%.*]]) #[[ATTR15]]
 // DEFAULT-NEXT:    ret float [[CALL_I]]
 //
 // FINITEONLY-LABEL: @test_tanhf(
 // FINITEONLY-NEXT:  entry:
-// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_tanh_f32(float noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR13]]
+// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_tanh_f32(float noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR15]]
 // FINITEONLY-NEXT:    ret float [[CALL_I]]
 //
 // APPROX-LABEL: @test_tanhf(
 // APPROX-NEXT:  entry:
-// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_tanh_f32(float noundef [[X:%.*]]) #[[ATTR13]]
+// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_tanh_f32(float noundef [[X:%.*]]) #[[ATTR15]]
 // APPROX-NEXT:    ret float [[CALL_I]]
 //
 // NCRDIV-LABEL: @test_tanhf(
 // NCRDIV-NEXT:  entry:
-// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_tanh_f32(float noundef [[X:%.*]]) #[[ATTR13]]
+// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_tanh_f32(float noundef [[X:%.*]]) #[[ATTR15]]
 // NCRDIV-NEXT:    ret float [[CALL_I]]
 //
 // AMDGCNSPIRV-LABEL: @test_tanhf(
@@ -6746,22 +6698,22 @@ extern "C" __device__ float test_tanhf(float x) {
 
 // DEFAULT-LABEL: @test_tanh(
 // DEFAULT-NEXT:  entry:
-// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_tanh_f64(double noundef [[X:%.*]]) #[[ATTR13]]
+// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_tanh_f64(double noundef [[X:%.*]]) #[[ATTR15]]
 // DEFAULT-NEXT:    ret double [[CALL_I]]
 //
 // FINITEONLY-LABEL: @test_tanh(
 // FINITEONLY-NEXT:  entry:
-// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_tanh_f64(double noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR13]]
+// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_tanh_f64(double noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR15]]
 // FINITEONLY-NEXT:    ret double [[CALL_I]]
 //
 // APPROX-LABEL: @test_tanh(
 // APPROX-NEXT:  entry:
-// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_tanh_f64(double noundef [[X:%.*]]) #[[ATTR13]]
+// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_tanh_f64(double noundef [[X:%.*]]) #[[ATTR15]]
 // APPROX-NEXT:    ret double [[CALL_I]]
 //
 // NCRDIV-LABEL: @test_tanh(
 // NCRDIV-NEXT:  entry:
-// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_tanh_f64(double noundef [[X:%.*]]) #[[ATTR13]]
+// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_tanh_f64(double noundef [[X:%.*]]) #[[ATTR15]]
 // NCRDIV-NEXT:    ret double [[CALL_I]]
 //
 // AMDGCNSPIRV-LABEL: @test_tanh(
@@ -6775,22 +6727,22 @@ extern "C" __device__ double test_tanh(double x) {
 
 // DEFAULT-LABEL: @test_tgammaf(
 // DEFAULT-NEXT:  entry:
-// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_tgamma_f32(float noundef [[X:%.*]]) #[[ATTR14]]
+// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_tgamma_f32(float noundef [[X:%.*]]) #[[ATTR16]]
 // DEFAULT-NEXT:    ret float [[CALL_I]]
 //
 // FINITEONLY-LABEL: @test_tgammaf(
 // FINITEONLY-NEXT:  entry:
-// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_tgamma_f32(float noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR14]]
+// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_tgamma_f32(float noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR16]]
 // FINITEONLY-NEXT:    ret float [[CALL_I]]
 //
 // APPROX-LABEL: @test_tgammaf(
 // APPROX-NEXT:  entry:
-// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_tgamma_f32(float noundef [[X:%.*]]) #[[ATTR14]]
+// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_tgamma_f32(float noundef [[X:%.*]]) #[[ATTR16]]
 // APPROX-NEXT:    ret float [[CALL_I]]
 //
 // NCRDIV-LABEL: @test_tgammaf(
 // NCRDIV-NEXT:  entry:
-// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_tgamma_f32(float noundef [[X:%.*]]) #[[ATTR14]]
+// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_tgamma_f32(float noundef [[X:%.*]]) #[[ATTR16]]
 // NCRDIV-NEXT:    ret float [[CALL_I]]
 //
 // AMDGCNSPIRV-LABEL: @test_tgammaf(
@@ -6804,22 +6756,22 @@ extern "C" __device__ float test_tgammaf(float x) {
 
 // DEFAULT-LABEL: @test_tgamma(
 // DEFAULT-NEXT:  entry:
-// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_tgamma_f64(double noundef [[X:%.*]]) #[[ATTR14]]
+// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_tgamma_f64(double noundef [[X:%.*]]) #[[ATTR16]]
 // DEFAULT-NEXT:    ret double [[CALL_I]]
 //
 // FINITEONLY-LABEL: @test_tgamma(
 // FINITEONLY-NEXT:  entry:
-// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_tgamma_f64(double noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR14]]
+// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_tgamma_f64(double noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR16]]
 // FINITEONLY-NEXT:    ret double [[CALL_I]]
 //
 // APPROX-LABEL: @test_tgamma(
 // APPROX-NEXT:  entry:
-// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_tgamma_f64(double noundef [[X:%.*]]) #[[ATTR14]]
+// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_tgamma_f64(double noundef [[X:%.*]]) #[[ATTR16]]
 // APPROX-NEXT:    ret double [[CALL_I]]
 //
 // NCRDIV-LABEL: @test_tgamma(
 // NCRDIV-NEXT:  entry:
-// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_tgamma_f64(double noundef [[X:%.*]]) #[[ATTR14]]
+// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_tgamma_f64(double noundef [[X:%.*]]) #[[ATTR16]]
 // NCRDIV-NEXT:    ret double [[CALL_I]]
 //
 // AMDGCNSPIRV-LABEL: @test_tgamma(
@@ -6891,22 +6843,22 @@ extern "C" __device__ double test_trunc(double x) {
 
 // DEFAULT-LABEL: @test_y0f(
 // DEFAULT-NEXT:  entry:
-// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_y0_f32(float noundef [[X:%.*]]) #[[ATTR14]]
+// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_y0_f32(float noundef [[X:%.*]]) #[[ATTR16]]
 // DEFAULT-NEXT:    ret float [[CALL_I]]
 //
 // FINITEONLY-LABEL: @test_y0f(
 // FINITEONLY-NEXT:  entry:
-// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_y0_f32(float noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR14]]
+// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_y0_f32(float noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR16]]
 // FINITEONLY-NEXT:    ret float [[CALL_I]]
 //
 // APPROX-LABEL: @test_y0f(
 // APPROX-NEXT:  entry:
-// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_y0_f32(float noundef [[X:%.*]]) #[[ATTR14]]
+// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_y0_f32(float noundef [[X:%.*]]) #[[ATTR16]]
 // APPROX-NEXT:    ret float [[CALL_I]]
 //
 // NCRDIV-LABEL: @test_y0f(
 // NCRDIV-NEXT:  entry:
-// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_y0_f32(float noundef [[X:%.*]]) #[[ATTR14]]
+// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_y0_f32(float noundef [[X:%.*]]) #[[ATTR16]]
 // NCRDIV-NEXT:    ret float [[CALL_I]]
 //
 // AMDGCNSPIRV-LABEL: @test_y0f(
@@ -6920,22 +6872,22 @@ extern "C" __device__ float test_y0f(float x) {
 
 // DEFAULT-LABEL: @test_y0(
 // DEFAULT-NEXT:  entry:
-// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_y0_f64(double noundef [[X:%.*]]) #[[ATTR14]]
+// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_y0_f64(double noundef [[X:%.*]]) #[[ATTR16]]
 // DEFAULT-NEXT:    ret double [[CALL_I]]
 //
 // FINITEONLY-LABEL: @test_y0(
 // FINITEONLY-NEXT:  entry:
-// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_y0_f64(double noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR14]]
+// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_y0_f64(double noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR16]]
 // FINITEONLY-NEXT:    ret double [[CALL_I]]
 //
 // APPROX-LABEL: @test_y0(
 // APPROX-NEXT:  entry:
-// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_y0_f64(double noundef [[X:%.*]]) #[[ATTR14]]
+// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_y0_f64(double noundef [[X:%.*]]) #[[ATTR16]]
 // APPROX-NEXT:    ret double [[CALL_I]]
 //
 // NCRDIV-LABEL: @test_y0(
 // NCRDIV-NEXT:  entry:
-// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_y0_f64(double noundef [[X:%.*]]) #[[ATTR14]]
+// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_y0_f64(double noundef [[X:%.*]]) #[[ATTR16]]
 // NCRDIV-NEXT:    ret double [[CALL_I]]
 //
 // AMDGCNSPIRV-LABEL: @test_y0(
@@ -6949,22 +6901,22 @@ extern "C" __device__ double test_y0(double x) {
 
 // DEFAULT-LABEL: @test_y1f(
 // DEFAULT-NEXT:  entry:
-// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_y1_f32(float noundef [[X:%.*]]) #[[ATTR14]]
+// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_y1_f32(float noundef [[X:%.*]]) #[[ATTR16]]
 // DEFAULT-NEXT:    ret float [[CALL_I]]
 //
 // FINITEONLY-LABEL: @test_y1f(
 // FINITEONLY-NEXT:  entry:
-// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_y1_f32(float noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR14]]
+// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_y1_f32(float noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR16]]
 // FINITEONLY-NEXT:    ret float [[CALL_I]]
 //
 // APPROX-LABEL: @test_y1f(
 // APPROX-NEXT:  entry:
-// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_y1_f32(float noundef [[X:%.*]]) #[[ATTR14]]
+// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_y1_f32(float noundef [[X:%.*]]) #[[ATTR16]]
 // APPROX-NEXT:    ret float [[CALL_I]]
 //
 // NCRDIV-LABEL: @test_y1f(
 // NCRDIV-NEXT:  entry:
-// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_y1_f32(float noundef [[X:%.*]]) #[[ATTR14]]
+// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_y1_f32(float noundef [[X:%.*]]) #[[ATTR16]]
 // NCRDIV-NEXT:    ret float [[CALL_I]]
 //
 // AMDGCNSPIRV-LABEL: @test_y1f(
@@ -6978,22 +6930,22 @@ extern "C" __device__ float test_y1f(float x) {
 
 // DEFAULT-LABEL: @test_y1(
 // DEFAULT-NEXT:  entry:
-// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_y1_f64(double noundef [[X:%.*]]) #[[ATTR14]]
+// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_y1_f64(double noundef [[X:%.*]]) #[[ATTR16]]
 // DEFAULT-NEXT:    ret double [[CALL_I]]
 //
 // FINITEONLY-LABEL: @test_y1(
 // FINITEONLY-NEXT:  entry:
-// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_y1_f64(double noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR14]]
+// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_y1_f64(double noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR16]]
 // FINITEONLY-NEXT:    ret double [[CALL_I]]
 //
 // APPROX-LABEL: @test_y1(
 // APPROX-NEXT:  entry:
-// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_y1_f64(double noundef [[X:%.*]]) #[[ATTR14]]
+// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_y1_f64(double noundef [[X:%.*]]) #[[ATTR16]]
 // APPROX-NEXT:    ret double [[CALL_I]]
 //
 // NCRDIV-LABEL: @test_y1(
 // NCRDIV-NEXT:  entry:
-// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_y1_f64(double noundef [[X:%.*]]) #[[ATTR14]]
+// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef double @__ocml_y1_f64(double noundef [[X:%.*]]) #[[ATTR16]]
 // NCRDIV-NEXT:    ret double [[CALL_I]]
 //
 // AMDGCNSPIRV-LABEL: @test_y1(
@@ -7012,14 +6964,14 @@ extern "C" __device__ double test_y1(double x) {
 // DEFAULT-NEXT:      i32 1, label [[IF_THEN2_I:%.*]]
 // DEFAULT-NEXT:    ]
 // DEFAULT:       if.then.i:
-// DEFAULT-NEXT:    [[CALL_I20_I:%.*]] = tail call contract noundef float @__ocml_y0_f32(float noundef [[Y:%.*]]) #[[ATTR14]]
+// DEFAULT-NEXT:    [[CALL_I20_I:%.*]] = tail call contract noundef float @__ocml_y0_f32(float noundef [[Y:%.*]]) #[[ATTR16]]
 // DEFAULT-NEXT:    br label [[_ZL3YNFIF_EXIT:%.*]]
 // DEFAULT:       if.then2.i:
-// DEFAULT-NEXT:    [[CALL_I22_I:%.*]] = tail call contract noundef float @__ocml_y1_f32(float noundef [[Y]]) #[[ATTR14]]
+// DEFAULT-NEXT:    [[CALL_I22_I:%.*]] = tail call contract noundef float @__ocml_y1_f32(float noundef [[Y]]) #[[ATTR16]]
 // DEFAULT-NEXT:    br label [[_ZL3YNFIF_EXIT]]
 // DEFAULT:       if.end4.i:
-// DEFAULT-NEXT:    [[CALL_I_I:%.*]] = tail call contract noundef float @__ocml_y0_f32(float noundef [[Y]]) #[[ATTR14]]
-// DEFAULT-NEXT:    [[CALL_I21_I:%.*]] = tail call contract noundef float @__ocml_y1_f32(float noundef [[Y]]) #[[ATTR14]]
+// DEFAULT-NEXT:    [[CALL_I_I:%.*]] = tail call contract noundef float @__ocml_y0_f32(float noundef [[Y]]) #[[ATTR16]]
+// DEFAULT-NEXT:    [[CALL_I21_I:%.*]] = tail call contract noundef float @__ocml_y1_f32(float noundef [[Y]]) #[[ATTR16]]
 // DEFAULT-NEXT:    [[CMP7_I1:%.*]] = icmp sgt i32 [[X]], 1
 // DEFAULT-NEXT:    br i1 [[CMP7_I1]], label [[FOR_BODY_I:%.*]], label [[_ZL3YNFIF_EXIT]]
 // DEFAULT:       for.body.i:
@@ -7045,14 +6997,14 @@ extern "C" __device__ double test_y1(double x) {
 // FINITEONLY-NEXT:      i32 1, label [[IF_THEN2_I:%.*]]
 // FINITEONLY-NEXT:    ]
 // FINITEONLY:       if.then.i:
-// FINITEONLY-NEXT:    [[CALL_I20_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_y0_f32(float noundef nofpclass(nan inf) [[Y:%.*]]) #[[ATTR14]]
+// FINITEONLY-NEXT:    [[CALL_I20_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_y0_f32(float noundef nofpclass(nan inf) [[Y:%.*]]) #[[ATTR16]]
 // FINITEONLY-NEXT:    br label [[_ZL3YNFIF_EXIT:%.*]]
 // FINITEONLY:       if.then2.i:
-// FINITEONLY-NEXT:    [[CALL_I22_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_y1_f32(float noundef nofpclass(nan inf) [[Y]]) #[[ATTR14]]
+// FINITEONLY-NEXT:    [[CALL_I22_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_y1_f32(float noundef nofpclass(nan inf) [[Y]]) #[[ATTR16]]
 // FINITEONLY-NEXT:    br label [[_ZL3YNFIF_EXIT]]
 // FINITEONLY:       if.end4.i:
-// FINITEONLY-NEXT:    [[CALL_I_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_y0_f32(float noundef nofpclass(nan inf) [[Y]]) #[[ATTR14]]
-// FINITEONLY-NEXT:    [[CALL_I21_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_y1_f32(float noundef nofpclass(nan inf) [[Y]]) #[[ATTR14]]
+// FINITEONLY-NEXT:    [[CALL_I_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_y0_f32(float noundef nofpclass(nan inf) [[Y]]) #[[ATTR16]]
+// FINITEONLY-NEXT:    [[CALL_I21_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_y1_f32(float noundef nofpclass(nan inf) [[Y]]) #[[ATTR16]]
 // FINITEONLY-NEXT:    [[CMP7_I1:%.*]] = icmp sgt i32 [[X]], 1
 // FINITEONLY-NEXT:    br i1 [[CMP7_I1]], label [[FOR_BODY_I:%.*]], label [[_ZL3YNFIF_EXIT]]
 // FINITEONLY:       for.body.i:
@@ -7078,14 +7030,14 @@ extern "C" __device__ double test_y1(double x) {
 // APPROX-NEXT:      i32 1, label [[IF_THEN2_I:%.*]]
 // APPROX-NEXT:    ]
 // APPROX:       if.then.i:
-// APPROX-NEXT:    [[CALL_I20_I:%.*]] = tail call contract noundef float @__ocml_y0_f32(float noundef [[Y:%.*]]) #[[ATTR14]]
+// APPROX-NEXT:    [[CALL_I20_I:%.*]] = tail call contract noundef float @__ocml_y0_f32(float noundef [[Y:%.*]]) #[[ATTR16]]
 // APPROX-NEXT:    br label [[_ZL3YNFIF_EXIT:%.*]]
 // APPROX:       if.then2.i:
-// APPROX-NEXT:    [[CALL_I22_I:%.*]] = tail call contract noundef float @__ocml_y1_f32(float noundef [[Y]]) #[[ATTR14]]
+// APPROX-NEXT:    [[CALL_I22_I:%.*]] = tail call contract noundef float @__ocml_y1_f32(float noundef [[Y]]) #[[ATTR16]]
 // APPROX-NEXT:    br label [[_ZL3YNFIF_EXIT]]
 // APPROX:       if.end4.i:
-// APPROX-NEXT:    [[CALL_I_I:%.*]] = tail call contract noundef float @__ocml_y0_f32(float noundef [[Y]]) #[[ATTR14]]
-// APPROX-NEXT:    [[CALL_I21_I:%.*]] = tail call contract noundef float @__ocml_y1_f32(float noundef [[Y]]) #[[ATTR14]]
+// APPROX-NEXT:    [[CALL_I_I:%.*]] = tail call contract noundef float @__ocml_y0_f32(float noundef [[Y]]) #[[ATTR16]]
+// APPROX-NEXT:    [[CALL_I21_I:%.*]] = tail call contract noundef float @__ocml_y1_f32(float noundef [[Y]]) #[[ATTR16]]
 // APPROX-NEXT:    [[CMP7_I1:%.*]] = icmp sgt i32 [[X]], 1
 // APPROX-NEXT:    br i1 [[CMP7_I1]], label [[FOR_BODY_I:%.*]], label [[_ZL3YNFIF_EXIT]]
 // APPROX:       for.body.i:
@@ -7111,14 +7063,14 @@ extern "C" __device__ double test_y1(double x) {
 // NCRDIV-NEXT:      i32 1, label [[IF_THEN2_I:%.*]]
 // NCRDIV-NEXT:    ]
 // NCRDIV:       if.then.i:
-// NCRDIV-NEXT:    [[CALL_I20_I:%.*]] = tail call contract noundef float @__ocml_y0_f32(float noundef [[Y:%.*]]) #[[ATTR14]]
+// NCRDIV-NEXT:    [[CALL_I20_I:%.*]] = tail call contract noundef float @__ocml_y0_f32(float noundef [[Y:%.*]]) #[[ATTR16]]
 // NCRDIV-NEXT:    br label [[_ZL3YNFIF_EXIT:%.*]]
 // NCRDIV:       if.then2.i:
-// NCRDIV-NEXT:    [[CALL_I22_I:%.*]] = tail call contract noundef float @__ocml_y1_f32(float noundef [[Y]]) #[[ATTR14]]
+// NCRDIV-NEXT:    [[CALL_I22_I:%.*]] = tail call contract noundef float @__ocml_y1_f32(float noundef [[Y]]) #[[ATTR16]]
 // NCRDIV-NEXT:    br label [[_ZL3YNFIF_EXIT]]
 // NCRDIV:       if.end4.i:
-// NCRDIV-NEXT:    [[CALL_I_I:%.*]] = tail call contract noundef float @__ocml_y0_f32(float noundef [[Y]]) #[[ATTR14]]
-// NCRDIV-NEXT:    [[CALL_I21_I:%.*]] = tail call contract noundef float @__ocml_y1_f32(float noundef [[Y]]) #[[ATTR14]]
+// NCRDIV-NEXT:    [[CALL_I_I:%.*]] = tail call contract noundef float @__ocml_y0_f32(float noundef [[Y]]) #[[ATTR16]]
+// NCRDIV-NEXT:    [[CALL_I21_I:%.*]] = tail call contract noundef float @__ocml_y1_f32(float noundef [[Y]]) #[[ATTR16]]
 // NCRDIV-NEXT:    [[CMP7_I1:%.*]] = icmp sgt i32 [[X]], 1
 // NCRDIV-NEXT:    br i1 [[CMP7_I1]], label [[FOR_BODY_I:%.*]], label [[_ZL3YNFIF_EXIT]]
 // NCRDIV:       for.body.i:
@@ -7181,14 +7133,14 @@ extern "C" __device__ float test_ynf(int x, float y) {
 // DEFAULT-NEXT:      i32 1, label [[IF_THEN2_I:%.*]]
 // DEFAULT-NEXT:    ]
 // DEFAULT:       if.then.i:
-// DEFAULT-NEXT:    [[CALL_I20_I:%.*]] = tail call contract noundef double @__ocml_y0_f64(double noundef [[Y:%.*]]) #[[ATTR14]]
+// DEFAULT-NEXT:    [[CALL_I20_I:%.*]] = tail call contract noundef double @__ocml_y0_f64(double noundef [[Y:%.*]]) #[[ATTR16]]
 // DEFAULT-NEXT:    br label [[_ZL2YNID_EXIT:%.*]]
 // DEFAULT:       if.then2.i:
-// DEFAULT-NEXT:    [[CALL_I22_I:%.*]] = tail call contract noundef double @__ocml_y1_f64(double noundef [[Y]]) #[[ATTR14]]
+// DEFAULT-NEXT:    [[CALL_I22_I:%.*]] = tail call contract noundef double @__ocml_y1_f64(double noundef [[Y]]) #[[ATTR16]]
 // DEFAULT-NEXT:    br label [[_ZL2YNID_EXIT]]
 // DEFAULT:       if.end4.i:
-// DEFAULT-NEXT:    [[CALL_I_I:%.*]] = tail call contract noundef double @__ocml_y0_f64(double noundef [[Y]]) #[[ATTR14]]
-// DEFAULT-NEXT:    [[CALL_I21_I:%.*]] = tail call contract noundef double @__ocml_y1_f64(double noundef [[Y]]) #[[ATTR14]]
+// DEFAULT-NEXT:    [[CALL_I_I:%.*]] = tail call contract noundef double @__ocml_y0_f64(double noundef [[Y]]) #[[ATTR16]]
+// DEFAULT-NEXT:    [[CALL_I21_I:%.*]] = tail call contract noundef double @__ocml_y1_f64(double noundef [[Y]]) #[[ATTR16]]
 // DEFAULT-NEXT:    [[CMP7_I1:%.*]] = icmp sgt i32 [[X]], 1
 // DEFAULT-NEXT:    br i1 [[CMP7_I1]], label [[FOR_BODY_I:%.*]], label [[_ZL2YNID_EXIT]]
 // DEFAULT:       for.body.i:
@@ -7214,14 +7166,14 @@ extern "C" __device__ float test_ynf(int x, float y) {
 // FINITEONLY-NEXT:      i32 1, label [[IF_THEN2_I:%.*]]
 // FINITEONLY-NEXT:    ]
 // FINITEONLY:       if.then.i:
-// FINITEONLY-NEXT:    [[CALL_I20_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_y0_f64(double noundef nofpclass(nan inf) [[Y:%.*]]) #[[ATTR14]]
+// FINITEONLY-NEXT:    [[CALL_I20_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_y0_f64(double noundef nofpclass(nan inf) [[Y:%.*]]) #[[ATTR16]]
 // FINITEONLY-NEXT:    br label [[_ZL2YNID_EXIT:%.*]]
 // FINITEONLY:       if.then2.i:
-// FINITEONLY-NEXT:    [[CALL_I22_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_y1_f64(double noundef nofpclass(nan inf) [[Y]]) #[[ATTR14]]
+// FINITEONLY-NEXT:    [[CALL_I22_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_y1_f64(double noundef nofpclass(nan inf) [[Y]]) #[[ATTR16]]
 // FINITEONLY-NEXT:    br label [[_ZL2YNID_EXIT]]
 // FINITEONLY:       if.end4.i:
-// FINITEONLY-NEXT:    [[CALL_I_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_y0_f64(double noundef nofpclass(nan inf) [[Y]]) #[[ATTR14]]
-// FINITEONLY-NEXT:    [[CALL_I21_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_y1_f64(double noundef nofpclass(nan inf) [[Y]]) #[[ATTR14]]
+// FINITEONLY-NEXT:    [[CALL_I_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_y0_f64(double noundef nofpclass(nan inf) [[Y]]) #[[ATTR16]]
+// FINITEONLY-NEXT:    [[CALL_I21_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) double @__ocml_y1_f64(double noundef nofpclass(nan inf) [[Y]]) #[[ATTR16]]
 // FINITEONLY-NEXT:    [[CMP7_I1:%.*]] = icmp sgt i32 [[X]], 1
 // FINITEONLY-NEXT:    br i1 [[CMP7_I1]], label [[FOR_BODY_I:%.*]], label [[_ZL2YNID_EXIT]]
 // FINITEONLY:       for.body.i:
@@ -7247,14 +7199,14 @@ extern "C" __device__ float test_ynf(int x, float y) {
 // APPROX-NEXT:      i32 1, label [[IF_THEN2_I:%.*]]
 // APPROX-NEXT:    ]
 // APPROX:       if.then.i:
-// APPROX-NEXT:    [[CALL_I20_I:%.*]] = tail call contract noundef double @__ocml_y0_f64(double noundef [[Y:%.*]]) #[[ATTR14]]
+// APPROX-NEXT:    [[CALL_I20_I:%.*]] = tail call contract noundef double @__ocml_y0_f64(double noundef [[Y:%.*]]) #[[ATTR16]]
 // APPROX-NEXT:    br label [[_ZL2YNID_EXIT:%.*]]
 // APPROX:       if.then2.i:
-// APPROX-NEXT:    [[CALL_I22_I:%.*]] = tail call contract noundef double @__ocml_y1_f64(double noundef [[Y]]) #[[ATTR14]]
+// APPROX-NEXT:    [[CALL_I22_I:%.*]] = tail call contract noundef double @__ocml_y1_f64(double noundef [[Y]]) #[[ATTR16]]
 // APPROX-NEXT:    br label [[_ZL2YNID_EXIT]]
 // APPROX:       if.end4.i:
-// APPROX-NEXT:    [[CALL_I_I:%.*]] = tail call contract noundef double @__ocml_y0_f64(double noundef [[Y]]) #[[ATTR14]]
-// APPROX-NEXT:    [[CALL_I21_I:%.*]] = tail call contract noundef double @__ocml_y1_f64(double noundef [[Y]]) #[[ATTR14]]
+// APPROX-NEXT:    [[CALL_I_I:%.*]] = tail call contract noundef double @__ocml_y0_f64(double noundef [[Y]]) #[[ATTR16]]
+// APPROX-NEXT:    [[CALL_I21_I:%.*]] = tail call contract noundef double @__ocml_y1_f64(double noundef [[Y]]) #[[ATTR16]]
 // APPROX-NEXT:    [[CMP7_I1:%.*]] = icmp sgt i32 [[X]], 1
 // APPROX-NEXT:    br i1 [[CMP7_I1]], label [[FOR_BODY_I:%.*]], label [[_ZL2YNID_EXIT]]
 // APPROX:       for.body.i:
@@ -7280,14 +7232,14 @@ extern "C" __device__ float test_ynf(int x, float y) {
 // NCRDIV-NEXT:      i32 1, label [[IF_THEN2_I:%.*]]
 // NCRDIV-NEXT:    ]
 // NCRDIV:       if.then.i:
-// NCRDIV-NEXT:    [[CALL_I20_I:%.*]] = tail call contract noundef double @__ocml_y0_f64(double noundef [[Y:%.*]]) #[[ATTR14]]
+// NCRDIV-NEXT:    [[CALL_I20_I:%.*]] = tail call contract noundef double @__ocml_y0_f64(double noundef [[Y:%.*]]) #[[ATTR16]]
 // NCRDIV-NEXT:    br label [[_ZL2YNID_EXIT:%.*]]
 // NCRDIV:       if.then2.i:
-// NCRDIV-NEXT:    [[CALL_I22_I:%.*]] = tail call contract noundef double @__ocml_y1_f64(double noundef [[Y]]) #[[ATTR14]]
+// NCRDIV-NEXT:    [[CALL_I22_I:%.*]] = tail call contract noundef double @__ocml_y1_f64(double noundef [[Y]]) #[[ATTR16]]
 // NCRDIV-NEXT:    br label [[_ZL2YNID_EXIT]]
 // NCRDIV:       if.end4.i:
-// NCRDIV-NEXT:    [[CALL_I_I:%.*]] = tail call contract noundef double @__ocml_y0_f64(double noundef [[Y]]) #[[ATTR14]]
-// NCRDIV-NEXT:    [[CALL_I21_I:%.*]] = tail call contract noundef double @__ocml_y1_f64(double noundef [[Y]]) #[[ATTR14]]
+// NCRDIV-NEXT:    [[CALL_I_I:%.*]] = tail call contract noundef double @__ocml_y0_f64(double noundef [[Y]]) #[[ATTR16]]
+// NCRDIV-NEXT:    [[CALL_I21_I:%.*]] = tail call contract noundef double @__ocml_y1_f64(double noundef [[Y]]) #[[ATTR16]]
 // NCRDIV-NEXT:    [[CMP7_I1:%.*]] = icmp sgt i32 [[X]], 1
 // NCRDIV-NEXT:    br i1 [[CMP7_I1]], label [[FOR_BODY_I:%.*]], label [[_ZL2YNID_EXIT]]
 // NCRDIV:       for.body.i:
@@ -7345,22 +7297,22 @@ extern "C" __device__ double test_yn(int x, double y) {
 
 // DEFAULT-LABEL: @test___cosf(
 // DEFAULT-NEXT:  entry:
-// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_native_cos_f32(float noundef [[X:%.*]]) #[[ATTR14]]
+// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_native_cos_f32(float noundef [[X:%.*]]) #[[ATTR16]]
 // DEFAULT-NEXT:    ret float [[CALL_I]]
 //
 // FINITEONLY-LABEL: @test___cosf(
 // FINITEONLY-NEXT:  entry:
-// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_native_cos_f32(float noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR14]]
+// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_native_cos_f32(float noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR16]]
 // FINITEONLY-NEXT:    ret float [[CALL_I]]
 //
 // APPROX-LABEL: @test___cosf(
 // APPROX-NEXT:  entry:
-// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_native_cos_f32(float noundef [[X:%.*]]) #[[ATTR14]]
+// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_native_cos_f32(float noundef [[X:%.*]]) #[[ATTR16]]
 // APPROX-NEXT:    ret float [[CALL_I]]
 //
 // NCRDIV-LABEL: @test___cosf(
 // NCRDIV-NEXT:  entry:
-// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_native_cos_f32(float noundef [[X:%.*]]) #[[ATTR14]]
+// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_native_cos_f32(float noundef [[X:%.*]]) #[[ATTR16]]
 // NCRDIV-NEXT:    ret float [[CALL_I]]
 //
 // AMDGCNSPIRV-LABEL: @test___cosf(
@@ -7616,22 +7568,22 @@ extern "C" __device__ float test___frsqrt_rn(float x) {
 
 // DEFAULT-LABEL: @test___fsqrt_rn(
 // DEFAULT-NEXT:  entry:
-// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_native_sqrt_f32(float noundef [[X:%.*]]) #[[ATTR12]]
+// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_native_sqrt_f32(float noundef [[X:%.*]]) #[[ATTR14]]
 // DEFAULT-NEXT:    ret float [[CALL_I]]
 //
 // FINITEONLY-LABEL: @test___fsqrt_rn(
 // FINITEONLY-NEXT:  entry:
-// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_native_sqrt_f32(float noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR12]]
+// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_native_sqrt_f32(float noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR14]]
 // FINITEONLY-NEXT:    ret float [[CALL_I]]
 //
 // APPROX-LABEL: @test___fsqrt_rn(
 // APPROX-NEXT:  entry:
-// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_native_sqrt_f32(float noundef [[X:%.*]]) #[[ATTR12]]
+// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_native_sqrt_f32(float noundef [[X:%.*]]) #[[ATTR14]]
 // APPROX-NEXT:    ret float [[CALL_I]]
 //
 // NCRDIV-LABEL: @test___fsqrt_rn(
 // NCRDIV-NEXT:  entry:
-// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_native_sqrt_f32(float noundef [[X:%.*]]) #[[ATTR12]]
+// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_native_sqrt_f32(float noundef [[X:%.*]]) #[[ATTR14]]
 // NCRDIV-NEXT:    ret float [[CALL_I]]
 //
 // AMDGCNSPIRV-LABEL: @test___fsqrt_rn(
@@ -7761,22 +7713,22 @@ extern "C" __device__ float test___logf(float x) {
 
 // DEFAULT-LABEL: @test___powf(
 // DEFAULT-NEXT:  entry:
-// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_pow_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]]) #[[ATTR13]]
+// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_pow_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]]) #[[ATTR15]]
 // DEFAULT-NEXT:    ret float [[CALL_I]]
 //
 // FINITEONLY-LABEL: @test___powf(
 // FINITEONLY-NEXT:  entry:
-// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_pow_f32(float noundef nofpclass(nan inf) [[X:%.*]], float noundef nofpclass(nan inf) [[Y:%.*]]) #[[ATTR13]]
+// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_pow_f32(float noundef nofpclass(nan inf) [[X:%.*]], float noundef nofpclass(nan inf) [[Y:%.*]]) #[[ATTR15]]
 // FINITEONLY-NEXT:    ret float [[CALL_I]]
 //
 // APPROX-LABEL: @test___powf(
 // APPROX-NEXT:  entry:
-// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_pow_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]]) #[[ATTR13]]
+// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_pow_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]]) #[[ATTR15]]
 // APPROX-NEXT:    ret float [[CALL_I]]
 //
 // NCRDIV-LABEL: @test___powf(
 // NCRDIV-NEXT:  entry:
-// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_pow_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]]) #[[ATTR13]]
+// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_pow_f32(float noundef [[X:%.*]], float noundef [[Y:%.*]]) #[[ATTR15]]
 // NCRDIV-NEXT:    ret float [[CALL_I]]
 //
 // AMDGCNSPIRV-LABEL: @test___powf(
@@ -7834,33 +7786,33 @@ extern "C" __device__ float test___saturatef(float x) {
 
 // DEFAULT-LABEL: @test___sincosf(
 // DEFAULT-NEXT:  entry:
-// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract float @__ocml_native_sin_f32(float noundef [[X:%.*]]) #[[ATTR14]]
+// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract float @__ocml_native_sin_f32(float noundef [[X:%.*]]) #[[ATTR16]]
 // DEFAULT-NEXT:    store float [[CALL_I]], ptr [[Y:%.*]], align 4, !tbaa [[TBAA16]]
-// DEFAULT-NEXT:    [[CALL1_I:%.*]] = tail call contract float @__ocml_native_cos_f32(float noundef [[X]]) #[[ATTR14]]
+// DEFAULT-NEXT:    [[CALL1_I:%.*]] = tail call contract float @__ocml_native_cos_f32(float noundef [[X]]) #[[ATTR16]]
 // DEFAULT-NEXT:    store float [[CALL1_I]], ptr [[Z:%.*]], align 4, !tbaa [[TBAA16]]
 // DEFAULT-NEXT:    ret void
 //
 // FINITEONLY-LABEL: @test___sincosf(
 // FINITEONLY-NEXT:  entry:
-// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract nofpclass(nan inf) float @__ocml_native_sin_f32(float noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR14]]
+// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract nofpclass(nan inf) float @__ocml_native_sin_f32(float noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR16]]
 // FINITEONLY-NEXT:    store float [[CALL_I]], ptr [[Y:%.*]], align 4, !tbaa [[TBAA16]]
-// FINITEONLY-NEXT:    [[CALL1_I:%.*]] = tail call nnan ninf contract nofpclass(nan inf) float @__ocml_native_cos_f32(float noundef nofpclass(nan inf) [[X]]) #[[ATTR14]]
+// FINITEONLY-NEXT:    [[CALL1_I:%.*]] = tail call nnan ninf contract nofpclass(nan inf) float @__ocml_native_cos_f32(float noundef nofpclass(nan inf) [[X]]) #[[ATTR16]]
 // FINITEONLY-NEXT:    store float [[CALL1_I]], ptr [[Z:%.*]], align 4, !tbaa [[TBAA16]]
 // FINITEONLY-NEXT:    ret void
 //
 // APPROX-LABEL: @test___sincosf(
 // APPROX-NEXT:  entry:
-// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract float @__ocml_native_sin_f32(float noundef [[X:%.*]]) #[[ATTR14]]
+// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract float @__ocml_native_sin_f32(float noundef [[X:%.*]]) #[[ATTR16]]
 // APPROX-NEXT:    store float [[CALL_I]], ptr [[Y:%.*]], align 4, !tbaa [[TBAA16]]
-// APPROX-NEXT:    [[CALL1_I:%.*]] = tail call contract float @__ocml_native_cos_f32(float noundef [[X]]) #[[ATTR14]]
+// APPROX-NEXT:    [[CALL1_I:%.*]] = tail call contract float @__ocml_native_cos_f32(float noundef [[X]]) #[[ATTR16]]
 // APPROX-NEXT:    store float [[CALL1_I]], ptr [[Z:%.*]], align 4, !tbaa [[TBAA16]]
 // APPROX-NEXT:    ret void
 //
 // NCRDIV-LABEL: @test___sincosf(
 // NCRDIV-NEXT:  entry:
-// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract float @__ocml_native_sin_f32(float noundef [[X:%.*]]) #[[ATTR14]]
+// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract float @__ocml_native_sin_f32(float noundef [[X:%.*]]) #[[ATTR16]]
 // NCRDIV-NEXT:    store float [[CALL_I]], ptr [[Y:%.*]], align 4, !tbaa [[TBAA17]]
-// NCRDIV-NEXT:    [[CALL1_I:%.*]] = tail call contract float @__ocml_native_cos_f32(float noundef [[X]]) #[[ATTR14]]
+// NCRDIV-NEXT:    [[CALL1_I:%.*]] = tail call contract float @__ocml_native_cos_f32(float noundef [[X]]) #[[ATTR16]]
 // NCRDIV-NEXT:    store float [[CALL1_I]], ptr [[Z:%.*]], align 4, !tbaa [[TBAA17]]
 // NCRDIV-NEXT:    ret void
 //
@@ -7878,22 +7830,22 @@ extern "C" __device__ void test___sincosf(float x, float *y, float *z) {
 
 // DEFAULT-LABEL: @test___sinf(
 // DEFAULT-NEXT:  entry:
-// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_native_sin_f32(float noundef [[X:%.*]]) #[[ATTR14]]
+// DEFAULT-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_native_sin_f32(float noundef [[X:%.*]]) #[[ATTR16]]
 // DEFAULT-NEXT:    ret float [[CALL_I]]
 //
 // FINITEONLY-LABEL: @test___sinf(
 // FINITEONLY-NEXT:  entry:
-// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_native_sin_f32(float noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR14]]
+// FINITEONLY-NEXT:    [[CALL_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_native_sin_f32(float noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR16]]
 // FINITEONLY-NEXT:    ret float [[CALL_I]]
 //
 // APPROX-LABEL: @test___sinf(
 // APPROX-NEXT:  entry:
-// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_native_sin_f32(float noundef [[X:%.*]]) #[[ATTR14]]
+// APPROX-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_native_sin_f32(float noundef [[X:%.*]]) #[[ATTR16]]
 // APPROX-NEXT:    ret float [[CALL_I]]
 //
 // NCRDIV-LABEL: @test___sinf(
 // NCRDIV-NEXT:  entry:
-// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_native_sin_f32(float noundef [[X:%.*]]) #[[ATTR14]]
+// NCRDIV-NEXT:    [[CALL_I:%.*]] = tail call contract noundef float @__ocml_native_sin_f32(float noundef [[X:%.*]]) #[[ATTR16]]
 // NCRDIV-NEXT:    ret float [[CALL_I]]
 //
 // AMDGCNSPIRV-LABEL: @test___sinf(
@@ -7907,32 +7859,32 @@ extern "C" __device__ float test___sinf(float x) {
 
 // DEFAULT-LABEL: @test___tanf(
 // DEFAULT-NEXT:  entry:
-// DEFAULT-NEXT:    [[CALL_I3_I:%.*]] = tail call contract noundef float @__ocml_native_sin_f32(float noundef [[X:%.*]]) #[[ATTR14]]
-// DEFAULT-NEXT:    [[CALL_I_I:%.*]] = tail call contract noundef float @__ocml_native_cos_f32(float noundef [[X]]) #[[ATTR14]]
+// DEFAULT-NEXT:    [[CALL_I3_I:%.*]] = tail call contract noundef float @__ocml_native_sin_f32(float noundef [[X:%.*]]) #[[ATTR16]]
+// DEFAULT-NEXT:    [[CALL_I_I:%.*]] = tail call contract noundef float @__ocml_native_cos_f32(float noundef [[X]]) #[[ATTR16]]
 // DEFAULT-NEXT:    [[TMP0:%.*]] = tail call contract float @llvm.amdgcn.rcp.f32(float [[CALL_I_I]])
 // DEFAULT-NEXT:    [[MUL_I:%.*]] = fmul contract float [[CALL_I3_I]], [[TMP0]]
 // DEFAULT-NEXT:    ret float [[MUL_I]]
 //
 // FINITEONLY-LABEL: @test___tanf(
 // FINITEONLY-NEXT:  entry:
-// FINITEONLY-NEXT:    [[CALL_I3_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_native_sin_f32(float noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR14]]
-// FINITEONLY-NEXT:    [[CALL_I_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_native_cos_f32(float noundef nofpclass(nan inf) [[X]]) #[[ATTR14]]
+// FINITEONLY-NEXT:    [[CALL_I3_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_native_sin_f32(float noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR16]]
+// FINITEONLY-NEXT:    [[CALL_I_I:%.*]] = tail call nnan ninf contract noundef nofpclass(nan inf) float @__ocml_native_cos_f32(float noundef nofpclass(nan inf) [[X]]) #[[ATTR16]]
 // FINITEONLY-NEXT:    [[TMP0:%.*]] = tail call nnan ninf contract float @llvm.amdgcn.rcp.f32(float [[CALL_I_I]])
 // FINITEONLY-NEXT:    [[MUL_I:%.*]] = fmul nnan ninf contract float [[CALL_I3_I]], [[TMP0]]
 // FINITEONLY-NEXT:    ret float [[MUL_I]]
 //
 // APPROX-LABEL: @test___tanf(
 // APPROX-NEXT:  entry:
-// APPROX-NEXT:    [[CALL_I3_I:%.*]] = tail call contract noundef float @__ocml_native_sin_f32(float noundef [[X:%.*]]) #[[ATTR14]]
-// APPROX-NEXT:    [[CALL_I_I:%.*]] = tail call contract noundef float @__ocml_native_cos_f32(float noundef [[X]]) #[[ATTR14]]
+// APPROX-NEXT:    [[CALL_I3_I:%.*]] = tail call contract noundef float @__ocml_native_sin_f32(float noundef [[X:%.*]]) #[[ATTR16]]
+// APPROX-NEXT:    [[CALL_I_I:%.*]] = tail call contract noundef float @__ocml_native_cos_f32(float noundef [[X]]) #[[ATTR16]]
 // APPROX-NEXT:    [[TMP0:%.*]] = tail call contract float @llvm.amdgcn.rcp.f32(float [[CALL_I_I]])
 // APPROX-NEXT:    [[MUL_I:%.*]] = fmul contract float [[CALL_I3_I]], [[TMP0]]
 // APPROX-NEXT:    ret float [[MUL_I]]
 //
 // NCRDIV-LABEL: @test___tanf(
 // NCRDIV-NEXT:  entry:
-// NCRDIV-NEXT:    [[CALL_I3_I:%.*]] = tail call contract noundef float @__ocml_native_sin_f32(float noundef [[X:%.*]]) #[[ATTR14]]
-// NCRDIV-NEXT:    [[CALL_I_I:%.*]] = tail call contract noundef float @__ocml_native_cos_f32(float noundef [[X]]) #[[ATTR14]]
+// NCRDIV-NEXT:    [[CALL_I3_I:%.*]] = tail call contract noundef float @__ocml_native_sin_f32(float noundef [[X:%.*]]) #[[ATTR16]]
+// NCRDIV-NEXT:    [[CALL_I_I:%.*]] = tail call contract noundef float @__ocml_native_cos_f32(float noundef [[X]]) #[[ATTR16]]
 // NCRDIV-NEXT:    [[TMP0:%.*]] = tail call contract float @llvm.amdgcn.rcp.f32(float [[CALL_I_I]])
 // NCRDIV-NEXT:    [[MUL_I:%.*]] = fmul contract float [[CALL_I3_I]], [[TMP0]]
 // NCRDIV-NEXT:    ret float [[MUL_I]]
diff --git a/clang/test/Headers/__cpuidex_conflict.c b/clang/test/Headers/__cpuidex_conflict.c
index 49795c4..74f4532 100644
--- a/clang/test/Headers/__cpuidex_conflict.c
+++ b/clang/test/Headers/__cpuidex_conflict.c
@@ -1,19 +1,17 @@
 // Make sure that __cpuidex in cpuid.h doesn't conflict with the MS
 // extensions built in by ensuring compilation succeeds:
-// RUN: %clang_cc1 %s -ffreestanding -fms-extensions -fms-compatibility \
-// RUN:  -fms-compatibility-version=19.00 -triple x86_64-pc-windows-msvc -emit-llvm -o -
-// %clang_cc1 %s -ffreestanding -triple x86_64-w64-windows-gnu -fms-extensions -emit-llvm -o -
-//
-// FIXME: See https://github.com/llvm/llvm-project/pull/121839 and
-// FIXME: https://github.com/llvm/llvm-project/pull/126324
-// RUN: not %clang_cc1 %s -ffreestanding -fopenmp -fopenmp-is-target-device -aux-triple x86_64-unknown-linux-gnu
+// RUN: %clang_cc1 %s -DIS_STATIC="" -ffreestanding -fms-extensions -fms-compatibility -fms-compatibility-version=19.00 -triple x86_64-pc-windows-msvc -emit-llvm -o -
+// RUN: %clang_cc1 %s -DIS_STATIC="" -ffreestanding -triple x86_64-w64-windows-gnu -fms-extensions -emit-llvm -o -
+
+// Ensure that we do not run into conflicts when offloading.
+// RUN: %clang_cc1 %s -DIS_STATIC=static -ffreestanding -fopenmp -fopenmp-is-target-device -aux-triple x86_64-unknown-linux-gnu
 
 typedef __SIZE_TYPE__ size_t;
 
 // We declare __cpuidex here as where the buitlin should be exposed (MSVC), the
 // declaration is in <intrin.h>, but <intrin.h> is not available from all the
 // targets that are being tested here.
-void __cpuidex (int[4], int, int);
+IS_STATIC void __cpuidex (int[4], int, int);
 
 #include <cpuid.h>
 
@@ -22,4 +20,3 @@ int cpuid_info[4];
 void test_cpuidex(unsigned level, unsigned count) {
   __cpuidex(cpuid_info, level, count);
 }
-
diff --git a/clang/test/Misc/target-invalid-cpu-note/nvptx.c b/clang/test/Misc/target-invalid-cpu-note/nvptx.c
index b5209ff..b90f26e 100644
--- a/clang/test/Misc/target-invalid-cpu-note/nvptx.c
+++ b/clang/test/Misc/target-invalid-cpu-note/nvptx.c
@@ -30,8 +30,12 @@
 // CHECK-SAME: {{^}}, sm_100a
 // CHECK-SAME: {{^}}, sm_101
 // CHECK-SAME: {{^}}, sm_101a
+// CHECK-SAME: {{^}}, sm_103
+// CHECK-SAME: {{^}}, sm_103a
 // CHECK-SAME: {{^}}, sm_120
 // CHECK-SAME: {{^}}, sm_120a
+// CHECK-SAME: {{^}}, sm_121
+// CHECK-SAME: {{^}}, sm_121a
 // CHECK-SAME: {{^}}, gfx600
 // CHECK-SAME: {{^}}, gfx601
 // CHECK-SAME: {{^}}, gfx602
diff --git a/clang/test/PCH/debug-info-pch-container-path.c b/clang/test/PCH/debug-info-pch-container-path.c
index 257cbf5..19b1a28 100644
--- a/clang/test/PCH/debug-info-pch-container-path.c
+++ b/clang/test/PCH/debug-info-pch-container-path.c
@@ -9,6 +9,7 @@
 // RUN:     -triple %itanium_abi_triple                         \
 // RUN:     -fdebug-prefix-map=%t=BUILD                         \
 // RUN:     -fdebug-prefix-map=%S=SOURCE                        \
+// RUN:     -fdebug-compilation-dir=%t                          \
 // RUN:     -o %t/prefix.ll %S/debug-info-limited-struct.h      \
 // RUN:   -mllvm -debug-only=pchcontainer &>%t-container.ll
 // RUN: cat %t-container.ll | FileCheck %s
diff --git a/clang/test/PCH/debug-info-pch-path.c b/clang/test/PCH/debug-info-pch-path.c
index f94d2fa..22b367f 100644
--- a/clang/test/PCH/debug-info-pch-path.c
+++ b/clang/test/PCH/debug-info-pch-path.c
@@ -65,7 +65,8 @@
 // RUN: %clang_cc1 -debug-info-kind=standalone                  \
 // RUN:     -dwarf-ext-refs -fmodule-format=obj                 \
 // RUN:     -triple %itanium_abi_triple                         \
-// RUN:     -include-pch %t/prefix.pch %s -emit-llvm -o %t.abs.ll %s
+// RUN:     -include-pch %t/prefix.pch %s -emit-llvm            \
+// RUN:     -fdebug-compilation-dir=%t -o %t.abs.ll %s
 // RUN: cat %t.abs.ll | FileCheck %s --check-prefix=CHECK-ABS
 
 // CHECK-ABS: !DICompileUnit
diff --git a/clang/test/Parser/cxx23-assume.cpp b/clang/test/Parser/cxx23-assume.cpp
index 269fb7e..375c908 100644
--- a/clang/test/Parser/cxx23-assume.cpp
+++ b/clang/test/Parser/cxx23-assume.cpp
@@ -5,7 +5,7 @@ void f(int x, int y) {
   [[assume(1)]];
   [[assume(1.0)]];
   [[assume(1 + 2 == 3)]];
-  [[assume(x ? 1 : 2)]];
+  [[assume(x ? 1 : 2)]]; // expected-warning {{converting the result of '?:' with integer constants to a boolean always evaluates to 'true'}}
   [[assume(x && y)]];
   [[assume(true)]] [[assume(true)]];
 
diff --git a/clang/test/Preprocessor/Inputs/llvm-windres.h b/clang/test/Preprocessor/Inputs/llvm-windres.h
index 411ec48..ef6bc47 100644
--- a/clang/test/Preprocessor/Inputs/llvm-windres.h
+++ b/clang/test/Preprocessor/Inputs/llvm-windres.h
@@ -1,10 +1,12 @@
 #ifndef RC_INVOKED
-#error RC_INVOKED not defined
+#  error RC_INVOKED not defined
 #endif
-#ifndef _WIN32
-#error _WIN32 not defined
-#endif
-#ifndef __MINGW32__
-#error __MINGW32__ not defined
+#ifndef __CYGWIN__
+#  ifndef _WIN32
+#    error _WIN32 not defined
+#  endif
+#  ifndef __MINGW32__
+#    error __MINGW32__ not defined
+#  endif
 #endif
 #define MY_ID 42
diff --git a/clang/test/Preprocessor/init-x86.c b/clang/test/Preprocessor/init-x86.c
index 8ea4ce7..f5fd976 100644
--- a/clang/test/Preprocessor/init-x86.c
+++ b/clang/test/Preprocessor/init-x86.c
@@ -1535,6 +1535,7 @@
 // I386-CYGWIN:#define __GCC_ATOMIC_SHORT_LOCK_FREE 2
 // I386-CYGWIN:#define __GCC_ATOMIC_TEST_AND_SET_TRUEVAL 1
 // I386-CYGWIN:#define __GCC_ATOMIC_WCHAR_T_LOCK_FREE 2
+// I386-CYGWIN:#define __GXX_TYPEINFO_EQUALITY_INLINE 0
 // I386-CYGWIN:#define __ILP32__ 1
 // I386-CYGWIN:#define __INT16_C(c) c
 // I386-CYGWIN:#define __INT16_C_SUFFIX__
@@ -1746,6 +1747,7 @@
 // X86_64-CYGWIN:#define __GCC_ATOMIC_SHORT_LOCK_FREE 2
 // X86_64-CYGWIN:#define __GCC_ATOMIC_TEST_AND_SET_TRUEVAL 1
 // X86_64-CYGWIN:#define __GCC_ATOMIC_WCHAR_T_LOCK_FREE 2
+// X86_64-CYGWIN:#define __GXX_TYPEINFO_EQUALITY_INLINE 0
 // X86_64-CYGWIN:#define __INT16_C(c) c
 // X86_64-CYGWIN:#define __INT16_C_SUFFIX__
 // X86_64-CYGWIN:#define __INT16_FMTd__ "hd"
diff --git a/clang/test/Preprocessor/preprocess-cpp-output.c b/clang/test/Preprocessor/preprocess-cpp-output.c
new file mode 100644
index 0000000..2c18060
--- /dev/null
+++ b/clang/test/Preprocessor/preprocess-cpp-output.c
@@ -0,0 +1,10 @@
+// RUN: %clang_cc1 -E -x c %s | FileCheck %s --check-prefixes=EXPANDED
+// RUN: %clang_cc1 -E -x cpp-output %s | FileCheck %s --check-prefixes=NOT-EXPANDED
+
+// EXPANDED: void __attribute__((__attribute__((always_inline)))) foo()
+// NOT-EXPANDED: void __attribute__((always_inline)) foo()
+
+#define always_inline __attribute__((always_inline))
+void __attribute__((always_inline)) foo() {
+    return 4;
+}
diff --git a/clang/test/Preprocessor/preprocess-pragma-cpp-output.c b/clang/test/Preprocessor/preprocess-pragma-cpp-output.c
new file mode 100644
index 0000000..d538937
--- /dev/null
+++ b/clang/test/Preprocessor/preprocess-pragma-cpp-output.c
@@ -0,0 +1,21 @@
+// RUN: %clang_cc1 -E -x c %s | FileCheck %s
+// RUN: %clang_cc1 -x c -fsyntax-only %s -verify
+// RUN: %clang_cc1 -x cpp-output -fsyntax-only -verify %s
+// expected-no-diagnostics
+
+// The preprocessor does not expand macro-identifiers in #pragma directives.
+// When we preprocess & parse the code, clang expands the macros in directives.
+// When we parse already preprocessed code, clang still has to expand the
+// macros in the directives.
+// This means that we're not always able to parse the preprocessor's output
+// without preserving the definitions (-dD).
+
+#define FACTOR 4
+
+void foo() {
+    // CHECK: #pragma unroll FACTOR
+    #pragma unroll FACTOR
+    for(;;) {
+    }
+    return;
+}
diff --git a/clang/test/Preprocessor/wasm-target-features.c b/clang/test/Preprocessor/wasm-target-features.c
index 71b7cf6..3edaf9e 100644
--- a/clang/test/Preprocessor/wasm-target-features.c
+++ b/clang/test/Preprocessor/wasm-target-features.c
@@ -53,6 +53,15 @@
 // FP16: #define __wasm_fp16__ 1{{$}}
 
 // RUN: %clang -E -dM %s -o - 2>&1 \
+// RUN:     -target wasm32-unknown-unknown -mgc \
+// RUN:   | FileCheck %s -check-prefix=GC
+// RUN: %clang -E -dM %s -o - 2>&1 \
+// RUN:     -target wasm64-unknown-unknown -mgc \
+// RUN:   | FileCheck %s -check-prefix=GC
+//
+// GC: #define __wasm_gc__ 1{{$}}
+
+// RUN: %clang -E -dM %s -o - 2>&1 \
 // RUN:     -target wasm32-unknown-unknown -mmultimemory \
 // RUN:   | FileCheck %s -check-prefix=MULTIMEMORY
 // RUN: %clang -E -dM %s -o - 2>&1 \
@@ -145,6 +154,7 @@
 // MVP-NOT: #define __wasm_exception_handling__ 1{{$}}
 // MVP-NOT: #define __wasm_extended_const__ 1{{$}}
 // MVP-NOT: #define __wasm_fp16__ 1{{$}}
+// MVP-NOT: #define __wasm_gc__ 1{{$}}
 // MVP-NOT: #define __wasm_multimemory__ 1{{$}}
 // MVP-NOT: #define __wasm_multivalue__ 1{{$}}
 // MVP-NOT: #define __wasm_mutable_globals__ 1{{$}}
@@ -181,6 +191,7 @@
 // GENERIC-NOT: #define __wasm_exception_handling__ 1{{$}}
 // GENERIC-NOT: #define __wasm_extended_const__ 1{{$}}
 // GENERIC-NOT: #define __wasm__fp16__ 1{{$}}
+// GENERIC-NOT: #define __wasm_gc__ 1{{$}}
 // GENERIC-NOT: #define __wasm_multimemory__ 1{{$}}
 // GENERIC-NOT: #define __wasm_relaxed_simd__ 1{{$}}
 // GENERIC-NOT: #define __wasm_simd128__ 1{{$}}
@@ -199,6 +210,7 @@
 // BLEEDING-EDGE-INCLUDE-DAG: #define __wasm_exception_handling__ 1{{$}}
 // BLEEDING-EDGE-INCLUDE-DAG: #define __wasm_extended_const__ 1{{$}}
 // BLEEDING-EDGE-INCLUDE-DAG: #define __wasm_fp16__ 1{{$}}
+// BLEEDING-EDGE-INCLUDE-DAG: #define __wasm_gc__ 1{{$}}
 // BLEEDING-EDGE-INCLUDE-DAG: #define __wasm_multimemory__ 1{{$}}
 // BLEEDING-EDGE-INCLUDE-DAG: #define __wasm_multivalue__ 1{{$}}
 // BLEEDING-EDGE-INCLUDE-DAG: #define __wasm_mutable_globals__ 1{{$}}
diff --git a/clang/test/Profile/coverage-prefix-map.c b/clang/test/Profile/coverage-prefix-map.c
index de9e377..1cb095f 100644
--- a/clang/test/Profile/coverage-prefix-map.c
+++ b/clang/test/Profile/coverage-prefix-map.c
@@ -25,7 +25,7 @@
 // COVERAGE-PREFIX-MAP-ORDER: @__llvm_coverage_mapping = {{.*"\\02.*newpath.*root.*nested.*coverage-prefix-map\.c}}
 
 // Test that last -fcoverage-prefix-map option (-fcoverage-prefix-map=%/t/root=.) is applied.
-// RUN: %clang_cc1 -fprofile-instrument=clang -fcoverage-mapping -emit-llvm -mllvm -enable-name-compression=false -main-file-name coverage-prefix-map.c %t/root/nested/coverage-prefix-map.c -fcoverage-prefix-map==newpath -fcoverage-prefix-map=%/t/root=. -o - | FileCheck --check-prefix=COVERAGE-PREFIX-MAP-REORDER %s
+// RUN: %clang_cc1 -fprofile-instrument=clang -fcoverage-mapping -emit-llvm -mllvm -enable-name-compression=false -main-file-name coverage-prefix-map.c %t/root/nested/coverage-prefix-map.c -fcoverage-compilation-dir=%t/root -fcoverage-prefix-map==newpath -fcoverage-prefix-map=%/t/root=. -o - | FileCheck --check-prefix=COVERAGE-PREFIX-MAP-REORDER %s
 // COVERAGE-PREFIX-MAP-REORDER: @__llvm_coverage_mapping =
 // COVERAGE-PREFIX-MAP-REORDER-NOT: newpath
 // COVERAGE-PREFIX-MAP-REORDER-SAME: nested{{.*coverage-prefix-map\.c}}
diff --git a/clang/test/Sema/aarch64-sme-attrs-without-sve.cpp b/clang/test/Sema/aarch64-sme-attrs-without-sve.cpp
new file mode 100644
index 0000000..cc6a841
--- /dev/null
+++ b/clang/test/Sema/aarch64-sme-attrs-without-sve.cpp
@@ -0,0 +1,56 @@
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -fsyntax-only -verify %s
+
+// REQUIRES: aarch64-registered-target
+
+#include <arm_sme.h>
+
+void test_streaming(svint32_t *out, svint32_t *in) __arm_streaming {
+  *out = *in;
+}
+
+void test_non_streaming(svint32_t *out, svint32_t *in) {
+  *out = *in; // expected-error {{SVE vector type 'svint32_t' (aka '__SVInt32_t') cannot be used in a non-streaming function}} \
+                 expected-error {{SVE vector type 'svint32_t' (aka '__SVInt32_t') cannot be used in a non-streaming function}}
+}
+
+// This previously led to a diagnostic that '&a' could not be used in a non-streaming function,
+// even though all functions are streaming.
+void test_both_streaming(int32_t *out) __arm_streaming {
+  svint32_t a;
+  [&a, &out]() __arm_streaming {
+    a = svdup_s32(1);
+    svst1(svptrue_b32(), out, a);
+  }();
+}
+
+void test_lambda_streaming(int32_t *out) {
+  svint32_t a; // expected-error {{SVE vector type 'svint32_t' (aka '__SVInt32_t') cannot be used in a non-streaming function}}
+  [&a, &out]() __arm_streaming {
+    a = 1;
+    svst1(svptrue_b32(), out, a);
+  }();
+}
+
+void test_lambda_non_streaming_capture_do_nothing() __arm_streaming {
+  svint32_t a;
+  [&a] {
+    // Do nothing.
+  }();
+}
+
+// Error: Non-streaming function attempts to dereference capture:
+void test_lambda_non_streaming_capture_return_vector() __arm_streaming {
+  svint32_t a;
+  [&a] {
+    return a; // expected-error {{SVE vector type 'svint32_t' (aka '__SVInt32_t') cannot be used in a non-streaming function}}
+  }();
+}
+
+// By reference capture, only records and uses the address of `a`:
+// FIXME: This should be okay.
+void test_lambda_non_streaming_capture_return_address() __arm_streaming {
+  svint32_t a;
+  [&a] {
+    return &a; // expected-error {{SVE vector type 'svint32_t' (aka '__SVInt32_t') cannot be used in a non-streaming function}}
+  }();
+}
diff --git a/clang/test/Sema/builtins-arm-exclusive-124.c b/clang/test/Sema/builtins-arm-exclusive-124.c
new file mode 100644
index 0000000..013ae3f
--- /dev/null
+++ b/clang/test/Sema/builtins-arm-exclusive-124.c
@@ -0,0 +1,26 @@
+// RUN: %clang_cc1 -triple armv7m -fsyntax-only -verify %s
+// RUN: %clang_cc1 -triple armv8m.main -fsyntax-only -verify %s
+// RUN: %clang_cc1 -triple armv8.1m.main -fsyntax-only -verify %s
+
+// All these architecture versions provide 1-, 2- or 4-byte exclusive accesses,
+// but don't have the LDREXD instruction which takes two operand registers and
+// performs an 8-byte exclusive access. So the calls with a pointer to long
+// long are rejected.
+
+int test_ldrex(char *addr) {
+  int sum = 0;
+  sum += __builtin_arm_ldrex(addr);
+  sum += __builtin_arm_ldrex((short *)addr);
+  sum += __builtin_arm_ldrex((int *)addr);
+  sum += __builtin_arm_ldrex((long long *)addr); // expected-error {{address argument to load or store exclusive builtin must be a pointer to 1,2 or 4 byte type}}
+  return sum;
+}
+
+int test_strex(char *addr) {
+  int res = 0;
+  res |= __builtin_arm_strex(4, addr);
+  res |= __builtin_arm_strex(42, (short *)addr);
+  res |= __builtin_arm_strex(42, (int *)addr);
+  res |= __builtin_arm_strex(42, (long long *)addr); // expected-error {{address argument to load or store exclusive builtin must be a pointer to 1,2 or 4 byte type}}
+  return res;
+}
diff --git a/clang/test/Sema/builtins-arm-exclusive-4.c b/clang/test/Sema/builtins-arm-exclusive-4.c
new file mode 100644
index 0000000..68f01f5
--- /dev/null
+++ b/clang/test/Sema/builtins-arm-exclusive-4.c
@@ -0,0 +1,22 @@
+// RUN: %clang_cc1 -triple armv6 -fsyntax-only -verify %s
+
+// Armv6 (apart from Armv6-M) provides 4-byte exclusive accesses, but not any
+// other size. So only the calls with a pointer to a 32-bit type are accepted.
+
+int test_ldrex(char *addr) {
+  int sum = 0;
+  sum += __builtin_arm_ldrex(addr); // expected-error {{address argument to load or store exclusive builtin must be a pointer to 4 byte type}}
+  sum += __builtin_arm_ldrex((short *)addr); // expected-error {{address argument to load or store exclusive builtin must be a pointer to 4 byte type}}
+  sum += __builtin_arm_ldrex((int *)addr);
+  sum += __builtin_arm_ldrex((long long *)addr); // expected-error {{address argument to load or store exclusive builtin must be a pointer to 4 byte type}}
+  return sum;
+}
+
+int test_strex(char *addr) {
+  int res = 0;
+  res |= __builtin_arm_strex(4, addr); // expected-error {{address argument to load or store exclusive builtin must be a pointer to 4 byte type}}
+  res |= __builtin_arm_strex(42, (short *)addr); // expected-error {{address argument to load or store exclusive builtin must be a pointer to 4 byte type}}
+  res |= __builtin_arm_strex(42, (int *)addr);
+  res |= __builtin_arm_strex(42, (long long *)addr); // expected-error {{address argument to load or store exclusive builtin must be a pointer to 4 byte type}}
+  return res;
+}
diff --git a/clang/test/Sema/builtins-arm-exclusive-none.c b/clang/test/Sema/builtins-arm-exclusive-none.c
new file mode 100644
index 0000000..76d327f
--- /dev/null
+++ b/clang/test/Sema/builtins-arm-exclusive-none.c
@@ -0,0 +1,22 @@
+// RUN: %clang_cc1 -triple armv6m -fsyntax-only -verify %s
+
+// Armv6-M does not support exclusive loads/stores at all, so all uses of
+// __builtin_arm_ldrex and __builtin_arm_strex is forbidden.
+
+int test_ldrex(char *addr) {
+  int sum = 0;
+  sum += __builtin_arm_ldrex(addr); // expected-error {{load and store exclusive builtins are not available on this architecture}}
+  sum += __builtin_arm_ldrex((short *)addr); // expected-error {{load and store exclusive builtins are not available on this architecture}}
+  sum += __builtin_arm_ldrex((int *)addr); // expected-error {{load and store exclusive builtins are not available on this architecture}}
+  sum += __builtin_arm_ldrex((long long *)addr); // expected-error {{load and store exclusive builtins are not available on this architecture}}
+  return sum;
+}
+
+int test_strex(char *addr) {
+  int res = 0;
+  res |= __builtin_arm_strex(4, addr); // expected-error {{load and store exclusive builtins are not available on this architecture}}
+  res |= __builtin_arm_strex(42, (short *)addr); // expected-error {{load and store exclusive builtins are not available on this architecture}}
+  res |= __builtin_arm_strex(42, (int *)addr); // expected-error {{load and store exclusive builtins are not available on this architecture}}
+  res |= __builtin_arm_strex(42, (long long *)addr); // expected-error {{load and store exclusive builtins are not available on this architecture}}
+  return res;
+}
diff --git a/clang/test/Sema/builtins-arm-exclusive.c b/clang/test/Sema/builtins-arm-exclusive.c
index 68457d2..49aea15 100644
--- a/clang/test/Sema/builtins-arm-exclusive.c
+++ b/clang/test/Sema/builtins-arm-exclusive.c
@@ -1,5 +1,13 @@
 // RUN: %clang_cc1 -triple armv7 -fsyntax-only -verify %s
 
+// General tests of __builtin_arm_ldrex and __builtin_arm_strex error checking.
+//
+// This test is compiled for Armv7-A, which provides exclusive load/store
+// instructions for 1-, 2-, 4- and 8-byte quantities. Other Arm architecture
+// versions provide subsets of those, requiring different error reporting.
+// Those are tested in builtins-arm-exclusive-124.c, builtins-arm-exclusive-4.c
+// and builtins-arm-exclusive-none.c.
+
 struct Simple {
   char a, b;
 };
diff --git a/clang/test/Sema/constexpr-void-cast.c b/clang/test/Sema/constexpr-void-cast.c
index 2ffc59f..cac671e 100644
--- a/clang/test/Sema/constexpr-void-cast.c
+++ b/clang/test/Sema/constexpr-void-cast.c
@@ -4,15 +4,16 @@
 // RUN: %clang_cc1 -x c -fsyntax-only %s -pedantic -verify=c-pedantic -std=c11 -fexperimental-new-constant-interpreter
 //
 // RUN: %clang_cc1 -x c++ -fsyntax-only %s -verify=cxx
-// RUN: %clang_cc1 -x c++ -fsyntax-only %s -pedantic -verify=cxx-pedantic
+// RUN: %clang_cc1 -x c++ -fsyntax-only %s -pedantic -verify=cxx,cxx-pedantic
 // RUN: %clang_cc1 -x c++ -fsyntax-only %s -verify=cxx -fexperimental-new-constant-interpreter
-// RUN: %clang_cc1 -x c++ -fsyntax-only %s -pedantic -verify=cxx-pedantic -fexperimental-new-constant-interpreter
+// RUN: %clang_cc1 -x c++ -fsyntax-only %s -pedantic -verify=cxx,cxx-pedantic -fexperimental-new-constant-interpreter
 
 // c-no-diagnostics
-// cxx-no-diagnostics
 
 void f(void);
 struct S {char c;} s;
 _Static_assert(&s != (void *)&f, ""); // c-pedantic-warning {{not an integer constant expression}} \
                                       // c-pedantic-note {{this conversion is not allowed in a constant expression}} \
+                                      // cxx-error {{static assertion expression is not an integral constant expression}} \
+                                      // cxx-note {{cast that performs the conversions of a reinterpret_cast is not allowed in a constant expression}} \
                                       // cxx-pedantic-warning {{'_Static_assert' is a C11 extension}}
diff --git a/clang/test/Sema/warn-unreachable_crash.cpp b/clang/test/Sema/warn-unreachable_crash.cpp
index 628abcc..1955c2c 100644
--- a/clang/test/Sema/warn-unreachable_crash.cpp
+++ b/clang/test/Sema/warn-unreachable_crash.cpp
@@ -1,16 +1,33 @@
-// RUN: %clang_cc1 -verify -Wunreachable-code %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -verify -Wunreachable-code %s
+// RUN: %clang_cc1 -triple aarch64-unknown-linux-gnu -target-feature +fullfp16 -verify -Wunreachable-code %s
+// REQUIRES: aarch64-registered-target
 
-// Previously this test will crash
-static void test(__fp16& x) {
-  if (x != 0 || x != 1.0) { // expected-note{{}} no-crash
-      x = 0.9;
-    } else
-      x = 0.8; // expected-warning{{code will never be executed}}
+// =======  __fp16 version  =======
+static void test_fp16(__fp16 &x) {
+  if (x != 0 || x != 1.0) {           // expected-note {{}} no-crash
+    x = 0.9;
+  } else
+    x = 0.8;                          // expected-warning{{code will never be executed}}
 }
 
-static void test2(__fp16& x) {
-  if (x != 1 && x == 1.0) { // expected-note{{}} no-crash
-      x = 0.9; // expected-warning{{code will never be executed}}
-    } else
-      x = 0.8;
+static void test_fp16_b(__fp16 &x) {
+  if (x != 1 && x == 1.0) {           // expected-note {{}} no-crash
+    x = 0.9;                          // expected-warning{{code will never be executed}}
+  } else
+    x = 0.8;
+}
+
+// =======  _Float16 version  =======
+static void test_f16(_Float16 &x) {
+  if (x != 0 || x != 1.0) {           // expected-note {{}} no-crash
+    x = 0.9;
+  } else
+    x = 0.8;                          // expected-warning{{code will never be executed}}
+}
+
+static void test_f16_b(_Float16 &x) {
+  if (x != 1 && x == 1.0) {           // expected-note {{}} no-crash
+    x = 0.9;                          // expected-warning{{code will never be executed}}
+  } else
+    x = 0.8;
 }
diff --git a/clang/test/SemaCXX/cxx23-assume.cpp b/clang/test/SemaCXX/cxx23-assume.cpp
index 726cb3b..99a82d9 100644
--- a/clang/test/SemaCXX/cxx23-assume.cpp
+++ b/clang/test/SemaCXX/cxx23-assume.cpp
@@ -8,6 +8,14 @@
 struct A{};
 struct B{ explicit operator bool() { return true; } };
 
+// This should be the first test case of this file.
+void IsActOnFinishFullExprCalled() {
+  // Do not add other test cases to this function.
+  // Make sure `ActOnFinishFullExpr` is called and creates `ExprWithCleanups`
+  // to avoid assertion failure.
+  [[assume(B{})]]; // expected-warning {{assumption is ignored because it contains (potential) side-effects}} // ext-warning {{C++23 extension}}
+}
+
 template <bool cond>
 void f() {
   [[assume(cond)]]; // ext-warning {{C++23 extension}}
diff --git a/clang/test/SemaCXX/noreturn-weverything.c b/clang/test/SemaCXX/noreturn-weverything.c
new file mode 100644
index 0000000..92a587d
--- /dev/null
+++ b/clang/test/SemaCXX/noreturn-weverything.c
@@ -0,0 +1,15 @@
+// RUN: %clang_cc1 -fsyntax-only %s -Weverything
+
+void free(void *);
+typedef void (*set_free_func)(void *);
+struct Method {
+  int nparams;
+  int *param;
+};
+void selelem_free_method(struct Method* method, void* data) {
+    set_free_func free_func = 0;
+    for (int i = 0; i < method->nparams; ++i)
+        free(&method->param[i]);
+    if (data && free_func)
+        free_func(data);
+}
diff --git a/clang/test/SemaCXX/overload-resolution-deferred-templates.cpp b/clang/test/SemaCXX/overload-resolution-deferred-templates.cpp
index 135865c..8b469dd2 100644
--- a/clang/test/SemaCXX/overload-resolution-deferred-templates.cpp
+++ b/clang/test/SemaCXX/overload-resolution-deferred-templates.cpp
@@ -80,30 +80,21 @@ struct ImplicitlyCopyable {
 static_assert(__is_constructible(ImplicitlyCopyable, const ImplicitlyCopyable&));
 
 
-struct Movable { // #Movable
+struct Movable {
   template <typename T>
   requires __is_constructible(Movable, T) // #err-self-constraint-1
-  explicit Movable(T op) noexcept; // #Movable1
-  Movable(Movable&&) noexcept = default; // #Movable2
+  explicit Movable(T op) noexcept; // #1
+  Movable(Movable&&) noexcept = default; // #2
 };
 static_assert(__is_constructible(Movable, Movable&&));
 static_assert(__is_constructible(Movable, const Movable&));
-// expected-error@-1 {{static assertion failed due to requirement '__is_constructible(Movable, const Movable &)'}} \
-// expected-error@-1 {{call to implicitly-deleted copy constructor of 'Movable'}} \
-// expected-note@#Movable  {{'Movable' defined here}} \
-// expected-note@#Movable  {{candidate constructor (the implicit copy constructor) not viable: no known conversion from 'int' to 'const Movable' for 1st argument}} \
-// expected-note@#Movable2  {{copy constructor is implicitly deleted because 'Movable' has a user-declared move constructor}} \
-// expected-note@#Movable2  {{candidate constructor not viable: no known conversion from 'int' to 'Movable' for 1st argument}} \
-// expected-note@#Movable1  {{candidate template ignored: constraints not satisfied [with T = int]}}
-
+// expected-error@-1 {{static assertion failed due to requirement '__is_constructible(Movable, const Movable &)'}}
 
 static_assert(__is_constructible(Movable, int));
-// expected-error@-1 {{static assertion failed due to requirement '__is_constructible(Movable, int)'}} \
-// expected-error@-1 {{no matching constructor for initialization of 'Movable'}} \
+// expected-error@-1{{static assertion failed due to requirement '__is_constructible(Movable, int)'}} \
 // expected-note@-1 2{{}}
 // expected-error@#err-self-constraint-1{{satisfaction of constraint '__is_constructible(Movable, T)' depends on itself}}
 // expected-note@#err-self-constraint-1 4{{}}
-// expected-note@#Movable  {{'Movable' defined here}}
 
 template <typename T>
 struct Members {
diff --git a/clang/test/SemaCXX/type-traits-unsatisfied-diags-std.cpp b/clang/test/SemaCXX/type-traits-unsatisfied-diags-std.cpp
index f3ddbbf..cf33ac2 100644
--- a/clang/test/SemaCXX/type-traits-unsatisfied-diags-std.cpp
+++ b/clang/test/SemaCXX/type-traits-unsatisfied-diags-std.cpp
@@ -42,14 +42,6 @@ static constexpr bool value = __is_standard_layout(T);
 };
 template <typename T>
 constexpr bool is_standard_layout_v = __is_standard_layout(T);
-
-template <typename... Args>
-struct is_constructible {
-    static constexpr bool value = __is_constructible(Args...);
-};
-
-template <typename... Args>
-constexpr bool is_constructible_v = __is_constructible(Args...);
 #endif
 
 #ifdef STD2
@@ -105,17 +97,6 @@ template <typename T>
 using is_standard_layout = __details_is_standard_layout<T>;
 template <typename T>
 constexpr bool is_standard_layout_v = __is_standard_layout(T);
-
-template <typename... Args>
-struct __details_is_constructible{
-    static constexpr bool value = __is_constructible(Args...);
-};
-
-template <typename... Args>
-using is_constructible  = __details_is_constructible<Args...>;
-
-template <typename... Args>
-constexpr bool is_constructible_v = __is_constructible(Args...);
 #endif
 
 
@@ -168,15 +149,6 @@ template <typename T>
 using is_standard_layout = __details_is_standard_layout<T>;
 template <typename T>
 constexpr bool is_standard_layout_v = is_standard_layout<T>::value;
-
-template <typename... Args>
-struct __details_is_constructible : bool_constant<__is_constructible(Args...)> {};
-
-template <typename... Args>
-using is_constructible  = __details_is_constructible<Args...>;
-
-template <typename... Args>
-constexpr bool is_constructible_v = is_constructible<Args...>::value;
 #endif
 
 }
@@ -239,15 +211,6 @@ static_assert(std::is_assignable_v<int&, void>);
 // expected-error@-1 {{static assertion failed due to requirement 'std::is_assignable_v<int &, void>'}} \
 // expected-error@-1 {{assigning to 'int' from incompatible type 'void'}}
 
-static_assert(std::is_constructible<int, int>::value);
-
-static_assert(std::is_constructible<void>::value);
-// expected-error-re@-1 {{static assertion failed due to requirement 'std::{{.*}}is_constructible<void>::value'}} \
-// expected-note@-1 {{because it is a cv void type}}
-static_assert(std::is_constructible_v<void>);
-// expected-error@-1 {{static assertion failed due to requirement 'std::is_constructible_v<void>'}} \
-// expected-note@-1 {{because it is a cv void type}}
-
 namespace test_namespace {
     using namespace std;
     static_assert(is_trivially_relocatable<int&>::value);
@@ -293,13 +256,6 @@ namespace test_namespace {
     // expected-error@-1 {{static assertion failed due to requirement 'is_empty_v<int &>'}} \
     // expected-note@-1 {{'int &' is not empty}} \
     // expected-note@-1 {{because it is a reference type}}
-
-    static_assert(is_constructible<void>::value);
-    // expected-error-re@-1 {{static assertion failed due to requirement '{{.*}}is_constructible<void>::value'}} \
-    // expected-note@-1 {{because it is a cv void type}}
-    static_assert(is_constructible_v<void>);
-    // expected-error@-1 {{static assertion failed due to requirement 'is_constructible_v<void>'}} \
-    // expected-note@-1 {{because it is a cv void type}}
 }
 
 
@@ -328,15 +284,6 @@ concept C4 = std::is_assignable_v<T, U>; // #concept8
 
 template <C4<void> T> void g4();  // #cand8
 
-template <typename... Args>
-requires std::is_constructible<Args...>::value void f3();  // #cand5
-
-template <typename... Args>
-concept C3 = std::is_constructible_v<Args...>; // #concept6
-
-template <C3 T> void g3();  // #cand6
-
-
 void test() {
     f<int&>();
     // expected-error@-1 {{no matching function for call to 'f'}} \
@@ -380,19 +327,6 @@ void test() {
     // expected-note@#cand8 {{because 'C4<int &, void>' evaluated to false}} \
     // expected-note@#concept8 {{because 'std::is_assignable_v<int &, void>' evaluated to false}} \
     // expected-error@#concept8 {{assigning to 'int' from incompatible type 'void'}}
-
-    f3<void>();
-    // expected-error@-1 {{no matching function for call to 'f3'}} \
-    // expected-note@#cand5 {{candidate template ignored: constraints not satisfied [with Args = <void>]}} \
-    // expected-note-re@#cand5 {{because '{{.*}}is_constructible<void>::value' evaluated to false}} \
-    // expected-note@#cand5 {{because it is a cv void type}}
-
-    g3<void>();
-    // expected-error@-1 {{no matching function for call to 'g3'}} \
-    // expected-note@#cand6 {{candidate template ignored: constraints not satisfied [with T = void]}} \
-    // expected-note@#cand6 {{because 'void' does not satisfy 'C3'}} \
-    // expected-note@#concept6 {{because 'std::is_constructible_v<void>' evaluated to false}} \
-    // expected-note@#concept6 {{because it is a cv void type}}
 }
 }
 
diff --git a/clang/test/SemaCXX/type-traits-unsatisfied-diags.cpp b/clang/test/SemaCXX/type-traits-unsatisfied-diags.cpp
index 54806a9..cc923d2 100644
--- a/clang/test/SemaCXX/type-traits-unsatisfied-diags.cpp
+++ b/clang/test/SemaCXX/type-traits-unsatisfied-diags.cpp
@@ -489,68 +489,6 @@ static_assert(__is_trivially_copyable(S12));
 // expected-note@#tc-S12 {{'S12' defined here}}
 }
 
-namespace constructible {
-
-struct S1 {  // #c-S1
-    S1(int); // #cc-S1
-};
-static_assert(__is_constructible(S1, char*));
-// expected-error@-1 {{static assertion failed due to requirement '__is_constructible(constructible::S1, char *)'}} \
-// expected-error@-1 {{no matching constructor for initialization of 'S1'}} \
-// expected-note@#c-S1 {{candidate constructor (the implicit copy constructor) not viable: no known conversion from 'char *' to 'const S1' for 1st argument}} \
-// expected-note@#c-S1 {{candidate constructor (the implicit move constructor) not viable: no known conversion from 'char *' to 'S1' for 1st argument}} \
-// expected-note@#cc-S1 {{candidate constructor not viable: no known conversion from 'char *' to 'int' for 1st argument; dereference the argument with *}} \
-// expected-note@#c-S1 {{'S1' defined here}}
-
-struct S2 { // #c-S2
-    S2(int, float, double); // #cc-S2
-};
-static_assert(__is_constructible(S2, float));
-// expected-error@-1 {{static assertion failed due to requirement '__is_constructible(constructible::S2, float)'}} \
-// expected-note@#c-S2 {{candidate constructor (the implicit copy constructor) not viable: no known conversion from 'float' to 'const S2' for 1st argument}} \
-// expected-note@#c-S2 {{candidate constructor (the implicit move constructor) not viable: no known conversion from 'float' to 'S2' for 1st argument}} \
-// expected-error@-1 {{no matching constructor for initialization of 'S2'}} \
-// expected-note@#cc-S2 {{candidate constructor not viable: requires 3 arguments, but 1 was provided}} \
-// expected-note@#c-S2 {{'S2' defined here}}
-
-static_assert(__is_constructible(S2, float, void));
-// expected-error@-1 {{static assertion failed due to requirement '__is_constructible(constructible::S2, float, void)'}} \
-// expected-note@#c-S2 {{candidate constructor (the implicit move constructor) not viable: requires 1 argument, but 2 were provided}} \
-// expected-note@#c-S2 {{candidate constructor (the implicit copy constructor) not viable: requires 1 argument, but 2 were provided}} \
-// expected-note@-1{{because it is a cv void type}} \
-// expected-error@-1 {{no matching constructor for initialization of 'S2'}} \
-// expected-note@#cc-S2 {{candidate constructor not viable: requires 3 arguments, but 2 were provided}} \
-// expected-note@#c-S2 {{'S2' defined here}}
-
-static_assert(__is_constructible(int[]));
-// expected-error@-1 {{static assertion failed due to requirement '__is_constructible(int[])'}} \
-// expected-note@-1 {{because it is an incomplete array type}}
-
-static_assert(__is_constructible(void));
-// expected-error@-1 {{static assertion failed due to requirement '__is_constructible(void)'}} \
-// expected-note@-1 {{because it is a cv void type}}
-
-static_assert(__is_constructible(void, void));
-// expected-error@-1 {{static assertion failed due to requirement '__is_constructible(void, void)'}} \
-// expected-note@-1 {{because it is a cv void type}}
-
-static_assert(__is_constructible(const void));
-// expected-error@-1 {{static assertion failed due to requirement '__is_constructible(const void)'}} \
-// expected-note@-1 {{because it is a cv void type}}
-
-static_assert(__is_constructible(volatile void));
-// expected-error@-1 {{static assertion failed due to requirement '__is_constructible(volatile void)'}} \
-// expected-note@-1 {{because it is a cv void type}}
-
-static_assert(__is_constructible(int ()));
-// expected-error@-1 {{static assertion failed due to requirement '__is_constructible(int ())'}} \
-// expected-note@-1 {{because it is a function type}}
-
-static_assert(__is_constructible(void (int, float)));
-// expected-error@-1 {{static assertion failed due to requirement '__is_constructible(void (int, float))'}} \
-// expected-note@-1 {{because it is a function type}}
-}
-
 namespace assignable {
 struct S1;
 static_assert(__is_assignable(S1&, const S1&));
diff --git a/clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp b/clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp
index d6c32ca..6ae329a 100644
--- a/clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp
+++ b/clang/test/SemaCXX/warn-unsafe-buffer-usage-libc-functions.cpp
@@ -47,11 +47,24 @@ namespace std {
     T *c_str();
     T *data();
     unsigned size_bytes();
+    unsigned size();
   };
 
   typedef basic_string<char> string;
   typedef basic_string<wchar_t> wstring;
 
+  template<typename T>
+  struct basic_string_view {
+    T *c_str() const noexcept;
+    T *data()  const noexcept;
+    unsigned size();
+    const T* begin() const noexcept;
+    const T* end() const noexcept;
+  };
+
+  typedef basic_string_view<char> string_view;
+  typedef basic_string_view<wchar_t> wstring_view;
+
   // C function under std:
   void memcpy();
   void strcpy();
@@ -134,6 +147,29 @@ void safe_examples(std::string s1, int *p) {
   snprintf(nullptr, 0, "%s%d%s%p%s", __PRETTY_FUNCTION__, *p, "hello", s1.c_str());           // no warn
 }
 
+void test_sarg_precision(std::string Str, std::string_view Sv, std::wstring_view WSv,
+			 std::span<char> SpC, std::span<int> SpI) {
+  printf("%.*s");
+  printf("%.*s", (int)Str.size(), Str.data());
+  printf("%.*s", (int)Str.size_bytes(), Str.data());
+  printf("%.*s", (int)Sv.size(), Sv.data());
+  printf("%.*s", (int)SpC.size(), SpC.data());
+  printf("%.*s", SpC.size(), SpC.data());
+  printf("%.*ls", WSv.size(), WSv.data());
+  printf("%.*s", SpC.data());  // no warn because `SpC.data()` is passed to the precision while the actually string pointer is not given
+
+  printf("%.*s", SpI.size(), SpI.data()); // expected-warning {{function 'printf' is unsafe}} expected-note{{string argument is not guaranteed to be null-terminated}}
+  printf("%.*s", SpI.size(), SpC.data()); // expected-warning {{function 'printf' is unsafe}} expected-note{{string argument is not guaranteed to be null-terminated}}
+  printf("%.*s", WSv.size(), WSv.data()); // expected-warning {{function 'printf' is unsafe}} expected-note{{string argument is not guaranteed to be null-terminated}}
+
+  char a[10];
+  int  b[10];
+
+  printf("%.10s", a);
+  printf("%.11s", a); // expected-warning {{function 'printf' is unsafe}} expected-note{{string argument is not guaranteed to be null-terminated}}
+  printf("%.10s", b); // expected-warning {{function 'printf' is unsafe}} expected-note{{string argument is not guaranteed to be null-terminated}}
+}
+
 
 void g(char *begin, char *end, char *p, std::span<char> s) {
   std::copy(begin, end, p); // no warn
diff --git a/clang/test/SemaHLSL/BuiltIns/D3DCOLORtoUBYTE4-errors.hlsl b/clang/test/SemaHLSL/BuiltIns/D3DCOLORtoUBYTE4-errors.hlsl
index e9ba851..e9bf4c9 100644
--- a/clang/test/SemaHLSL/BuiltIns/D3DCOLORtoUBYTE4-errors.hlsl
+++ b/clang/test/SemaHLSL/BuiltIns/D3DCOLORtoUBYTE4-errors.hlsl
@@ -25,5 +25,5 @@ struct S {
 int4 struct_arg(S v) {
     return D3DCOLORtoUBYTE4(v);
     // expected-error@-1 {{no matching function for call to 'D3DCOLORtoUBYTE4'}}
-    // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function not viable: no known conversion from 'S' to 'vector<float, 4>' (vector of 4 'float' values) for 1st argument}}
+    // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function not viable: no known conversion from 'S' to 'float4' (aka 'vector<float, 4>') for 1st argument}}
 }
diff --git a/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx1250-param.cl b/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx1250-param.cl
index 3247380..83c63f1 100644
--- a/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx1250-param.cl
+++ b/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx1250-param.cl
@@ -1,7 +1,21 @@
 // REQUIRES: amdgpu-registered-target
 // RUN: %clang_cc1 -cl-std=CL2.0 -triple amdgcn-- -target-cpu gfx1250 -verify -S -o - %s
 
+typedef unsigned int uint;
+typedef unsigned short int ushort;
 typedef int    v2i   __attribute__((ext_vector_type(2)));
+typedef unsigned int __attribute__((ext_vector_type(2))) uint2;
+typedef unsigned int __attribute__((ext_vector_type(3))) uint3;
+typedef __bf16 __attribute__((ext_vector_type(8))) bfloat8;
+typedef __bf16 __attribute__((ext_vector_type(16))) bfloat16;
+typedef __bf16 __attribute__((ext_vector_type(32))) bfloat32;
+typedef half __attribute__((ext_vector_type(8))) half8;
+typedef half __attribute__((ext_vector_type(16))) half16;
+typedef half __attribute__((ext_vector_type(32))) half32;
+typedef float __attribute__((ext_vector_type(8))) float8;
+typedef float __attribute__((ext_vector_type(16))) float16;
+typedef float __attribute__((ext_vector_type(32))) float32;
+
 typedef int    v4i   __attribute__((ext_vector_type(4)));
 typedef int    v8i   __attribute__((ext_vector_type(8)));
 
@@ -29,6 +43,32 @@ void test__builtin_amdgcn_cvt_f16_bf8(int a, int b) {
   __builtin_amdgcn_cvt_f16_bf8(a, b); // expected-error {{'__builtin_amdgcn_cvt_f16_bf8' must be a constant integer}}
 }
 
+void test_cvt_scale_pk(global half8 *outh8, global bfloat8 *outy8, uint2 src2,
+                       global float32 *outf32, global half16 *outh16, global bfloat16 *outy16,
+                       global float16 *outf16, uint3 src3,
+                       global float8 *outf8, uint src1, uint scale, uint scale_sel)
+{
+  *outh8 = __builtin_amdgcn_cvt_scale_pk8_f16_fp8(src2, scale, scale_sel); // expected-error {{'__builtin_amdgcn_cvt_scale_pk8_f16_fp8' must be a constant integer}}
+  *outy8 = __builtin_amdgcn_cvt_scale_pk8_bf16_fp8(src2, scale, scale_sel); // expected-error {{'__builtin_amdgcn_cvt_scale_pk8_bf16_fp8' must be a constant integer}}
+  *outh8 = __builtin_amdgcn_cvt_scale_pk8_f16_bf8(src2, scale, scale_sel); // expected-error {{'__builtin_amdgcn_cvt_scale_pk8_f16_bf8' must be a constant integer}}
+  *outy8 = __builtin_amdgcn_cvt_scale_pk8_bf16_bf8(src2, scale, scale_sel); // expected-error {{'__builtin_amdgcn_cvt_scale_pk8_bf16_bf8' must be a constant integer}}
+  *outh8 = __builtin_amdgcn_cvt_scale_pk8_f16_fp4(src1, scale, scale_sel); // expected-error {{'__builtin_amdgcn_cvt_scale_pk8_f16_fp4' must be a constant integer}}
+  *outy8 = __builtin_amdgcn_cvt_scale_pk8_bf16_fp4(src1, scale, scale_sel); // expected-error {{'__builtin_amdgcn_cvt_scale_pk8_bf16_fp4' must be a constant integer}}
+  *outf8 = __builtin_amdgcn_cvt_scale_pk8_f32_fp8(src2, scale, scale_sel); // expected-error {{'__builtin_amdgcn_cvt_scale_pk8_f32_fp8' must be a constant integer}}
+  *outf8 = __builtin_amdgcn_cvt_scale_pk8_f32_bf8(src2, scale, scale_sel); // expected-error {{'__builtin_amdgcn_cvt_scale_pk8_f32_bf8' must be a constant integer}}
+  *outf8 = __builtin_amdgcn_cvt_scale_pk8_f32_fp4(src1, scale, scale_sel); // expected-error {{'__builtin_amdgcn_cvt_scale_pk8_f32_fp4' must be a constant integer}}
+
+  *outh8 = __builtin_amdgcn_cvt_scale_pk8_f16_fp8(src2, scale, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
+  *outy8 = __builtin_amdgcn_cvt_scale_pk8_bf16_fp8(src2, scale, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
+  *outh8 = __builtin_amdgcn_cvt_scale_pk8_f16_bf8(src2, scale, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
+  *outy8 = __builtin_amdgcn_cvt_scale_pk8_bf16_bf8(src2, scale, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
+  *outh8 = __builtin_amdgcn_cvt_scale_pk8_f16_fp4(src1, scale, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
+  *outy8 = __builtin_amdgcn_cvt_scale_pk8_bf16_fp4(src1, scale, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
+  *outf8 = __builtin_amdgcn_cvt_scale_pk8_f32_fp8(src2, scale, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
+  *outf8 = __builtin_amdgcn_cvt_scale_pk8_f32_bf8(src2, scale, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
+  *outf8 = __builtin_amdgcn_cvt_scale_pk8_f32_fp4(src1, scale, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
+}
+
 void test_amdgcn_load_monitor(global int* b32gaddr, global v2i* b64gaddr, global v4i* b128gaddr, int *b32faddr, v2i* b64faddr, v4i *b128faddr,
                               global int* b32out, global v2i* b64out, global v4i* b128out, int cpol)
 {
diff --git a/clang/test/SemaSYCL/sycl-kernel-entry-point-attr-appertainment.cpp b/clang/test/SemaSYCL/sycl-kernel-entry-point-attr-appertainment.cpp
index 872a44d..4774c8e 100644
--- a/clang/test/SemaSYCL/sycl-kernel-entry-point-attr-appertainment.cpp
+++ b/clang/test/SemaSYCL/sycl-kernel-entry-point-attr-appertainment.cpp
@@ -43,10 +43,11 @@ template<int, int = 0> struct KN;
 ////////////////////////////////////////////////////////////////////////////////
 
 // Function declaration with GNU attribute spelling
+// expected-warning@+1 {{unknown attribute 'sycl_kernel_entry_point' ignored}}
 __attribute__((sycl_kernel_entry_point(KN<1>)))
 void ok1();
 
-// Function declaration with Clang attribute spelling.
+// Function declaration with C++11 attribute spelling.
 [[clang::sycl_kernel_entry_point(KN<2>)]]
 void ok2();
 
@@ -142,7 +143,7 @@ struct S15 {
 // on occassion), main() still can't function as a SYCL kernel entry point,
 // so this test ensures such attempted uses of the attribute are rejected.
 struct Smain;
-// expected-error@+1 {{'sycl_kernel_entry_point' attribute only applies to functions with a 'void' return type}}
+// expected-error@+1 {{'clang::sycl_kernel_entry_point' attribute only applies to functions with a 'void' return type}}
 [[clang::sycl_kernel_entry_point(Smain)]]
 int main();
 
@@ -164,7 +165,7 @@ struct B2 {
 
 struct B3 {
   // Non-static member function declaration.
-  // expected-error@+1 {{'sycl_kernel_entry_point' attribute cannot be applied to a non-static member function}}
+  // expected-error@+1 {{the 'clang::sycl_kernel_entry_point' attribute cannot be applied to a non-static member function}}
   [[clang::sycl_kernel_entry_point(BADKN<3>)]]
   void bad3();
 };
@@ -210,14 +211,14 @@ enum {
 };
 
 // Attribute added after the definition.
-// expected-error@+3 {{'sycl_kernel_entry_point' attribute cannot be added to a function after the function is defined}}
+// expected-error@+3 {{the 'clang::sycl_kernel_entry_point' attribute cannot be added to a function after the function is defined}}
 // expected-note@+1 {{previous definition is here}}
 void bad15() {}
 [[clang::sycl_kernel_entry_point(BADKN<15>)]]
 void bad15();
 
 // The function must return void.
-// expected-error@+1 {{'sycl_kernel_entry_point' attribute only applies to functions with a 'void' return type}}
+// expected-error@+1 {{the 'clang::sycl_kernel_entry_point' attribute only applies to functions with a 'void' return type}}
 [[clang::sycl_kernel_entry_point(BADKN<16>)]]
 int bad16();
 
@@ -230,12 +231,12 @@ void bad17(void (fp [[clang::sycl_kernel_entry_point(BADKN<17>)]])());
 // FIXME: and the C++ standard is unclear regarding whether such attributes are
 // FIXME: permitted. P3324 (Attributes for namespace aliases, template
 // FIXME: parameters, and lambda captures) seeks to clarify the situation.
-// FIXME-expected-error@+1 {{'sycl_kernel_entry_point' attribute only applies to functions}}
+// FIXME-expected-error@+1 {{'clang::sycl_kernel_entry_point' attribute only applies to functions}}
 template<void (fp [[clang::sycl_kernel_entry_point(BADKN<18>)]])()>
 void bad18();
 
 #if __cplusplus >= 202002L
-// expected-error@+1 {{'sycl_kernel_entry_point' attribute cannot be applied to a coroutine}}
+// expected-error@+1 {{the 'clang::sycl_kernel_entry_point' attribute cannot be applied to a coroutine}}
 [[clang::sycl_kernel_entry_point(BADKN<19>)]]
 void bad19() {
   co_return;
@@ -243,36 +244,36 @@ void bad19() {
 #endif
 
 struct B20 {
-  // expected-error@+1 {{'sycl_kernel_entry_point' attribute cannot be applied to a non-static member function}}
+  // expected-error@+1 {{the 'clang::sycl_kernel_entry_point' attribute cannot be applied to a non-static member function}}
   [[clang::sycl_kernel_entry_point(BADKN<20>)]]
   B20();
 };
 
 struct B21 {
-  // expected-error@+1 {{'sycl_kernel_entry_point' attribute cannot be applied to a non-static member function}}
+  // expected-error@+1 {{the 'clang::sycl_kernel_entry_point' attribute cannot be applied to a non-static member function}}
   [[clang::sycl_kernel_entry_point(BADKN<21>)]]
   ~B21();
 };
 
-// expected-error@+1 {{'sycl_kernel_entry_point' attribute cannot be applied to a variadic function}}
+// expected-error@+1 {{the 'clang::sycl_kernel_entry_point' attribute cannot be applied to a variadic function}}
 [[clang::sycl_kernel_entry_point(BADKN<22>)]]
 void bad22(...);
 
-// expected-error@+1 {{'sycl_kernel_entry_point' attribute cannot be applied to a deleted function}}
+// expected-error@+1 {{the 'clang::sycl_kernel_entry_point' attribute cannot be applied to a deleted function}}
 [[clang::sycl_kernel_entry_point(BADKN<23>)]]
 void bad23() = delete;
 
-// expected-error@+1 {{'sycl_kernel_entry_point' attribute cannot be applied to a constexpr function}}
+// expected-error@+1 {{the 'clang::sycl_kernel_entry_point' attribute cannot be applied to a constexpr function}}
 [[clang::sycl_kernel_entry_point(BADKN<24>)]]
 constexpr void bad24() {}
 
 #if __cplusplus >= 202002L
-// expected-error@+1 {{'sycl_kernel_entry_point' attribute cannot be applied to a consteval function}}
+// expected-error@+1 {{the 'clang::sycl_kernel_entry_point' attribute cannot be applied to a consteval function}}
 [[clang::sycl_kernel_entry_point(BADKN<25>)]]
 consteval void bad25() {}
 #endif
 
-// expected-error@+1 {{'sycl_kernel_entry_point' attribute cannot be applied to a function declared with the 'noreturn' attribute}}
+// expected-error@+1 {{the 'clang::sycl_kernel_entry_point' attribute cannot be applied to a function declared with the 'noreturn' attribute}}
 [[clang::sycl_kernel_entry_point(BADKN<26>)]]
 [[noreturn]] void bad26();
 
@@ -283,7 +284,7 @@ __attribute__((target("sse4.2"))) void bad27();
 
 template<typename KNT>
 struct B28 {
-  // expected-error@+1 {{'sycl_kernel_entry_point' attribute cannot be applied to a deleted function}}
+  // expected-error@+1 {{the 'clang::sycl_kernel_entry_point' attribute cannot be applied to a deleted function}}
   [[clang::sycl_kernel_entry_point(KNT)]]
   friend void bad28() = delete;
 };
@@ -291,7 +292,7 @@ struct B28 {
 #if __cplusplus >= 202002L
 template<typename KNT, typename T>
 struct B29 {
-  // expected-error@+1 {{'sycl_kernel_entry_point' attribute cannot be applied to a defaulted function}}
+  // expected-error@+1 {{the 'clang::sycl_kernel_entry_point' attribute cannot be applied to a defaulted function}}
   [[clang::sycl_kernel_entry_point(KNT)]]
   friend T operator==(B29, B29) = default;
 };
@@ -300,7 +301,7 @@ struct B29 {
 #if __cplusplus >= 202002L
 template<typename KNT>
 struct B30 {
-  // expected-error@+1 {{'sycl_kernel_entry_point' attribute cannot be applied to a coroutine}}
+  // expected-error@+1 {{the 'clang::sycl_kernel_entry_point' attribute cannot be applied to a coroutine}}
   [[clang::sycl_kernel_entry_point(KNT)]]
   friend void bad30() { co_return; }
 };
@@ -308,14 +309,14 @@ struct B30 {
 
 template<typename KNT>
 struct B31 {
-  // expected-error@+1 {{'sycl_kernel_entry_point' attribute cannot be applied to a variadic function}}
+  // expected-error@+1 {{the 'clang::sycl_kernel_entry_point' attribute cannot be applied to a variadic function}}
   [[clang::sycl_kernel_entry_point(KNT)]]
   friend void bad31(...) {}
 };
 
 template<typename KNT>
 struct B32 {
-  // expected-error@+1 {{'sycl_kernel_entry_point' attribute cannot be applied to a constexpr function}}
+  // expected-error@+1 {{the 'clang::sycl_kernel_entry_point' attribute cannot be applied to a constexpr function}}
   [[clang::sycl_kernel_entry_point(KNT)]]
   friend constexpr void bad32() {}
 };
@@ -323,7 +324,7 @@ struct B32 {
 #if __cplusplus >= 202002L
 template<typename KNT>
 struct B33 {
-  // expected-error@+1 {{'sycl_kernel_entry_point' attribute cannot be applied to a consteval function}}
+  // expected-error@+1 {{the 'clang::sycl_kernel_entry_point' attribute cannot be applied to a consteval function}}
   [[clang::sycl_kernel_entry_point(KNT)]]
   friend consteval void bad33() {}
 };
@@ -331,31 +332,31 @@ struct B33 {
 
 template<typename KNT>
 struct B34 {
-  // expected-error@+1 {{'sycl_kernel_entry_point' attribute cannot be applied to a function declared with the 'noreturn' attribute}}
+  // expected-error@+1 {{the 'clang::sycl_kernel_entry_point' attribute cannot be applied to a function declared with the 'noreturn' attribute}}
   [[clang::sycl_kernel_entry_point(KNT)]]
   [[noreturn]] friend void bad34() {}
 };
 
 #if __cplusplus >= 202302L
-// expected-error@+1 {{'sycl_kernel_entry_point' attribute cannot be applied to a non-static member function}}
+// expected-error@+1 {{the 'clang::sycl_kernel_entry_point' attribute cannot be applied to a non-static member function}}
 auto bad35 = [] [[clang::sycl_kernel_entry_point(BADKN<35>)]] -> void {};
 #endif
 
 #if __cplusplus >= 202302L
-// expected-error@+1 {{'sycl_kernel_entry_point' attribute only applies to functions with a non-deduced 'void' return type}}
+// expected-error@+1 {{the 'clang::sycl_kernel_entry_point' attribute only applies to functions with a non-deduced 'void' return type}}
 auto bad36 = [] [[clang::sycl_kernel_entry_point(BADKN<36>)]] static {};
 #endif
 
 #if __cplusplus >= 202302L
-// expected-error@+1 {{'sycl_kernel_entry_point' attribute cannot be applied to a coroutine}}
+// expected-error@+1 {{the 'clang::sycl_kernel_entry_point' attribute cannot be applied to a coroutine}}
 auto bad37 = [] [[clang::sycl_kernel_entry_point(BADKN<37>)]] static -> void { co_return; };
 #endif
 
-// expected-error@+1 {{'sycl_kernel_entry_point' attribute cannot be applied to a function defined with a function try block}}
+// expected-error@+1 {{the 'clang::sycl_kernel_entry_point' attribute cannot be applied to a function defined with a function try block}}
 [[clang::sycl_kernel_entry_point(BADKN<38>)]]
 void bad38() try {} catch(...) {}
 
-// expected-error@+2 {{'sycl_kernel_entry_point' attribute cannot be applied to a function defined with a function try block}}
+// expected-error@+2 {{the 'clang::sycl_kernel_entry_point' attribute cannot be applied to a function defined with a function try block}}
 template<typename>
 [[clang::sycl_kernel_entry_point(BADKN<39>)]]
 void bad39() try {} catch(...) {}
diff --git a/clang/test/SemaSYCL/sycl-kernel-entry-point-attr-kernel-name-module.cpp b/clang/test/SemaSYCL/sycl-kernel-entry-point-attr-kernel-name-module.cpp
index 83c3e5c..8788e14 100644
--- a/clang/test/SemaSYCL/sycl-kernel-entry-point-attr-kernel-name-module.cpp
+++ b/clang/test/SemaSYCL/sycl-kernel-entry-point-attr-kernel-name-module.cpp
@@ -71,29 +71,29 @@ template void m2_test8<KN<8>>();
 #include "m2.h"
 
 // Expected diagnostics for m1_test3() and m2_test3():
-// expected-error@m2.h:4 {{'sycl_kernel_entry_point' kernel name argument conflicts with a previous declaration}}
+// expected-error@m2.h:4 {{the 'clang::sycl_kernel_entry_point' kernel name argument conflicts with a previous declaration}}
 // expected-note@m1.h:12 {{previous declaration is here}}
 
 // Expected diagnostics for m1_test4<KN<4>>() and m2_test4<KN<4>>():
-// expected-error@m2.h:8 {{'sycl_kernel_entry_point' kernel name argument conflicts with a previous declaration}}
+// expected-error@m2.h:8 {{the 'clang::sycl_kernel_entry_point' kernel name argument conflicts with a previous declaration}}
 // expected-note@m1.h:16 {{previous declaration is here}}
 
-// expected-error@+3 {{'sycl_kernel_entry_point' kernel name argument conflicts with a previous declaration}}
+// expected-error@+3 {{the 'clang::sycl_kernel_entry_point' kernel name argument conflicts with a previous declaration}}
 // expected-note@m1.h:4 {{previous declaration is here}}
 [[clang::sycl_kernel_entry_point(KN<5>)]]
 void test5() {}
 
-// expected-error@+3 {{'sycl_kernel_entry_point' kernel name argument conflicts with a previous declaration}}
+// expected-error@+3 {{the 'clang::sycl_kernel_entry_point' kernel name argument conflicts with a previous declaration}}
 // expected-note@m1.h:8 {{previous declaration is here}}
 [[clang::sycl_kernel_entry_point(KN<6>)]]
 void test6() {}
 
-// expected-error@+3 {{'sycl_kernel_entry_point' kernel name argument conflicts with a previous declaration}}
+// expected-error@+3 {{the 'clang::sycl_kernel_entry_point' kernel name argument conflicts with a previous declaration}}
 // expected-note@m2.h:12 {{previous declaration is here}}
 [[clang::sycl_kernel_entry_point(KN<7>)]]
 void test7() {}
 
-// expected-error@+3 {{'sycl_kernel_entry_point' kernel name argument conflicts with a previous declaration}}
+// expected-error@+3 {{the 'clang::sycl_kernel_entry_point' kernel name argument conflicts with a previous declaration}}
 // expected-note@m2.h:16 {{previous declaration is here}}
 [[clang::sycl_kernel_entry_point(KN<8>)]]
 void test8() {}
diff --git a/clang/test/SemaSYCL/sycl-kernel-entry-point-attr-kernel-name-pch.cpp b/clang/test/SemaSYCL/sycl-kernel-entry-point-attr-kernel-name-pch.cpp
index 0814d89..0575a7a 100644
--- a/clang/test/SemaSYCL/sycl-kernel-entry-point-attr-kernel-name-pch.cpp
+++ b/clang/test/SemaSYCL/sycl-kernel-entry-point-attr-kernel-name-pch.cpp
@@ -25,12 +25,12 @@ template void pch_test2<KN<2>>();
 
 
 #--- test.cpp
-// expected-error@+3 {{'sycl_kernel_entry_point' kernel name argument conflicts with a previous declaration}}
+// expected-error@+3 {{the 'clang::sycl_kernel_entry_point' kernel name argument conflicts with a previous declaration}}
 // expected-note@pch.h:4 {{previous declaration is here}}
 [[clang::sycl_kernel_entry_point(KN<1>)]]
 void test1() {}
 
-// expected-error@+3 {{'sycl_kernel_entry_point' kernel name argument conflicts with a previous declaration}}
+// expected-error@+3 {{the 'clang::sycl_kernel_entry_point' kernel name argument conflicts with a previous declaration}}
 // expected-note@pch.h:8 {{previous declaration is here}}
 [[clang::sycl_kernel_entry_point(KN<2>)]]
 void test2() {}
diff --git a/clang/test/SemaSYCL/sycl-kernel-entry-point-attr-kernel-name.cpp b/clang/test/SemaSYCL/sycl-kernel-entry-point-attr-kernel-name.cpp
index 78dd896..c7b8393 100644
--- a/clang/test/SemaSYCL/sycl-kernel-entry-point-attr-kernel-name.cpp
+++ b/clang/test/SemaSYCL/sycl-kernel-entry-point-attr-kernel-name.cpp
@@ -7,7 +7,7 @@
 // specification.
 
 struct S1;
-// expected-warning@+3 {{redundant 'sycl_kernel_entry_point' attribute}}
+// expected-warning@+3 {{redundant 'clang::sycl_kernel_entry_point' attribute}}
 // expected-note@+1  {{previous attribute is here}}
 [[clang::sycl_kernel_entry_point(S1),
   clang::sycl_kernel_entry_point(S1)]]
@@ -46,13 +46,13 @@ enum E9 : int; // #E9-decl
 struct B10 {
   struct MS;
 };
-// FIXME-expected-error@+1 {{'sycl_kernel_entry_point' attribute argument must be a forward declarable class type}}
+// FIXME-expected-error@+1 {{the 'clang::sycl_kernel_entry_point' attribute argument must be a forward declarable class type}}
 [[clang::sycl_kernel_entry_point(B10::MS)]] void bad10();
 
 struct B11 {
   struct MS;
 };
-// FIXME-expected-error@+3 {{'sycl_kernel_entry_point' attribute argument must be a forward declarable class type}}
+// FIXME-expected-error@+3 {{the 'clang::sycl_kernel_entry_point' attribute argument must be a forward declarable class type}}
 template<typename T>
 [[clang::sycl_kernel_entry_point(typename T::MS)]] void bad11() {}
 template void bad11<B11>();
@@ -60,35 +60,35 @@ template void bad11<B11>();
 template<typename T>
 [[clang::sycl_kernel_entry_point(T)]] void bad12();
 void f12() {
-  // FIXME-expected-error@+2 {{'sycl_kernel_entry_point' attribute argument must be a forward declarable class type}}
+  // FIXME-expected-error@+2 {{the 'clang::sycl_kernel_entry_point' attribute argument must be a forward declarable class type}}
   struct LS;
   bad12<LS>();
 }
 
 struct B13_1;
 struct B13_2;
-// expected-error@+3 {{'sycl_kernel_entry_point' kernel name argument does not match prior declaration: 'B13_2' vs 'B13_1'}}
+// expected-error@+3 {{the 'clang::sycl_kernel_entry_point' kernel name argument does not match prior declaration: 'B13_2' vs 'B13_1'}}
 // expected-note@+1  {{'bad13' declared here}}
 [[clang::sycl_kernel_entry_point(B13_1)]] void bad13();
 [[clang::sycl_kernel_entry_point(B13_2)]] void bad13() {}
 
 struct B14_1;
 struct B14_2;
-// expected-error@+3 {{'sycl_kernel_entry_point' kernel name argument does not match prior declaration: 'B14_2' vs 'B14_1'}}
+// expected-error@+3 {{the 'clang::sycl_kernel_entry_point' kernel name argument does not match prior declaration: 'B14_2' vs 'B14_1'}}
 // expected-note@+1  {{previous attribute is here}}
 [[clang::sycl_kernel_entry_point(B14_1),
   clang::sycl_kernel_entry_point(B14_2)]]
 void bad14();
 
 struct B15;
-// expected-error@+3 {{'sycl_kernel_entry_point' kernel name argument conflicts with a previous declaration}}
+// expected-error@+3 {{the 'clang::sycl_kernel_entry_point' kernel name argument conflicts with a previous declaration}}
 // expected-note@+1  {{previous declaration is here}}
 [[clang::sycl_kernel_entry_point(B15)]] void bad15_1();
 [[clang::sycl_kernel_entry_point(B15)]] void bad15_2();
 
 struct B16_1;
 struct B16_2;
-// expected-error@+4 {{'sycl_kernel_entry_point' kernel name argument does not match prior declaration: 'B16_2' vs 'B16_1'}}
+// expected-error@+4 {{the 'clang::sycl_kernel_entry_point' kernel name argument does not match prior declaration: 'B16_2' vs 'B16_1'}}
 // expected-note@+1  {{'bad16' declared here}}
 [[clang::sycl_kernel_entry_point(B16_1)]] void bad16();
 void bad16(); // The attribute from the previous declaration is inherited.
diff --git a/clang/test/SemaTemplate/concepts.cpp b/clang/test/SemaTemplate/concepts.cpp
index 663bc98..d63ad01 100644
--- a/clang/test/SemaTemplate/concepts.cpp
+++ b/clang/test/SemaTemplate/concepts.cpp
@@ -1228,26 +1228,26 @@ template <KnownKind T> struct KnownType {
 
 }
 
-namespace GH115838 {
+namespace CWG2369_Regression_2 {
 
-template<typename T> concept has_x = requires(T t) {{ t.x };};
-
-class Publ { public:    int x = 0; };
-class Priv { private:   int x = 0; };
-class Prot { protected: int x = 0; };
-class Same { protected: int x = 0; };
-
-template<typename T> class D;
-template<typename T> requires ( has_x<T>) class D<T>: public T { public: static constexpr bool has = 1; };
-template<typename T> requires (!has_x<T>) class D<T>: public T { public: static constexpr bool has = 0; };
+template <typename T>
+concept HasFastPropertyForAttribute =
+    requires(T element, int name) { element.propertyForAttribute(name); };
+
+template <typename OwnerType>
+struct SVGPropertyOwnerRegistry {
+  static int fastAnimatedPropertyLookup() {
+    static_assert (HasFastPropertyForAttribute<OwnerType>);
+    return 1;
+  }
+};
 
-// "Same" is identical to "Prot" but queried before used.
-static_assert(!has_x<Same>,  "Protected should be invisible.");
-static_assert(!D<Same>::has, "Protected should be invisible.");
+class SVGCircleElement {
+  friend SVGPropertyOwnerRegistry<SVGCircleElement>;
+  void propertyForAttribute(int);
+};
 
-static_assert( D<Publ>::has, "Public should be visible.");
-static_assert(!D<Priv>::has, "Private should be invisible.");
-static_assert(!D<Prot>::has, "Protected should be invisible.");
+int i = SVGPropertyOwnerRegistry<SVGCircleElement>::fastAnimatedPropertyLookup();
 
 }
 
diff --git a/clang/tools/cir-lsp-server/CMakeLists.txt b/clang/tools/cir-lsp-server/CMakeLists.txt
index aad2646..f421215 100644
--- a/clang/tools/cir-lsp-server/CMakeLists.txt
+++ b/clang/tools/cir-lsp-server/CMakeLists.txt
@@ -1,26 +1,23 @@
-get_property(dialect_libs GLOBAL PROPERTY MLIR_DIALECT_LIBS)
-get_property(conversion_libs GLOBAL PROPERTY MLIR_CONVERSION_LIBS)
-
 include_directories(${LLVM_MAIN_SRC_DIR}/../mlir/include)
 include_directories(${CMAKE_BINARY_DIR}/tools/mlir/include)
 
 set(LIBS
-  ${dialect_libs}
-  ${conversion_libs}
   ${test_libs}
   clangCIR
   clangCIRLoweringDirectToLLVM
-  MLIRCIR
   MLIRAffineAnalysis
   MLIRAnalysis
+  MLIRCIR
   MLIRDialect
+  MLIRIR
   MLIRLspServerLib
   MLIRParser
   MLIRPass
-  MLIRTransforms
-  MLIRTransformUtils
+  MLIRRegisterAllDialects
+  MLIRRegisterAllPasses
   MLIRSupport
-  MLIRIR
+  MLIRTransformUtils
+  MLIRTransforms
   )
 
 add_mlir_tool(cir-lsp-server
diff --git a/clang/tools/cir-opt/cir-opt.cpp b/clang/tools/cir-opt/cir-opt.cpp
index 3dad3b1..c4d29a2 100644
--- a/clang/tools/cir-opt/cir-opt.cpp
+++ b/clang/tools/cir-opt/cir-opt.cpp
@@ -17,11 +17,12 @@
 #include "mlir/Dialect/Func/IR/FuncOps.h"
 #include "mlir/Dialect/LLVMIR/LLVMDialect.h"
 #include "mlir/Dialect/MemRef/IR/MemRef.h"
-#include "mlir/InitAllPasses.h"
+#include "mlir/IR/BuiltinDialect.h"
 #include "mlir/Pass/PassManager.h"
 #include "mlir/Pass/PassOptions.h"
 #include "mlir/Pass/PassRegistry.h"
 #include "mlir/Tools/mlir-opt/MlirOptMain.h"
+#include "mlir/Transforms/Passes.h"
 #include "clang/CIR/Dialect/IR/CIRDialect.h"
 #include "clang/CIR/Dialect/Passes.h"
 #include "clang/CIR/Passes.h"
diff --git a/clang/tools/clang-extdef-mapping/ClangExtDefMapGen.cpp b/clang/tools/clang-extdef-mapping/ClangExtDefMapGen.cpp
index 0b621b8..ddb2944 100644
--- a/clang/tools/clang-extdef-mapping/ClangExtDefMapGen.cpp
+++ b/clang/tools/clang-extdef-mapping/ClangExtDefMapGen.cpp
@@ -134,10 +134,9 @@ GetDiagnosticsEngine(DiagnosticOptions &DiagOpts) {
   TextDiagnosticPrinter *DiagClient =
       new TextDiagnosticPrinter(llvm::errs(), DiagOpts);
   DiagClient->setPrefix("clang-extdef-mappping");
-  IntrusiveRefCntPtr<DiagnosticIDs> DiagID(new DiagnosticIDs());
 
-  IntrusiveRefCntPtr<DiagnosticsEngine> DiagEngine(
-      new DiagnosticsEngine(DiagID, DiagOpts, DiagClient));
+  auto DiagEngine = llvm::makeIntrusiveRefCnt<DiagnosticsEngine>(
+      DiagnosticIDs::create(), DiagOpts, DiagClient);
   Diags.swap(DiagEngine);
 
   // Retain this one time so it's not destroyed by ASTUnit::LoadFromASTFile
diff --git a/clang/tools/clang-format/ClangFormat.cpp b/clang/tools/clang-format/ClangFormat.cpp
index 24ad3cb..5f6502f 100644
--- a/clang/tools/clang-format/ClangFormat.cpp
+++ b/clang/tools/clang-format/ClangFormat.cpp
@@ -237,12 +237,11 @@ static bool parseLineRange(StringRef Input, unsigned &FromLine,
 
 static bool fillRanges(MemoryBuffer *Code,
                        std::vector<tooling::Range> &Ranges) {
-  IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> InMemoryFileSystem(
-      new llvm::vfs::InMemoryFileSystem);
+  auto InMemoryFileSystem =
+      makeIntrusiveRefCnt<llvm::vfs::InMemoryFileSystem>();
   FileManager Files(FileSystemOptions(), InMemoryFileSystem);
   DiagnosticOptions DiagOpts;
-  DiagnosticsEngine Diagnostics(
-      IntrusiveRefCntPtr<DiagnosticIDs>(new DiagnosticIDs), DiagOpts);
+  DiagnosticsEngine Diagnostics(DiagnosticIDs::create(), DiagOpts);
   SourceManager Sources(Diagnostics, Files);
   const auto ID = createInMemoryFile("<irrelevant>", *Code, Sources, Files,
                                      InMemoryFileSystem.get());
@@ -511,15 +510,14 @@ static bool format(StringRef FileName, bool ErrorOnIncompleteFormat = false) {
   if (OutputXML) {
     outputXML(Replaces, FormatChanges, Status, Cursor, CursorPosition);
   } else {
-    IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> InMemoryFileSystem(
-        new llvm::vfs::InMemoryFileSystem);
+    auto InMemoryFileSystem =
+        makeIntrusiveRefCnt<llvm::vfs::InMemoryFileSystem>();
     FileManager Files(FileSystemOptions(), InMemoryFileSystem);
 
     DiagnosticOptions DiagOpts;
     ClangFormatDiagConsumer IgnoreDiagnostics;
-    DiagnosticsEngine Diagnostics(
-        IntrusiveRefCntPtr<DiagnosticIDs>(new DiagnosticIDs), DiagOpts,
-        &IgnoreDiagnostics, false);
+    DiagnosticsEngine Diagnostics(DiagnosticIDs::create(), DiagOpts,
+                                  &IgnoreDiagnostics, false);
     SourceManager Sources(Diagnostics, Files);
     FileID ID = createInMemoryFile(AssumedFileName, *Code, Sources, Files,
                                    InMemoryFileSystem.get());
diff --git a/clang/tools/clang-fuzzer/handle-cxx/handle_cxx.cpp b/clang/tools/clang-fuzzer/handle-cxx/handle_cxx.cpp
index 98925c0..8259361 100644
--- a/clang/tools/clang-fuzzer/handle-cxx/handle_cxx.cpp
+++ b/clang/tools/clang-fuzzer/handle-cxx/handle_cxx.cpp
@@ -33,9 +33,8 @@ void clang_fuzzer::HandleCXX(const std::string &S,
       new FileManager(FileSystemOptions()));
   IgnoringDiagConsumer Diags;
   DiagnosticOptions DiagOpts;
-  DiagnosticsEngine Diagnostics(
-      IntrusiveRefCntPtr<clang::DiagnosticIDs>(new DiagnosticIDs()), DiagOpts,
-      &Diags, false);
+  DiagnosticsEngine Diagnostics(DiagnosticIDs::create(), DiagOpts, &Diags,
+                                false);
   std::unique_ptr<clang::CompilerInvocation> Invocation(
       tooling::newInvocation(&Diagnostics, CC1Args, /*BinaryName=*/nullptr));
   std::unique_ptr<llvm::MemoryBuffer> Input =
@@ -49,4 +48,3 @@ void clang_fuzzer::HandleCXX(const std::string &S,
   action->runInvocation(std::move(Invocation), Files.get(), PCHContainerOps,
                         &Diags);
 }
-
diff --git a/clang/tools/clang-import-test/clang-import-test.cpp b/clang/tools/clang-import-test/clang-import-test.cpp
index 7f5df92..ab021a5 100644
--- a/clang/tools/clang-import-test/clang-import-test.cpp
+++ b/clang/tools/clang-import-test/clang-import-test.cpp
@@ -236,7 +236,7 @@ std::unique_ptr<CodeGenerator> BuildCodeGen(CompilerInstance &CI,
                                             llvm::LLVMContext &LLVMCtx) {
   StringRef ModuleName("$__module");
   return std::unique_ptr<CodeGenerator>(CreateLLVMCodeGen(
-      CI.getDiagnostics(), ModuleName, &CI.getVirtualFileSystem(),
+      CI.getDiagnostics(), ModuleName, CI.getVirtualFileSystemPtr(),
       CI.getHeaderSearchOpts(), CI.getPreprocessorOpts(), CI.getCodeGenOpts(),
       LLVMCtx));
 }
diff --git a/clang/tools/clang-installapi/ClangInstallAPI.cpp b/clang/tools/clang-installapi/ClangInstallAPI.cpp
index 60e9fc4..70091fc 100644
--- a/clang/tools/clang-installapi/ClangInstallAPI.cpp
+++ b/clang/tools/clang-installapi/ClangInstallAPI.cpp
@@ -77,16 +77,17 @@ static bool run(ArrayRef<const char *> Args, const char *ProgName) {
       ArrayRef(Args).slice(1), MissingArgIndex, MissingArgCount);
   ParseDiagnosticArgs(DiagOpts, ParsedArgs);
 
-  IntrusiveRefCntPtr<DiagnosticsEngine> Diag = new clang::DiagnosticsEngine(
-      new clang::DiagnosticIDs(), DiagOpts,
+  auto Diag = llvm::makeIntrusiveRefCnt<clang::DiagnosticsEngine>(
+      clang::DiagnosticIDs::create(), DiagOpts,
       new clang::TextDiagnosticPrinter(llvm::errs(), DiagOpts));
 
   // Create file manager for all file operations and holding in-memory generated
   // inputs.
-  llvm::IntrusiveRefCntPtr<llvm::vfs::OverlayFileSystem> OverlayFileSystem(
-      new llvm::vfs::OverlayFileSystem(llvm::vfs::getRealFileSystem()));
-  llvm::IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> InMemoryFileSystem(
-      new llvm::vfs::InMemoryFileSystem);
+  auto OverlayFileSystem =
+      llvm::makeIntrusiveRefCnt<llvm::vfs::OverlayFileSystem>(
+          llvm::vfs::getRealFileSystem());
+  auto InMemoryFileSystem =
+      llvm::makeIntrusiveRefCnt<llvm::vfs::InMemoryFileSystem>();
   OverlayFileSystem->pushOverlay(InMemoryFileSystem);
   IntrusiveRefCntPtr<clang::FileManager> FM(
       new FileManager(clang::FileSystemOptions(), OverlayFileSystem));
diff --git a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
index 9d34b62..1d91f5f2 100644
--- a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
+++ b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
@@ -474,10 +474,10 @@ Expected<StringRef> clang(ArrayRef<StringRef> InputFiles, const ArgList &Args,
   StringRef Arch = Args.getLastArgValue(OPT_arch_EQ);
   // Create a new file to write the linked device image to. Assume that the
   // input filename already has the device and architecture.
-  auto TempFileOrErr =
-      createOutputFile(sys::path::filename(ExecutableName) + "." +
-                           Triple.getArchName() + "." + Arch,
-                       "img");
+  std::string OutputFileBase =
+      "." + Triple.getArchName().str() + "." + Arch.str();
+  auto TempFileOrErr = createOutputFile(
+      sys::path::filename(ExecutableName) + OutputFileBase, "img");
   if (!TempFileOrErr)
     return TempFileOrErr.takeError();
 
@@ -486,6 +486,12 @@ Expected<StringRef> clang(ArrayRef<StringRef> InputFiles, const ArgList &Args,
       "--no-default-config",
       "-o",
       *TempFileOrErr,
+      // Without -dumpdir, Clang will place auxiliary output files in the
+      // temporary directory of TempFileOrErr, where they will not easily be
+      // found by the user and might eventually be automatically removed.  Tell
+      // Clang to instead place them alongside the final executable.
+      "-dumpdir",
+      Args.MakeArgString(ExecutableName + OutputFileBase + ".img."),
       Args.MakeArgString("--target=" + Triple.getTriple()),
   };
 
diff --git a/clang/tools/clang-repl/CMakeLists.txt b/clang/tools/clang-repl/CMakeLists.txt
index 68d86dd..c3d14ce 100644
--- a/clang/tools/clang-repl/CMakeLists.txt
+++ b/clang/tools/clang-repl/CMakeLists.txt
@@ -19,14 +19,14 @@ if(MSVC)
   set_target_properties(clang-repl PROPERTIES WINDOWS_EXPORT_ALL_SYMBOLS 1)
 
   # RTTI/C++ symbols
-  set(clang_repl_exports ${clang_repl_exports} ??_7type_info@@6B@
-    ?__type_info_root_node@@3U__type_info_node@@A
-    ?nothrow@std@@3Unothrow_t@1@B
+  set(clang_repl_exports ${clang_repl_exports} ??_7type_info@@6B@,DATA
+    ?__type_info_root_node@@3U__type_info_node@@A,DATA
+    ?nothrow@std@@3Unothrow_t@1@B,DATA
   )
 
   # Compiler added symbols for static variables. NOT for VStudio < 2015
-  set(clang_repl_exports ${clang_repl_exports} _Init_thread_abort _Init_thread_epoch
-    _Init_thread_footer _Init_thread_header _tls_index
+  set(clang_repl_exports ${clang_repl_exports} _Init_thread_abort _Init_thread_epoch,DATA
+    _Init_thread_footer _Init_thread_header _tls_index,DATA
   )
 
   if(CMAKE_SIZEOF_VOID_P EQUAL 8)
@@ -50,7 +50,10 @@ if(MSVC)
   endif()
 
   # List to '/EXPORT:sym0 /EXPORT:sym1 /EXPORT:sym2 ...'
-  list(TRANSFORM clang_repl_exports PREPEND "LINKER:/EXPORT:")
+  # The 'SHELL' prefix tells CMake to use a space instead of comma as the
+  # separator between the driver and linker options, which we need since MSVC's
+  # linker uses `,DATA` as a suffix to indicate that data is being exported.
+  list(TRANSFORM clang_repl_exports PREPEND "LINKER:SHELL:/EXPORT:")
 
   set_property(TARGET clang-repl APPEND PROPERTY LINK_OPTIONS ${clang_repl_exports})
 
diff --git a/clang/tools/diagtool/ShowEnabledWarnings.cpp b/clang/tools/diagtool/ShowEnabledWarnings.cpp
index 0d1455d..bea0288 100644
--- a/clang/tools/diagtool/ShowEnabledWarnings.cpp
+++ b/clang/tools/diagtool/ShowEnabledWarnings.cpp
@@ -55,7 +55,6 @@ static char getCharForLevel(DiagnosticsEngine::Level Level) {
 
 static IntrusiveRefCntPtr<DiagnosticsEngine>
 createDiagnostics(unsigned int argc, char **argv) {
-  IntrusiveRefCntPtr<DiagnosticIDs> DiagIDs(new DiagnosticIDs());
   DiagnosticOptions DiagOpts;
 
   // Buffer diagnostics from argument parsing so that we can output them using a
@@ -67,7 +66,8 @@ createDiagnostics(unsigned int argc, char **argv) {
   Args.push_back("diagtool");
   Args.append(argv, argv + argc);
   CreateInvocationOptions CIOpts;
-  CIOpts.Diags = new DiagnosticsEngine(DiagIDs, DiagOpts, DiagsBuffer);
+  CIOpts.Diags = llvm::makeIntrusiveRefCnt<DiagnosticsEngine>(
+      DiagnosticIDs::create(), DiagOpts, DiagsBuffer);
   std::unique_ptr<CompilerInvocation> Invocation =
       createInvocation(Args, CIOpts);
   if (!Invocation)
diff --git a/clang/tools/diagtool/TreeView.cpp b/clang/tools/diagtool/TreeView.cpp
index 7e47c74..13b8015 100644
--- a/clang/tools/diagtool/TreeView.cpp
+++ b/clang/tools/diagtool/TreeView.cpp
@@ -32,7 +32,7 @@ public:
   static bool isIgnored(unsigned DiagID) {
     // FIXME: This feels like a hack.
     static DiagnosticOptions DiagOpts;
-    static clang::DiagnosticsEngine Diags(new DiagnosticIDs, DiagOpts);
+    static clang::DiagnosticsEngine Diags(DiagnosticIDs::create(), DiagOpts);
     return Diags.isIgnored(DiagID, SourceLocation());
   }
 
diff --git a/clang/tools/driver/cc1_main.cpp b/clang/tools/driver/cc1_main.cpp
index 9b1e390..854ab3e 100644
--- a/clang/tools/driver/cc1_main.cpp
+++ b/clang/tools/driver/cc1_main.cpp
@@ -217,7 +217,7 @@ static int PrintEnabledExtensions(const TargetOptions& TargetOpts) {
 int cc1_main(ArrayRef<const char *> Argv, const char *Argv0, void *MainAddr) {
   ensureSufficientStack();
 
-  IntrusiveRefCntPtr<DiagnosticIDs> DiagID(new DiagnosticIDs());
+  IntrusiveRefCntPtr<DiagnosticIDs> DiagID = DiagnosticIDs::create();
 
   // Register the support for object-file-wrapped Clang modules.
   auto PCHOps = std::make_shared<PCHContainerOperations>();
diff --git a/clang/tools/driver/cc1as_main.cpp b/clang/tools/driver/cc1as_main.cpp
index f938e7e..e9243ca 100644
--- a/clang/tools/driver/cc1as_main.cpp
+++ b/clang/tools/driver/cc1as_main.cpp
@@ -662,8 +662,7 @@ int cc1as_main(ArrayRef<const char *> Argv, const char *Argv0, void *MainAddr) {
   TextDiagnosticPrinter *DiagClient =
       new TextDiagnosticPrinter(errs(), DiagOpts);
   DiagClient->setPrefix("clang -cc1as");
-  IntrusiveRefCntPtr<DiagnosticIDs> DiagID(new DiagnosticIDs());
-  DiagnosticsEngine Diags(DiagID, DiagOpts, DiagClient);
+  DiagnosticsEngine Diags(DiagnosticIDs::create(), DiagOpts, DiagClient);
 
   // Set an error handler, so that any LLVM backend diagnostics go through our
   // error handler.
diff --git a/clang/tools/driver/cc1gen_reproducer_main.cpp b/clang/tools/driver/cc1gen_reproducer_main.cpp
index 8d7171e..ddff0d0 100644
--- a/clang/tools/driver/cc1gen_reproducer_main.cpp
+++ b/clang/tools/driver/cc1gen_reproducer_main.cpp
@@ -119,8 +119,8 @@ generateReproducerForInvocationArguments(ArrayRef<const char *> Argv,
 
   DiagnosticOptions DiagOpts;
 
-  IntrusiveRefCntPtr<DiagnosticIDs> DiagID(new DiagnosticIDs());
-  DiagnosticsEngine Diags(DiagID, DiagOpts, new IgnoringDiagConsumer());
+  DiagnosticsEngine Diags(DiagnosticIDs::create(), DiagOpts,
+                          new IgnoringDiagConsumer());
   auto VFS = llvm::vfs::getRealFileSystem();
   ProcessWarningOptions(Diags, DiagOpts, *VFS, /*ReportDiags=*/false);
   Driver TheDriver(ToolContext.Path, llvm::sys::getDefaultTargetTriple(), Diags,
diff --git a/clang/tools/driver/driver.cpp b/clang/tools/driver/driver.cpp
index 9ed8d4e..e5c3c4e 100644
--- a/clang/tools/driver/driver.cpp
+++ b/clang/tools/driver/driver.cpp
@@ -331,9 +331,7 @@ int clang_main(int Argc, char **Argv, const llvm::ToolContext &ToolContext) {
       new TextDiagnosticPrinter(llvm::errs(), *DiagOpts);
   FixupDiagPrefixExeName(DiagClient, ProgName);
 
-  IntrusiveRefCntPtr<DiagnosticIDs> DiagID(new DiagnosticIDs());
-
-  DiagnosticsEngine Diags(DiagID, *DiagOpts, DiagClient);
+  DiagnosticsEngine Diags(DiagnosticIDs::create(), *DiagOpts, DiagClient);
 
   if (!DiagOpts->DiagnosticSerializationFile.empty()) {
     auto SerializedConsumer =
diff --git a/clang/tools/libclang/CIndexCodeCompletion.cpp b/clang/tools/libclang/CIndexCodeCompletion.cpp
index 8f6729b..adac7c3 100644
--- a/clang/tools/libclang/CIndexCodeCompletion.cpp
+++ b/clang/tools/libclang/CIndexCodeCompletion.cpp
@@ -357,8 +357,8 @@ static std::atomic<unsigned> CodeCompletionResultObjects;
 AllocatedCXCodeCompleteResults::AllocatedCXCodeCompleteResults(
     IntrusiveRefCntPtr<FileManager> FileMgr)
     : CXCodeCompleteResults(),
-      Diag(new DiagnosticsEngine(
-          IntrusiveRefCntPtr<DiagnosticIDs>(new DiagnosticIDs), DiagOpts)),
+      Diag(llvm::makeIntrusiveRefCnt<DiagnosticsEngine>(DiagnosticIDs::create(),
+                                                        DiagOpts)),
       FileMgr(std::move(FileMgr)),
       SourceMgr(new SourceManager(*Diag, *this->FileMgr)),
       CodeCompletionAllocator(
@@ -763,7 +763,7 @@ clang_codeCompleteAt_Impl(CXTranslationUnit TU, const char *complete_filename,
                     RemappedFiles, (options & CXCodeComplete_IncludeMacros),
                     (options & CXCodeComplete_IncludeCodePatterns),
                     IncludeBriefComments, Capture,
-                    CXXIdx->getPCHContainerOperations(), *Results->Diag,
+                    CXXIdx->getPCHContainerOperations(), Results->Diag,
                     Results->LangOpts, *Results->SourceMgr, *Results->FileMgr,
                     Results->Diagnostics, Results->TemporaryBuffers,
                     /*SyntaxOnlyAction=*/nullptr);
diff --git a/clang/unittests/AST/ASTVectorTest.cpp b/clang/unittests/AST/ASTVectorTest.cpp
index 66003b4..03da549 100644
--- a/clang/unittests/AST/ASTVectorTest.cpp
+++ b/clang/unittests/AST/ASTVectorTest.cpp
@@ -26,14 +26,13 @@ namespace {
 class ASTVectorTest : public ::testing::Test {
 protected:
   ASTVectorTest()
-      : FileMgr(FileMgrOpts), DiagID(new DiagnosticIDs()),
-        Diags(DiagID, DiagOpts, new IgnoringDiagConsumer()),
+      : FileMgr(FileMgrOpts),
+        Diags(DiagnosticIDs::create(), DiagOpts, new IgnoringDiagConsumer()),
         SourceMgr(Diags, FileMgr), Idents(LangOpts, nullptr),
         Ctxt(LangOpts, SourceMgr, Idents, Sels, Builtins, TU_Complete) {}
 
   FileSystemOptions FileMgrOpts;
   FileManager FileMgr;
-  IntrusiveRefCntPtr<DiagnosticIDs> DiagID;
   DiagnosticOptions DiagOpts;
   DiagnosticsEngine Diags;
   SourceManager SourceMgr;
diff --git a/clang/unittests/AST/CommentLexer.cpp b/clang/unittests/AST/CommentLexer.cpp
index dc10dae..99f4691 100644
--- a/clang/unittests/AST/CommentLexer.cpp
+++ b/clang/unittests/AST/CommentLexer.cpp
@@ -27,13 +27,12 @@ namespace {
 class CommentLexerTest : public ::testing::Test {
 protected:
   CommentLexerTest()
-      : FileMgr(FileMgrOpts), DiagID(new DiagnosticIDs()),
-        Diags(DiagID, DiagOpts, new IgnoringDiagConsumer()),
+      : FileMgr(FileMgrOpts),
+        Diags(DiagnosticIDs::create(), DiagOpts, new IgnoringDiagConsumer()),
         SourceMgr(Diags, FileMgr), Traits(Allocator, CommentOptions()) {}
 
   FileSystemOptions FileMgrOpts;
   FileManager FileMgr;
-  IntrusiveRefCntPtr<DiagnosticIDs> DiagID;
   DiagnosticOptions DiagOpts;
   DiagnosticsEngine Diags;
   SourceManager SourceMgr;
@@ -2006,4 +2005,3 @@ TEST_F(CommentLexerTest, MultipleComments) {
 
 } // end namespace comments
 } // end namespace clang
-
diff --git a/clang/unittests/AST/CommentParser.cpp b/clang/unittests/AST/CommentParser.cpp
index 67fabe5..3bd2bdb 100644
--- a/clang/unittests/AST/CommentParser.cpp
+++ b/clang/unittests/AST/CommentParser.cpp
@@ -33,13 +33,12 @@ const bool MY_DEBUG = true;
 class CommentParserTest : public ::testing::Test {
 protected:
   CommentParserTest()
-      : FileMgr(FileMgrOpts), DiagID(new DiagnosticIDs()),
-        Diags(DiagID, DiagOpts, new IgnoringDiagConsumer()),
+      : FileMgr(FileMgrOpts),
+        Diags(DiagnosticIDs::create(), DiagOpts, new IgnoringDiagConsumer()),
         SourceMgr(Diags, FileMgr), Traits(Allocator, CommentOptions()) {}
 
   FileSystemOptions FileMgrOpts;
   FileManager FileMgr;
-  IntrusiveRefCntPtr<DiagnosticIDs> DiagID;
   DiagnosticOptions DiagOpts;
   DiagnosticsEngine Diags;
   SourceManager SourceMgr;
diff --git a/clang/unittests/AST/CommentTextTest.cpp b/clang/unittests/AST/CommentTextTest.cpp
index 84ec51a..675173c 100644
--- a/clang/unittests/AST/CommentTextTest.cpp
+++ b/clang/unittests/AST/CommentTextTest.cpp
@@ -44,7 +44,7 @@ protected:
     // shouldn't matter.
     RawComment Comment(SourceMgr, CommentRange, EmptyOpts, /*Merged=*/true);
     DiagnosticOptions DiagOpts;
-    DiagnosticsEngine Diags(new DiagnosticIDs, DiagOpts);
+    DiagnosticsEngine Diags(DiagnosticIDs::create(), DiagOpts);
     return Comment.getFormattedText(SourceMgr, Diags);
   }
 };
diff --git a/clang/unittests/Analysis/MacroExpansionContextTest.cpp b/clang/unittests/Analysis/MacroExpansionContextTest.cpp
index 9874ea6..25a76ed 100644
--- a/clang/unittests/Analysis/MacroExpansionContextTest.cpp
+++ b/clang/unittests/Analysis/MacroExpansionContextTest.cpp
@@ -33,10 +33,10 @@ namespace {
 class MacroExpansionContextTest : public ::testing::Test {
 protected:
   MacroExpansionContextTest()
-      : InMemoryFileSystem(new llvm::vfs::InMemoryFileSystem),
+      : InMemoryFileSystem(
+            llvm::makeIntrusiveRefCnt<llvm::vfs::InMemoryFileSystem>()),
         FileMgr(FileSystemOptions(), InMemoryFileSystem),
-        DiagID(new DiagnosticIDs()),
-        Diags(DiagID, DiagOpts, new IgnoringDiagConsumer()),
+        Diags(DiagnosticIDs::create(), DiagOpts, new IgnoringDiagConsumer()),
         SourceMgr(Diags, FileMgr), TargetOpts(new TargetOptions()) {
     TargetOpts->Triple = "x86_64-pc-linux-unknown";
     Target = TargetInfo::CreateTargetInfo(Diags, *TargetOpts);
@@ -45,7 +45,6 @@ protected:
 
   IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> InMemoryFileSystem;
   FileManager FileMgr;
-  IntrusiveRefCntPtr<DiagnosticIDs> DiagID;
   DiagnosticOptions DiagOpts;
   DiagnosticsEngine Diags;
   SourceManager SourceMgr;
diff --git a/clang/unittests/Analysis/UnsafeBufferUsageTest.cpp b/clang/unittests/Analysis/UnsafeBufferUsageTest.cpp
index 9da2c58..f795918 100644
--- a/clang/unittests/Analysis/UnsafeBufferUsageTest.cpp
+++ b/clang/unittests/Analysis/UnsafeBufferUsageTest.cpp
@@ -12,13 +12,12 @@ namespace {
 class UnsafeBufferUsageTest : public ::testing::Test {
 protected:
   UnsafeBufferUsageTest()
-      : FileMgr(FileMgrOpts), DiagID(new DiagnosticIDs()),
-        Diags(DiagID, DiagOpts, new IgnoringDiagConsumer()),
+      : FileMgr(FileMgrOpts),
+        Diags(DiagnosticIDs::create(), DiagOpts, new IgnoringDiagConsumer()),
         SourceMgr(Diags, FileMgr) {}
 
   FileSystemOptions FileMgrOpts;
   FileManager FileMgr;
-  IntrusiveRefCntPtr<DiagnosticIDs> DiagID;
   DiagnosticOptions DiagOpts;
   DiagnosticsEngine Diags;
   SourceManager SourceMgr;
@@ -58,4 +57,4 @@ TEST_F(UnsafeBufferUsageTest, FixItHintsConflict) {
 
   Fixes = {H1, H2, H3, MkDummyHint(2, 23) /* overlaps H1, H2, and H3 */};
   EXPECT_TRUE(internal::anyConflict(Fixes, SourceMgr));
-}
-\ No newline at end of file
+}
diff --git a/clang/unittests/Basic/DiagnosticTest.cpp b/clang/unittests/Basic/DiagnosticTest.cpp
index b0a034e..4b3af00 100644
--- a/clang/unittests/Basic/DiagnosticTest.cpp
+++ b/clang/unittests/Basic/DiagnosticTest.cpp
@@ -47,7 +47,7 @@ using testing::IsEmpty;
 // Check that DiagnosticErrorTrap works with SuppressAllDiagnostics.
 TEST(DiagnosticTest, suppressAndTrap) {
   DiagnosticOptions DiagOpts;
-  DiagnosticsEngine Diags(new DiagnosticIDs(), DiagOpts,
+  DiagnosticsEngine Diags(DiagnosticIDs::create(), DiagOpts,
                           new IgnoringDiagConsumer());
   Diags.setSuppressAllDiagnostics(true);
 
@@ -78,7 +78,7 @@ TEST(DiagnosticTest, suppressAndTrap) {
 TEST(DiagnosticTest, fatalsAsError) {
   for (unsigned FatalsAsError = 0; FatalsAsError != 2; ++FatalsAsError) {
     DiagnosticOptions DiagOpts;
-    DiagnosticsEngine Diags(new DiagnosticIDs(), DiagOpts,
+    DiagnosticsEngine Diags(DiagnosticIDs::create(), DiagOpts,
                             new IgnoringDiagConsumer());
     Diags.setFatalsAsError(FatalsAsError);
 
@@ -102,7 +102,7 @@ TEST(DiagnosticTest, fatalsAsError) {
 
 TEST(DiagnosticTest, tooManyErrorsIsAlwaysFatal) {
   DiagnosticOptions DiagOpts;
-  DiagnosticsEngine Diags(new DiagnosticIDs(), DiagOpts,
+  DiagnosticsEngine Diags(DiagnosticIDs::create(), DiagOpts,
                           new IgnoringDiagConsumer());
   Diags.setFatalsAsError(true);
 
@@ -119,7 +119,7 @@ TEST(DiagnosticTest, tooManyErrorsIsAlwaysFatal) {
 // Check that soft RESET works as intended
 TEST(DiagnosticTest, softReset) {
   DiagnosticOptions DiagOpts;
-  DiagnosticsEngine Diags(new DiagnosticIDs(), DiagOpts,
+  DiagnosticsEngine Diags(DiagnosticIDs::create(), DiagOpts,
                           new IgnoringDiagConsumer());
 
   unsigned numWarnings = 0U, numErrors = 0U;
@@ -143,7 +143,7 @@ TEST(DiagnosticTest, softReset) {
 
 TEST(DiagnosticTest, diagnosticError) {
   DiagnosticOptions DiagOpts;
-  DiagnosticsEngine Diags(new DiagnosticIDs(), DiagOpts,
+  DiagnosticsEngine Diags(DiagnosticIDs::create(), DiagOpts,
                           new IgnoringDiagConsumer());
   PartialDiagnostic::DiagStorageAllocator Alloc;
   llvm::Expected<std::pair<int, int>> Value = DiagnosticError::create(
@@ -166,7 +166,7 @@ TEST(DiagnosticTest, diagnosticError) {
 
 TEST(DiagnosticTest, storedDiagEmptyWarning) {
   DiagnosticOptions DiagOpts;
-  DiagnosticsEngine Diags(new DiagnosticIDs(), DiagOpts);
+  DiagnosticsEngine Diags(DiagnosticIDs::create(), DiagOpts);
 
   class CaptureDiagnosticConsumer : public DiagnosticConsumer {
   public:
@@ -197,7 +197,7 @@ protected:
   llvm::IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> FS =
       llvm::makeIntrusiveRefCnt<llvm::vfs::InMemoryFileSystem>();
   DiagnosticOptions DiagOpts;
-  DiagnosticsEngine Diags{new DiagnosticIDs(), DiagOpts};
+  DiagnosticsEngine Diags{DiagnosticIDs::create(), DiagOpts};
 
   llvm::ArrayRef<StoredDiagnostic> diags() {
     return CaptureConsumer.StoredDiags;
diff --git a/clang/unittests/Basic/FileManagerTest.cpp b/clang/unittests/Basic/FileManagerTest.cpp
index 88d778f..7b3e8bc 100644
--- a/clang/unittests/Basic/FileManagerTest.cpp
+++ b/clang/unittests/Basic/FileManagerTest.cpp
@@ -454,8 +454,7 @@ TEST_F(FileManagerTest, makeAbsoluteUsesVFS) {
                                                        : StringRef("/");
   llvm::sys::path::append(CustomWorkingDir, "some", "weird", "path");
 
-  auto FS = IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem>(
-      new llvm::vfs::InMemoryFileSystem);
+  auto FS = llvm::makeIntrusiveRefCnt<llvm::vfs::InMemoryFileSystem>();
   // setCurrentworkingdirectory must finish without error.
   ASSERT_TRUE(!FS->setCurrentWorkingDirectory(CustomWorkingDir));
 
@@ -475,8 +474,7 @@ TEST_F(FileManagerTest, makeAbsoluteUsesVFS) {
 TEST_F(FileManagerTest, getVirtualFileFillsRealPathName) {
   SmallString<64> CustomWorkingDir = getSystemRoot();
 
-  auto FS = IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem>(
-      new llvm::vfs::InMemoryFileSystem);
+  auto FS = llvm::makeIntrusiveRefCnt<llvm::vfs::InMemoryFileSystem>();
   // setCurrentworkingdirectory must finish without error.
   ASSERT_TRUE(!FS->setCurrentWorkingDirectory(CustomWorkingDir));
 
@@ -501,8 +499,7 @@ TEST_F(FileManagerTest, getVirtualFileFillsRealPathName) {
 TEST_F(FileManagerTest, getFileDontOpenRealPath) {
   SmallString<64> CustomWorkingDir = getSystemRoot();
 
-  auto FS = IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem>(
-      new llvm::vfs::InMemoryFileSystem);
+  auto FS = llvm::makeIntrusiveRefCnt<llvm::vfs::InMemoryFileSystem>();
   // setCurrentworkingdirectory must finish without error.
   ASSERT_TRUE(!FS->setCurrentWorkingDirectory(CustomWorkingDir));
 
@@ -533,8 +530,7 @@ TEST_F(FileManagerTest, getBypassFile) {
   CustomWorkingDir = "/";
 #endif
 
-  auto FS = IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem>(
-      new llvm::vfs::InMemoryFileSystem);
+  auto FS = llvm::makeIntrusiveRefCnt<llvm::vfs::InMemoryFileSystem>();
   // setCurrentworkingdirectory must finish without error.
   ASSERT_TRUE(!FS->setCurrentWorkingDirectory(CustomWorkingDir));
 
diff --git a/clang/unittests/Basic/SarifTest.cpp b/clang/unittests/Basic/SarifTest.cpp
index ad9f8ec..089b6cb 100644
--- a/clang/unittests/Basic/SarifTest.cpp
+++ b/clang/unittests/Basic/SarifTest.cpp
@@ -41,15 +41,14 @@ static std::string serializeSarifDocument(llvm::json::Object &&Doc) {
 class SarifDocumentWriterTest : public ::testing::Test {
 protected:
   SarifDocumentWriterTest()
-      : InMemoryFileSystem(new llvm::vfs::InMemoryFileSystem),
+      : InMemoryFileSystem(
+            llvm::makeIntrusiveRefCnt<llvm::vfs::InMemoryFileSystem>()),
         FileMgr(FileSystemOptions(), InMemoryFileSystem),
-        DiagID(new DiagnosticIDs()),
-        Diags(DiagID, DiagOpts, new IgnoringDiagConsumer()),
+        Diags(DiagnosticIDs::create(), DiagOpts, new IgnoringDiagConsumer()),
         SourceMgr(Diags, FileMgr) {}
 
   IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> InMemoryFileSystem;
   FileManager FileMgr;
-  IntrusiveRefCntPtr<DiagnosticIDs> DiagID;
   DiagnosticOptions DiagOpts;
   DiagnosticsEngine Diags;
   SourceManager SourceMgr;
diff --git a/clang/unittests/Basic/SourceManagerTest.cpp b/clang/unittests/Basic/SourceManagerTest.cpp
index cbe047b..04b23dd 100644
--- a/clang/unittests/Basic/SourceManagerTest.cpp
+++ b/clang/unittests/Basic/SourceManagerTest.cpp
@@ -40,8 +40,8 @@ namespace {
 class SourceManagerTest : public ::testing::Test {
 protected:
   SourceManagerTest()
-      : FileMgr(FileMgrOpts), DiagID(new DiagnosticIDs()),
-        Diags(DiagID, DiagOpts, new IgnoringDiagConsumer()),
+      : FileMgr(FileMgrOpts),
+        Diags(DiagnosticIDs::create(), DiagOpts, new IgnoringDiagConsumer()),
         SourceMgr(Diags, FileMgr), TargetOpts(new TargetOptions) {
     TargetOpts->Triple = "x86_64-apple-darwin11.1.0";
     Target = TargetInfo::CreateTargetInfo(Diags, *TargetOpts);
@@ -49,7 +49,6 @@ protected:
 
   FileSystemOptions FileMgrOpts;
   FileManager FileMgr;
-  IntrusiveRefCntPtr<DiagnosticIDs> DiagID;
   DiagnosticOptions DiagOpts;
   DiagnosticsEngine Diags;
   SourceManager SourceMgr;
diff --git a/clang/unittests/CodeGen/TestCompiler.h b/clang/unittests/CodeGen/TestCompiler.h
index a6fec7f..f6fada5 100644
--- a/clang/unittests/CodeGen/TestCompiler.h
+++ b/clang/unittests/CodeGen/TestCompiler.h
@@ -58,7 +58,7 @@ struct TestCompiler {
 
     CG.reset(CreateLLVMCodeGen(
         compiler.getDiagnostics(), "main-module",
-        &compiler.getVirtualFileSystem(), compiler.getHeaderSearchOpts(),
+        compiler.getVirtualFileSystemPtr(), compiler.getHeaderSearchOpts(),
         compiler.getPreprocessorOpts(), compiler.getCodeGenOpts(), Context));
   }
 
diff --git a/clang/unittests/Driver/DXCModeTest.cpp b/clang/unittests/Driver/DXCModeTest.cpp
index f684593..e7d8137 100644
--- a/clang/unittests/Driver/DXCModeTest.cpp
+++ b/clang/unittests/Driver/DXCModeTest.cpp
@@ -55,17 +55,15 @@ static void validateTargetProfile(
 }
 
 TEST(DxcModeTest, TargetProfileValidation) {
-  IntrusiveRefCntPtr<DiagnosticIDs> DiagID(new DiagnosticIDs());
-
-  IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> InMemoryFileSystem(
-      new llvm::vfs::InMemoryFileSystem);
+  auto InMemoryFileSystem =
+      llvm::makeIntrusiveRefCnt<llvm::vfs::InMemoryFileSystem>();
 
   InMemoryFileSystem->addFile("foo.hlsl", 0,
                               llvm::MemoryBuffer::getMemBuffer("\n"));
 
   auto *DiagConsumer = new SimpleDiagnosticConsumer;
   DiagnosticOptions DiagOpts;
-  DiagnosticsEngine Diags(DiagID, DiagOpts, DiagConsumer);
+  DiagnosticsEngine Diags(DiagnosticIDs::create(), DiagOpts, DiagConsumer);
 
   validateTargetProfile("-Tvs_6_0", "dxilv1.0--shadermodel6.0-vertex",
                         InMemoryFileSystem, Diags);
@@ -105,17 +103,15 @@ TEST(DxcModeTest, TargetProfileValidation) {
 }
 
 TEST(DxcModeTest, ValidatorVersionValidation) {
-  IntrusiveRefCntPtr<DiagnosticIDs> DiagID(new DiagnosticIDs());
-
-  IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> InMemoryFileSystem(
-      new llvm::vfs::InMemoryFileSystem);
+  auto InMemoryFileSystem =
+      llvm::makeIntrusiveRefCnt<llvm::vfs::InMemoryFileSystem>();
 
   InMemoryFileSystem->addFile("foo.hlsl", 0,
                               llvm::MemoryBuffer::getMemBuffer("\n"));
 
   auto *DiagConsumer = new SimpleDiagnosticConsumer;
   DiagnosticOptions DiagOpts;
-  DiagnosticsEngine Diags(DiagID, DiagOpts, DiagConsumer);
+  DiagnosticsEngine Diags(DiagnosticIDs::create(), DiagOpts, DiagConsumer);
   Driver TheDriver("/bin/clang", "", Diags, "", InMemoryFileSystem);
   std::unique_ptr<Compilation> C(TheDriver.BuildCompilation(
       {"clang", "--driver-mode=dxc", "-Tlib_6_7", "foo.hlsl"}));
@@ -210,8 +206,8 @@ TEST(DxcModeTest, ValidatorVersionValidation) {
 }
 
 TEST(DxcModeTest, DefaultEntry) {
-  IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> InMemoryFileSystem(
-      new llvm::vfs::InMemoryFileSystem);
+  auto InMemoryFileSystem =
+      llvm::makeIntrusiveRefCnt<llvm::vfs::InMemoryFileSystem>();
 
   InMemoryFileSystem->addFile("foo.hlsl", 0,
                               llvm::MemoryBuffer::getMemBuffer("\n"));
diff --git a/clang/unittests/Driver/SanitizerArgsTest.cpp b/clang/unittests/Driver/SanitizerArgsTest.cpp
index b8bfc68..7847947 100644
--- a/clang/unittests/Driver/SanitizerArgsTest.cpp
+++ b/clang/unittests/Driver/SanitizerArgsTest.cpp
@@ -53,7 +53,7 @@ protected:
     assert(!DriverInstance && "Running twice is not allowed");
 
     DiagnosticOptions DiagOpts;
-    DiagnosticsEngine Diags(new DiagnosticIDs, DiagOpts,
+    DiagnosticsEngine Diags(DiagnosticIDs::create(), DiagOpts,
                             new TextDiagnosticPrinter(llvm::errs(), DiagOpts));
     DriverInstance.emplace(ClangBinary, "x86_64-unknown-linux-gnu", Diags,
                            "clang LLVM compiler", prepareFS(ExtraFiles));
@@ -78,8 +78,7 @@ protected:
 private:
   llvm::IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem>
   prepareFS(llvm::ArrayRef<std::string> ExtraFiles) {
-    llvm::IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> FS =
-        new llvm::vfs::InMemoryFileSystem;
+    auto FS = llvm::makeIntrusiveRefCnt<llvm::vfs::InMemoryFileSystem>();
     FS->addFile(ClangBinary, time_t(), llvm::MemoryBuffer::getMemBuffer(""));
     FS->addFile(InputFile, time_t(), llvm::MemoryBuffer::getMemBuffer(""));
     for (llvm::StringRef F : ExtraFiles)
diff --git a/clang/unittests/Driver/SimpleDiagnosticConsumer.h b/clang/unittests/Driver/SimpleDiagnosticConsumer.h
index c3772ba..7ab409b 100644
--- a/clang/unittests/Driver/SimpleDiagnosticConsumer.h
+++ b/clang/unittests/Driver/SimpleDiagnosticConsumer.h
@@ -42,13 +42,12 @@ struct SimpleDiagnosticConsumer : public clang::DiagnosticConsumer {
 // for testing situations where it will only ever be used for emitting
 // diagnostics, such as being passed to `MultilibSet::select`.
 inline clang::driver::Driver diagnostic_test_driver() {
-  llvm::IntrusiveRefCntPtr<clang::DiagnosticIDs> DiagID(
-      new clang::DiagnosticIDs());
-  llvm::IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> InMemoryFileSystem(
-      new llvm::vfs::InMemoryFileSystem);
+  auto InMemoryFileSystem =
+      llvm::makeIntrusiveRefCnt<llvm::vfs::InMemoryFileSystem>();
   auto *DiagConsumer = new SimpleDiagnosticConsumer;
   clang::DiagnosticOptions DiagOpts;
-  clang::DiagnosticsEngine Diags(DiagID, DiagOpts, DiagConsumer);
+  clang::DiagnosticsEngine Diags(clang::DiagnosticIDs::create(), DiagOpts,
+                                 DiagConsumer);
   return clang::driver::Driver("/bin/clang", "", Diags, "", InMemoryFileSystem);
 }
 
diff --git a/clang/unittests/Driver/ToolChainTest.cpp b/clang/unittests/Driver/ToolChainTest.cpp
index 670090a..4fa2729 100644
--- a/clang/unittests/Driver/ToolChainTest.cpp
+++ b/clang/unittests/Driver/ToolChainTest.cpp
@@ -40,10 +40,10 @@ namespace {
 TEST(ToolChainTest, VFSGCCInstallation) {
   DiagnosticOptions DiagOpts;
 
-  IntrusiveRefCntPtr<DiagnosticIDs> DiagID(new DiagnosticIDs());
+  IntrusiveRefCntPtr<DiagnosticIDs> DiagID = DiagnosticIDs::create();
   struct TestDiagnosticConsumer : public DiagnosticConsumer {};
-  IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> InMemoryFileSystem(
-      new llvm::vfs::InMemoryFileSystem);
+  auto InMemoryFileSystem =
+      llvm::makeIntrusiveRefCnt<llvm::vfs::InMemoryFileSystem>();
 
   const char *EmptyFiles[] = {
       "foo.cpp",
@@ -137,11 +137,11 @@ TEST(ToolChainTest, VFSGCCInstallation) {
 TEST(ToolChainTest, VFSGCCInstallationRelativeDir) {
   DiagnosticOptions DiagOpts;
 
-  IntrusiveRefCntPtr<DiagnosticIDs> DiagID(new DiagnosticIDs());
+  IntrusiveRefCntPtr<DiagnosticIDs> DiagID = DiagnosticIDs::create();
   struct TestDiagnosticConsumer : public DiagnosticConsumer {};
   DiagnosticsEngine Diags(DiagID, DiagOpts, new TestDiagnosticConsumer);
-  IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> InMemoryFileSystem(
-      new llvm::vfs::InMemoryFileSystem);
+  auto InMemoryFileSystem =
+      llvm::makeIntrusiveRefCnt<llvm::vfs::InMemoryFileSystem>();
   Driver TheDriver("/home/test/bin/clang", "arm-linux-gnueabi", Diags,
                    "clang LLVM compiler", InMemoryFileSystem);
 
@@ -176,10 +176,10 @@ TEST(ToolChainTest, VFSGCCInstallationRelativeDir) {
 TEST(ToolChainTest, VFSSolarisMultiGCCInstallation) {
   DiagnosticOptions DiagOpts;
 
-  IntrusiveRefCntPtr<DiagnosticIDs> DiagID(new DiagnosticIDs());
+  IntrusiveRefCntPtr<DiagnosticIDs> DiagID = DiagnosticIDs::create();
   struct TestDiagnosticConsumer : public DiagnosticConsumer {};
-  IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> InMemoryFileSystem(
-      new llvm::vfs::InMemoryFileSystem);
+  auto InMemoryFileSystem =
+      llvm::makeIntrusiveRefCnt<llvm::vfs::InMemoryFileSystem>();
 
   const char *EmptyFiles[] = {
       // Sort entries so the latest version doesn't come first.
@@ -340,10 +340,10 @@ MATCHER_P(jobHasArgs, Substr, "") {
 TEST(ToolChainTest, VFSGnuLibcxxPathNoSysroot) {
   DiagnosticOptions DiagOpts;
 
-  IntrusiveRefCntPtr<DiagnosticIDs> DiagID(new DiagnosticIDs());
+  IntrusiveRefCntPtr<DiagnosticIDs> DiagID = DiagnosticIDs::create();
   struct TestDiagnosticConsumer : public DiagnosticConsumer {};
-  IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> InMemoryFileSystem(
-      new llvm::vfs::InMemoryFileSystem);
+  auto InMemoryFileSystem =
+      llvm::makeIntrusiveRefCnt<llvm::vfs::InMemoryFileSystem>();
 
   const char *EmptyFiles[] = {
       "foo.cpp",
@@ -371,11 +371,11 @@ TEST(ToolChainTest, VFSGnuLibcxxPathNoSysroot) {
 TEST(ToolChainTest, DefaultDriverMode) {
   DiagnosticOptions DiagOpts;
 
-  IntrusiveRefCntPtr<DiagnosticIDs> DiagID(new DiagnosticIDs());
+  IntrusiveRefCntPtr<DiagnosticIDs> DiagID = DiagnosticIDs::create();
   struct TestDiagnosticConsumer : public DiagnosticConsumer {};
   DiagnosticsEngine Diags(DiagID, DiagOpts, new TestDiagnosticConsumer);
-  IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> InMemoryFileSystem(
-      new llvm::vfs::InMemoryFileSystem);
+  auto InMemoryFileSystem =
+      llvm::makeIntrusiveRefCnt<llvm::vfs::InMemoryFileSystem>();
 
   Driver CCDriver("/home/test/bin/clang", "arm-linux-gnueabi", Diags,
                   "clang LLVM compiler", InMemoryFileSystem);
@@ -402,7 +402,7 @@ TEST(ToolChainTest, DefaultDriverMode) {
   EXPECT_TRUE(CLDriver.IsCLMode());
 }
 TEST(ToolChainTest, InvalidArgument) {
-  IntrusiveRefCntPtr<DiagnosticIDs> DiagID(new DiagnosticIDs());
+  IntrusiveRefCntPtr<DiagnosticIDs> DiagID = DiagnosticIDs::create();
   struct TestDiagnosticConsumer : public DiagnosticConsumer {};
   DiagnosticOptions DiagOpts;
   DiagnosticsEngine Diags(DiagID, DiagOpts, new TestDiagnosticConsumer);
@@ -517,11 +517,11 @@ TEST(ToolChainTest, GetTargetAndMode) {
 TEST(ToolChainTest, CommandOutput) {
   DiagnosticOptions DiagOpts;
 
-  IntrusiveRefCntPtr<DiagnosticIDs> DiagID(new DiagnosticIDs());
+  IntrusiveRefCntPtr<DiagnosticIDs> DiagID = DiagnosticIDs::create();
   struct TestDiagnosticConsumer : public DiagnosticConsumer {};
   DiagnosticsEngine Diags(DiagID, DiagOpts, new TestDiagnosticConsumer);
-  IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> InMemoryFileSystem(
-      new llvm::vfs::InMemoryFileSystem);
+  auto InMemoryFileSystem =
+      llvm::makeIntrusiveRefCnt<llvm::vfs::InMemoryFileSystem>();
 
   Driver CCDriver("/home/test/bin/clang", "arm-linux-gnueabi", Diags,
                   "clang LLVM compiler", InMemoryFileSystem);
@@ -545,11 +545,11 @@ TEST(ToolChainTest, CommandOutput) {
 
 TEST(ToolChainTest, PostCallback) {
   DiagnosticOptions DiagOpts;
-  IntrusiveRefCntPtr<DiagnosticIDs> DiagID(new DiagnosticIDs());
+  IntrusiveRefCntPtr<DiagnosticIDs> DiagID = DiagnosticIDs::create();
   struct TestDiagnosticConsumer : public DiagnosticConsumer {};
   DiagnosticsEngine Diags(DiagID, DiagOpts, new TestDiagnosticConsumer);
-  IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> InMemoryFileSystem(
-      new llvm::vfs::InMemoryFileSystem);
+  auto InMemoryFileSystem =
+      llvm::makeIntrusiveRefCnt<llvm::vfs::InMemoryFileSystem>();
 
   // The executable path must not exist.
   Driver CCDriver("/home/test/bin/clang", "arm-linux-gnueabi", Diags,
@@ -598,11 +598,11 @@ TEST(ToolChainTest, UEFICallingConventionTest) {
 
 TEST(ToolChainTest, UEFIDefaultDebugFormatTest) {
   DiagnosticOptions DiagOpts;
-  IntrusiveRefCntPtr<DiagnosticIDs> DiagID(new DiagnosticIDs());
+  IntrusiveRefCntPtr<DiagnosticIDs> DiagID = DiagnosticIDs::create();
   struct TestDiagnosticConsumer : public DiagnosticConsumer {};
   DiagnosticsEngine Diags(DiagID, DiagOpts, new TestDiagnosticConsumer);
-  IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> InMemoryFileSystem(
-      new llvm::vfs::InMemoryFileSystem);
+  auto InMemoryFileSystem =
+      llvm::makeIntrusiveRefCnt<llvm::vfs::InMemoryFileSystem>();
   Driver CCDriver("/home/test/bin/clang", "x86_64-unknown-uefi", Diags,
                   "clang LLVM compiler", InMemoryFileSystem);
   CCDriver.setCheckInputsExist(false);
@@ -640,11 +640,10 @@ struct SimpleDiagnosticConsumer : public DiagnosticConsumer {
 
 TEST(ToolChainTest, ConfigFileSearch) {
   DiagnosticOptions DiagOpts;
-  IntrusiveRefCntPtr<DiagnosticIDs> DiagID(new DiagnosticIDs());
+  IntrusiveRefCntPtr<DiagnosticIDs> DiagID = DiagnosticIDs::create();
   struct TestDiagnosticConsumer : public DiagnosticConsumer {};
   DiagnosticsEngine Diags(DiagID, DiagOpts, new TestDiagnosticConsumer);
-  IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> FS(
-      new llvm::vfs::InMemoryFileSystem);
+  auto FS = llvm::makeIntrusiveRefCnt<llvm::vfs::InMemoryFileSystem>();
 
 #ifdef _WIN32
   const char *TestRoot = "C:\\";
@@ -717,11 +716,11 @@ struct FileSystemWithError : public llvm::vfs::FileSystem {
 
 TEST(ToolChainTest, ConfigFileError) {
   DiagnosticOptions DiagOpts;
-  IntrusiveRefCntPtr<DiagnosticIDs> DiagID(new DiagnosticIDs());
+  IntrusiveRefCntPtr<DiagnosticIDs> DiagID = DiagnosticIDs::create();
   std::unique_ptr<SimpleDiagnosticConsumer> DiagConsumer(
       new SimpleDiagnosticConsumer());
   DiagnosticsEngine Diags(DiagID, DiagOpts, DiagConsumer.get(), false);
-  IntrusiveRefCntPtr<llvm::vfs::FileSystem> FS(new FileSystemWithError);
+  auto FS = llvm::makeIntrusiveRefCnt<FileSystemWithError>();
 
   Driver TheDriver("/home/test/bin/clang", "arm-linux-gnueabi", Diags,
                    "clang LLVM compiler", FS);
@@ -738,12 +737,11 @@ TEST(ToolChainTest, ConfigFileError) {
 
 TEST(ToolChainTest, BadConfigFile) {
   DiagnosticOptions DiagOpts;
-  IntrusiveRefCntPtr<DiagnosticIDs> DiagID(new DiagnosticIDs());
+  IntrusiveRefCntPtr<DiagnosticIDs> DiagID = DiagnosticIDs::create();
   std::unique_ptr<SimpleDiagnosticConsumer> DiagConsumer(
       new SimpleDiagnosticConsumer());
   DiagnosticsEngine Diags(DiagID, DiagOpts, DiagConsumer.get(), false);
-  IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> FS(
-      new llvm::vfs::InMemoryFileSystem);
+  auto FS = llvm::makeIntrusiveRefCnt<llvm::vfs::InMemoryFileSystem>();
 
 #ifdef _WIN32
   const char *TestRoot = "C:\\";
@@ -812,12 +810,11 @@ TEST(ToolChainTest, BadConfigFile) {
 
 TEST(ToolChainTest, ConfigInexistentInclude) {
   DiagnosticOptions DiagOpts;
-  IntrusiveRefCntPtr<DiagnosticIDs> DiagID(new DiagnosticIDs());
+  IntrusiveRefCntPtr<DiagnosticIDs> DiagID = DiagnosticIDs::create();
   std::unique_ptr<SimpleDiagnosticConsumer> DiagConsumer(
       new SimpleDiagnosticConsumer());
   DiagnosticsEngine Diags(DiagID, DiagOpts, DiagConsumer.get(), false);
-  IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> FS(
-      new llvm::vfs::InMemoryFileSystem);
+  auto FS = llvm::makeIntrusiveRefCnt<llvm::vfs::InMemoryFileSystem>();
 
 #ifdef _WIN32
   const char *TestRoot = "C:\\";
@@ -853,12 +850,11 @@ TEST(ToolChainTest, ConfigInexistentInclude) {
 
 TEST(ToolChainTest, ConfigRecursiveInclude) {
   DiagnosticOptions DiagOpts;
-  IntrusiveRefCntPtr<DiagnosticIDs> DiagID(new DiagnosticIDs());
+  IntrusiveRefCntPtr<DiagnosticIDs> DiagID = DiagnosticIDs::create();
   std::unique_ptr<SimpleDiagnosticConsumer> DiagConsumer(
       new SimpleDiagnosticConsumer());
   DiagnosticsEngine Diags(DiagID, DiagOpts, DiagConsumer.get(), false);
-  IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> FS(
-      new llvm::vfs::InMemoryFileSystem);
+  auto FS = llvm::makeIntrusiveRefCnt<llvm::vfs::InMemoryFileSystem>();
 
 #ifdef _WIN32
   const char *TestRoot = "C:\\";
@@ -899,11 +895,10 @@ TEST(ToolChainTest, ConfigRecursiveInclude) {
 
 TEST(ToolChainTest, NestedConfigFile) {
   DiagnosticOptions DiagOpts;
-  IntrusiveRefCntPtr<DiagnosticIDs> DiagID(new DiagnosticIDs());
+  IntrusiveRefCntPtr<DiagnosticIDs> DiagID = DiagnosticIDs::create();
   struct TestDiagnosticConsumer : public DiagnosticConsumer {};
   DiagnosticsEngine Diags(DiagID, DiagOpts, new TestDiagnosticConsumer);
-  IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> FS(
-      new llvm::vfs::InMemoryFileSystem);
+  auto FS = llvm::makeIntrusiveRefCnt<llvm::vfs::InMemoryFileSystem>();
 
 #ifdef _WIN32
   const char *TestRoot = "C:\\";
diff --git a/clang/unittests/Format/IntegerLiteralSeparatorTest.cpp b/clang/unittests/Format/IntegerLiteralSeparatorTest.cpp
index 8681c3d..53b6dd8 100644
--- a/clang/unittests/Format/IntegerLiteralSeparatorTest.cpp
+++ b/clang/unittests/Format/IntegerLiteralSeparatorTest.cpp
@@ -83,6 +83,9 @@ TEST_F(IntegerLiteralSeparatorTest, SingleQuoteAsSeparator) {
                "d = 5678_km;\n"
                "h = 0xDEF_u16;",
                Style);
+
+  Style.Standard = FormatStyle::LS_Cpp11;
+  verifyFormat("ld = 1234L;", Style);
 }
 
 TEST_F(IntegerLiteralSeparatorTest, UnderscoreAsSeparator) {
diff --git a/clang/unittests/Frontend/ASTUnitTest.cpp b/clang/unittests/Frontend/ASTUnitTest.cpp
index afa64b5..7148ca0 100644
--- a/clang/unittests/Frontend/ASTUnitTest.cpp
+++ b/clang/unittests/Frontend/ASTUnitTest.cpp
@@ -119,8 +119,7 @@ TEST_F(ASTUnitTest, GetBufferForFileMemoryMapping) {
 }
 
 TEST_F(ASTUnitTest, ModuleTextualHeader) {
-  llvm::IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> InMemoryFs =
-      new llvm::vfs::InMemoryFileSystem();
+  auto InMemoryFs = llvm::makeIntrusiveRefCnt<llvm::vfs::InMemoryFileSystem>();
   InMemoryFs->addFile("test.cpp", 0, llvm::MemoryBuffer::getMemBuffer(R"cpp(
       #include "Textual.h"
       void foo() {}
diff --git a/clang/unittests/Frontend/CodeGenActionTest.cpp b/clang/unittests/Frontend/CodeGenActionTest.cpp
index 90818b7..b2792c4 100644
--- a/clang/unittests/Frontend/CodeGenActionTest.cpp
+++ b/clang/unittests/Frontend/CodeGenActionTest.cpp
@@ -76,40 +76,4 @@ TEST(CodeGenTest, CodeGenFromIRMemBuffer) {
   bool Success = Compiler.ExecuteAction(Action);
   EXPECT_TRUE(Success);
 }
-
-TEST(CodeGenTest, DebugInfoCWDCodeGen) {
-  // Check that debug info is accessing the current working directory from the
-  // VFS instead of calling \p llvm::sys::fs::current_path() directly.
-
-  auto Sept = llvm::sys::path::get_separator();
-  auto VFS = std::make_unique<llvm::vfs::InMemoryFileSystem>();
-  VFS->setCurrentWorkingDirectory(
-      std::string(llvm::formatv("{0}in-memory-fs-cwd", Sept)));
-  std::string TestPath =
-      std::string(llvm::formatv("{0}in-memory-fs-cwd{0}test.cpp", Sept));
-  VFS->addFile(TestPath, 0, llvm::MemoryBuffer::getMemBuffer("int x;\n"));
-
-  auto Invocation = std::make_shared<CompilerInvocation>();
-  Invocation->getFrontendOpts().Inputs.push_back(
-      FrontendInputFile("test.cpp", Language::CXX));
-  Invocation->getFrontendOpts().ProgramAction = EmitLLVM;
-  Invocation->getTargetOpts().Triple = "x86_64-unknown-linux-gnu";
-  Invocation->getCodeGenOpts().setDebugInfo(codegenoptions::FullDebugInfo);
-  CompilerInstance Compiler(std::move(Invocation));
-
-  SmallString<256> IRBuffer;
-  Compiler.setOutputStream(std::make_unique<raw_svector_ostream>(IRBuffer));
-  Compiler.createDiagnostics(*VFS);
-  Compiler.createFileManager(std::move(VFS));
-
-  EmitLLVMAction Action;
-  bool Success = Compiler.ExecuteAction(Action);
-  EXPECT_TRUE(Success);
-
-  SmallString<128> RealCWD;
-  llvm::sys::fs::current_path(RealCWD);
-  EXPECT_TRUE(!RealCWD.empty());
-  EXPECT_FALSE(IRBuffer.str().contains(RealCWD));
-  EXPECT_TRUE(IRBuffer.str().contains("in-memory-fs-cwd"));
-}
 }
diff --git a/clang/unittests/Frontend/CompilerInstanceTest.cpp b/clang/unittests/Frontend/CompilerInstanceTest.cpp
index 459a386..7c1b653 100644
--- a/clang/unittests/Frontend/CompilerInstanceTest.cpp
+++ b/clang/unittests/Frontend/CompilerInstanceTest.cpp
@@ -71,7 +71,7 @@ TEST(CompilerInstance, DefaultVFSOverlayFromInvocation) {
   // Create a minimal CompilerInstance which should use the VFS we specified
   // in the CompilerInvocation (as we don't explicitly set our own).
   CompilerInstance Instance(std::move(CInvok));
-  Instance.setDiagnostics(Diags.get());
+  Instance.setDiagnostics(Diags);
   Instance.createFileManager();
 
   // Check if the virtual file exists which means that our VFS is used by the
@@ -135,7 +135,7 @@ TEST(CompilerInstance, MultipleInputsCleansFileIDs) {
   ASSERT_TRUE(CInvok) << "could not create compiler invocation";
 
   CompilerInstance Instance(std::move(CInvok));
-  Instance.setDiagnostics(Diags.get());
+  Instance.setDiagnostics(Diags);
   Instance.createFileManager(VFS);
 
   // Run once for `a.cc` and then for `a.h`. This makes sure we get the same
diff --git a/clang/unittests/Frontend/PCHPreambleTest.cpp b/clang/unittests/Frontend/PCHPreambleTest.cpp
index faad408..d27f793 100644
--- a/clang/unittests/Frontend/PCHPreambleTest.cpp
+++ b/clang/unittests/Frontend/PCHPreambleTest.cpp
@@ -62,7 +62,7 @@ public:
   void TearDown() override {}
 
   void ResetVFS() {
-    VFS = new ReadCountingInMemoryFileSystem();
+    VFS = llvm::makeIntrusiveRefCnt<ReadCountingInMemoryFileSystem>();
     // We need the working directory to be set to something absolute,
     // otherwise it ends up being inadvertently set to the current
     // working directory in the real file system due to a series of
diff --git a/clang/unittests/Frontend/ReparseWorkingDirTest.cpp b/clang/unittests/Frontend/ReparseWorkingDirTest.cpp
index 1b8051f..38ef468 100644
--- a/clang/unittests/Frontend/ReparseWorkingDirTest.cpp
+++ b/clang/unittests/Frontend/ReparseWorkingDirTest.cpp
@@ -28,7 +28,9 @@ class ReparseWorkingDirTest : public ::testing::Test {
   std::shared_ptr<PCHContainerOperations> PCHContainerOpts;
 
 public:
-  void SetUp() override { VFS = new vfs::InMemoryFileSystem(); }
+  void SetUp() override {
+    VFS = llvm::makeIntrusiveRefCnt<vfs::InMemoryFileSystem>();
+  }
   void TearDown() override {}
 
   void setWorkingDirectory(StringRef Path) {
diff --git a/clang/unittests/Frontend/SearchPathTest.cpp b/clang/unittests/Frontend/SearchPathTest.cpp
index c74a5c7..a8c16fe 100644
--- a/clang/unittests/Frontend/SearchPathTest.cpp
+++ b/clang/unittests/Frontend/SearchPathTest.cpp
@@ -40,8 +40,8 @@ namespace {
 class SearchPathTest : public ::testing::Test {
 protected:
   SearchPathTest()
-      : Diags(new DiagnosticIDs(), DiagOpts, new IgnoringDiagConsumer()),
-        VFS(new llvm::vfs::InMemoryFileSystem),
+      : Diags(DiagnosticIDs::create(), DiagOpts, new IgnoringDiagConsumer()),
+        VFS(llvm::makeIntrusiveRefCnt<llvm::vfs::InMemoryFileSystem>()),
         FileMgr(FileSystemOptions(), VFS), SourceMgr(Diags, FileMgr),
         Invocation(std::make_unique<CompilerInvocation>()) {}
 
diff --git a/clang/unittests/Frontend/TextDiagnosticTest.cpp b/clang/unittests/Frontend/TextDiagnosticTest.cpp
index 8fd8187..622dbc5 100644
--- a/clang/unittests/Frontend/TextDiagnosticTest.cpp
+++ b/clang/unittests/Frontend/TextDiagnosticTest.cpp
@@ -36,9 +36,8 @@ TEST(TextDiagnostic, ShowLine) {
   // Create dummy FileManager and SourceManager.
   FileSystemOptions FSOpts;
   FileManager FileMgr(FSOpts);
-  IntrusiveRefCntPtr<DiagnosticIDs> DiagID(new DiagnosticIDs);
   DiagnosticOptions DiagEngineOpts;
-  DiagnosticsEngine DiagEngine(DiagID, DiagEngineOpts,
+  DiagnosticsEngine DiagEngine(DiagnosticIDs::create(), DiagEngineOpts,
                                new IgnoringDiagConsumer());
   SourceManager SrcMgr(DiagEngine, FileMgr);
 
diff --git a/clang/unittests/Frontend/UtilsTest.cpp b/clang/unittests/Frontend/UtilsTest.cpp
index cf385a5..fc411e4 100644
--- a/clang/unittests/Frontend/UtilsTest.cpp
+++ b/clang/unittests/Frontend/UtilsTest.cpp
@@ -29,7 +29,7 @@ TEST(BuildCompilerInvocationTest, RecoverMultipleJobs) {
   clang::DiagnosticOptions DiagOpts;
   CreateInvocationOptions Opts;
   Opts.RecoverOnError = true;
-  Opts.VFS = new llvm::vfs::InMemoryFileSystem();
+  Opts.VFS = llvm::makeIntrusiveRefCnt<llvm::vfs::InMemoryFileSystem>();
   Opts.Diags = clang::CompilerInstance::createDiagnostics(*Opts.VFS, DiagOpts,
                                                           &D, false);
   std::unique_ptr<CompilerInvocation> CI = createInvocation(Args, Opts);
diff --git a/clang/unittests/Lex/HeaderSearchTest.cpp b/clang/unittests/Lex/HeaderSearchTest.cpp
index 9903c12..0213bfe 100644
--- a/clang/unittests/Lex/HeaderSearchTest.cpp
+++ b/clang/unittests/Lex/HeaderSearchTest.cpp
@@ -28,9 +28,9 @@ namespace {
 class HeaderSearchTest : public ::testing::Test {
 protected:
   HeaderSearchTest()
-      : VFS(new llvm::vfs::InMemoryFileSystem), FileMgr(FileMgrOpts, VFS),
-        DiagID(new DiagnosticIDs()),
-        Diags(DiagID, DiagOpts, new IgnoringDiagConsumer()),
+      : VFS(llvm::makeIntrusiveRefCnt<llvm::vfs::InMemoryFileSystem>()),
+        FileMgr(FileMgrOpts, VFS),
+        Diags(DiagnosticIDs::create(), DiagOpts, new IgnoringDiagConsumer()),
         SourceMgr(Diags, FileMgr), TargetOpts(new TargetOptions),
         Search(HSOpts, SourceMgr, Diags, LangOpts, Target.get()) {
     TargetOpts->Triple = "x86_64-apple-darwin11.1.0";
@@ -80,7 +80,6 @@ protected:
   IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> VFS;
   FileSystemOptions FileMgrOpts;
   FileManager FileMgr;
-  IntrusiveRefCntPtr<DiagnosticIDs> DiagID;
   DiagnosticOptions DiagOpts;
   DiagnosticsEngine Diags;
   SourceManager SourceMgr;
diff --git a/clang/unittests/Lex/LexerTest.cpp b/clang/unittests/Lex/LexerTest.cpp
index 86df872..56d73ce 100644
--- a/clang/unittests/Lex/LexerTest.cpp
+++ b/clang/unittests/Lex/LexerTest.cpp
@@ -41,8 +41,8 @@ using testing::ElementsAre;
 class LexerTest : public ::testing::Test {
 protected:
   LexerTest()
-      : FileMgr(FileMgrOpts), DiagID(new DiagnosticIDs()),
-        Diags(DiagID, DiagOpts, new IgnoringDiagConsumer()),
+      : FileMgr(FileMgrOpts),
+        Diags(DiagnosticIDs::create(), DiagOpts, new IgnoringDiagConsumer()),
         SourceMgr(Diags, FileMgr), TargetOpts(new TargetOptions) {
     TargetOpts->Triple = "x86_64-apple-darwin11.1.0";
     Target = TargetInfo::CreateTargetInfo(Diags, *TargetOpts);
@@ -102,7 +102,6 @@ protected:
 
   FileSystemOptions FileMgrOpts;
   FileManager FileMgr;
-  IntrusiveRefCntPtr<DiagnosticIDs> DiagID;
   DiagnosticOptions DiagOpts;
   DiagnosticsEngine Diags;
   SourceManager SourceMgr;
diff --git a/clang/unittests/Lex/ModuleDeclStateTest.cpp b/clang/unittests/Lex/ModuleDeclStateTest.cpp
index 6ecba4d..adc6cf1 100644
--- a/clang/unittests/Lex/ModuleDeclStateTest.cpp
+++ b/clang/unittests/Lex/ModuleDeclStateTest.cpp
@@ -54,8 +54,8 @@ public:
 class ModuleDeclStateTest : public ::testing::Test {
 protected:
   ModuleDeclStateTest()
-      : FileMgr(FileMgrOpts), DiagID(new DiagnosticIDs()),
-        Diags(DiagID, DiagOpts, new IgnoringDiagConsumer()),
+      : FileMgr(FileMgrOpts),
+        Diags(DiagnosticIDs::create(), DiagOpts, new IgnoringDiagConsumer()),
         SourceMgr(Diags, FileMgr), TargetOpts(new TargetOptions) {
     TargetOpts->Triple = "x86_64-unknown-linux-gnu";
     Target = TargetInfo::CreateTargetInfo(Diags, *TargetOpts);
@@ -93,7 +93,6 @@ protected:
 
   FileSystemOptions FileMgrOpts;
   FileManager FileMgr;
-  IntrusiveRefCntPtr<DiagnosticIDs> DiagID;
   DiagnosticOptions DiagOpts;
   DiagnosticsEngine Diags;
   SourceManager SourceMgr;
diff --git a/clang/unittests/Lex/PPCallbacksTest.cpp b/clang/unittests/Lex/PPCallbacksTest.cpp
index af86c18..990689c 100644
--- a/clang/unittests/Lex/PPCallbacksTest.cpp
+++ b/clang/unittests/Lex/PPCallbacksTest.cpp
@@ -133,9 +133,10 @@ public:
 class PPCallbacksTest : public ::testing::Test {
 protected:
   PPCallbacksTest()
-      : InMemoryFileSystem(new llvm::vfs::InMemoryFileSystem),
+      : InMemoryFileSystem(
+            llvm::makeIntrusiveRefCnt<llvm::vfs::InMemoryFileSystem>()),
         FileMgr(FileSystemOptions(), InMemoryFileSystem),
-        DiagID(new DiagnosticIDs()),
+        DiagID(DiagnosticIDs::create()),
         Diags(DiagID, DiagOpts, new IgnoringDiagConsumer()),
         SourceMgr(Diags, FileMgr), TargetOpts(new TargetOptions()) {
     TargetOpts->Triple = "x86_64-apple-darwin11.1.0";
diff --git a/clang/unittests/Lex/PPConditionalDirectiveRecordTest.cpp b/clang/unittests/Lex/PPConditionalDirectiveRecordTest.cpp
index 54c1d02..4a88bd4 100644
--- a/clang/unittests/Lex/PPConditionalDirectiveRecordTest.cpp
+++ b/clang/unittests/Lex/PPConditionalDirectiveRecordTest.cpp
@@ -29,8 +29,8 @@ namespace {
 class PPConditionalDirectiveRecordTest : public ::testing::Test {
 protected:
   PPConditionalDirectiveRecordTest()
-      : FileMgr(FileMgrOpts), DiagID(new DiagnosticIDs()),
-        Diags(DiagID, DiagOpts, new IgnoringDiagConsumer()),
+      : FileMgr(FileMgrOpts),
+        Diags(DiagnosticIDs::create(), DiagOpts, new IgnoringDiagConsumer()),
         SourceMgr(Diags, FileMgr), TargetOpts(new TargetOptions) {
     TargetOpts->Triple = "x86_64-apple-darwin11.1.0";
     Target = TargetInfo::CreateTargetInfo(Diags, *TargetOpts);
@@ -38,7 +38,6 @@ protected:
 
   FileSystemOptions FileMgrOpts;
   FileManager FileMgr;
-  IntrusiveRefCntPtr<DiagnosticIDs> DiagID;
   DiagnosticOptions DiagOpts;
   DiagnosticsEngine Diags;
   SourceManager SourceMgr;
diff --git a/clang/unittests/Lex/PPDependencyDirectivesTest.cpp b/clang/unittests/Lex/PPDependencyDirectivesTest.cpp
index 061cb13..15cc283 100644
--- a/clang/unittests/Lex/PPDependencyDirectivesTest.cpp
+++ b/clang/unittests/Lex/PPDependencyDirectivesTest.cpp
@@ -31,8 +31,8 @@ namespace {
 class PPDependencyDirectivesTest : public ::testing::Test {
 protected:
   PPDependencyDirectivesTest()
-      : FileMgr(FileMgrOpts), DiagID(new DiagnosticIDs()),
-        Diags(DiagID, DiagOpts, new IgnoringDiagConsumer()),
+      : FileMgr(FileMgrOpts),
+        Diags(DiagnosticIDs::create(), DiagOpts, new IgnoringDiagConsumer()),
         SourceMgr(Diags, FileMgr), TargetOpts(new TargetOptions) {
     TargetOpts->Triple = "x86_64-apple-macos12";
     Target = TargetInfo::CreateTargetInfo(Diags, *TargetOpts);
@@ -40,7 +40,6 @@ protected:
 
   FileSystemOptions FileMgrOpts;
   FileManager FileMgr;
-  IntrusiveRefCntPtr<DiagnosticIDs> DiagID;
   DiagnosticOptions DiagOpts;
   DiagnosticsEngine Diags;
   SourceManager SourceMgr;
@@ -75,7 +74,7 @@ TEST_F(PPDependencyDirectivesTest, MacroGuard) {
   // "head2.h" and "head3.h" have tokens following the macro check, they should
   // be included multiple times.
 
-  auto VFS = new llvm::vfs::InMemoryFileSystem();
+  auto VFS = llvm::makeIntrusiveRefCnt<llvm::vfs::InMemoryFileSystem>();
   VFS->addFile(
       "head1.h", 0,
       llvm::MemoryBuffer::getMemBuffer("#ifndef H1_H\n#define H1_H\n#endif\n"));
diff --git a/clang/unittests/Lex/PPMemoryAllocationsTest.cpp b/clang/unittests/Lex/PPMemoryAllocationsTest.cpp
index 4d83003..f873774 100644
--- a/clang/unittests/Lex/PPMemoryAllocationsTest.cpp
+++ b/clang/unittests/Lex/PPMemoryAllocationsTest.cpp
@@ -27,8 +27,8 @@ namespace {
 class PPMemoryAllocationsTest : public ::testing::Test {
 protected:
   PPMemoryAllocationsTest()
-      : FileMgr(FileMgrOpts), DiagID(new DiagnosticIDs()),
-        Diags(DiagID, DiagOpts, new IgnoringDiagConsumer()),
+      : FileMgr(FileMgrOpts),
+        Diags(DiagnosticIDs::create(), DiagOpts, new IgnoringDiagConsumer()),
         SourceMgr(Diags, FileMgr), TargetOpts(new TargetOptions) {
     TargetOpts->Triple = "x86_64-apple-darwin11.1.0";
     Target = TargetInfo::CreateTargetInfo(Diags, *TargetOpts);
@@ -36,7 +36,6 @@ protected:
 
   FileSystemOptions FileMgrOpts;
   FileManager FileMgr;
-  IntrusiveRefCntPtr<DiagnosticIDs> DiagID;
   DiagnosticOptions DiagOpts;
   DiagnosticsEngine Diags;
   SourceManager SourceMgr;
diff --git a/clang/unittests/Parse/ParseHLSLRootSignatureTest.cpp b/clang/unittests/Parse/ParseHLSLRootSignatureTest.cpp
index 4d08f8d..44f6b04 100644
--- a/clang/unittests/Parse/ParseHLSLRootSignatureTest.cpp
+++ b/clang/unittests/Parse/ParseHLSLRootSignatureTest.cpp
@@ -71,8 +71,8 @@ public:
 class ParseHLSLRootSignatureTest : public ::testing::Test {
 protected:
   ParseHLSLRootSignatureTest()
-      : FileMgr(FileMgrOpts), DiagID(new DiagnosticIDs()),
-        Consumer(new ExpectedDiagConsumer()), Diags(DiagID, DiagOpts, Consumer),
+      : FileMgr(FileMgrOpts), Consumer(new ExpectedDiagConsumer()),
+        Diags(DiagnosticIDs::create(), DiagOpts, Consumer),
         SourceMgr(Diags, FileMgr), TargetOpts(new TargetOptions) {
     // This is an arbitrarily chosen target triple to create the target info.
     TargetOpts->Triple = "dxil";
@@ -114,7 +114,6 @@ protected:
 
   FileSystemOptions FileMgrOpts;
   FileManager FileMgr;
-  IntrusiveRefCntPtr<DiagnosticIDs> DiagID;
   DiagnosticOptions DiagOpts;
   ExpectedDiagConsumer *Consumer;
   DiagnosticsEngine Diags;
diff --git a/clang/unittests/Sema/SemaNoloadLookupTest.cpp b/clang/unittests/Sema/SemaNoloadLookupTest.cpp
index 5a04f42..e565372 100644
--- a/clang/unittests/Sema/SemaNoloadLookupTest.cpp
+++ b/clang/unittests/Sema/SemaNoloadLookupTest.cpp
@@ -82,7 +82,7 @@ public:
     EXPECT_TRUE(Invocation);
 
     CompilerInstance Instance(std::move(Invocation));
-    Instance.setDiagnostics(Diags.get());
+    Instance.setDiagnostics(Diags);
     Instance.getFrontendOpts().OutputFile = CacheBMIPath;
     GenerateReducedModuleInterfaceAction Action;
     EXPECT_TRUE(Instance.ExecuteAction(Action));
diff --git a/clang/unittests/Serialization/ForceCheckFileInputTest.cpp b/clang/unittests/Serialization/ForceCheckFileInputTest.cpp
index 970eeef..92ff76b 100644
--- a/clang/unittests/Serialization/ForceCheckFileInputTest.cpp
+++ b/clang/unittests/Serialization/ForceCheckFileInputTest.cpp
@@ -87,7 +87,7 @@ export int aa = 43;
     Buf->release();
 
     CompilerInstance Instance(std::move(Invocation));
-    Instance.setDiagnostics(Diags.get());
+    Instance.setDiagnostics(Diags);
 
     Instance.getFrontendOpts().OutputFile = BMIPath;
 
@@ -122,7 +122,7 @@ export int aa = 43;
 
     CompilerInstance Clang(std::move(Invocation));
 
-    Clang.setDiagnostics(Diags.get());
+    Clang.setDiagnostics(Diags);
     FileManager *FM = Clang.createFileManager(CIOpts.VFS);
     Clang.createSourceManager(*FM);
 
diff --git a/clang/unittests/Serialization/LoadSpecLazilyTest.cpp b/clang/unittests/Serialization/LoadSpecLazilyTest.cpp
index 6315474..d7b5549 100644
--- a/clang/unittests/Serialization/LoadSpecLazilyTest.cpp
+++ b/clang/unittests/Serialization/LoadSpecLazilyTest.cpp
@@ -80,7 +80,7 @@ public:
     EXPECT_TRUE(Invocation);
 
     CompilerInstance Instance(std::move(Invocation));
-    Instance.setDiagnostics(Diags.get());
+    Instance.setDiagnostics(Diags);
     Instance.getFrontendOpts().OutputFile = CacheBMIPath;
     // Avoid memory leaks.
     Instance.getFrontendOpts().DisableFree = false;
diff --git a/clang/unittests/Serialization/ModuleCacheTest.cpp b/clang/unittests/Serialization/ModuleCacheTest.cpp
index de6e13a..1f64401 100644
--- a/clang/unittests/Serialization/ModuleCacheTest.cpp
+++ b/clang/unittests/Serialization/ModuleCacheTest.cpp
@@ -121,7 +121,7 @@ TEST_F(ModuleCacheTest, CachedModuleNewPath) {
       createInvocationAndEnableFree(Args, CIOpts);
   ASSERT_TRUE(Invocation);
   CompilerInstance Instance(std::move(Invocation));
-  Instance.setDiagnostics(Diags.get());
+  Instance.setDiagnostics(Diags);
   SyntaxOnlyAction Action;
   ASSERT_TRUE(Instance.ExecuteAction(Action));
   ASSERT_FALSE(Diags->hasErrorOccurred());
@@ -145,7 +145,7 @@ TEST_F(ModuleCacheTest, CachedModuleNewPath) {
   CompilerInstance Instance2(std::move(Invocation2),
                              Instance.getPCHContainerOperations(),
                              &Instance.getModuleCache());
-  Instance2.setDiagnostics(Diags.get());
+  Instance2.setDiagnostics(Diags);
   SyntaxOnlyAction Action2;
   ASSERT_FALSE(Instance2.ExecuteAction(Action2));
   ASSERT_TRUE(Diags->hasErrorOccurred());
@@ -171,7 +171,7 @@ TEST_F(ModuleCacheTest, CachedModuleNewPathAllowErrors) {
       createInvocationAndEnableFree(Args, CIOpts);
   ASSERT_TRUE(Invocation);
   CompilerInstance Instance(std::move(Invocation));
-  Instance.setDiagnostics(Diags.get());
+  Instance.setDiagnostics(Diags);
   SyntaxOnlyAction Action;
   ASSERT_TRUE(Instance.ExecuteAction(Action));
   ASSERT_FALSE(Diags->hasErrorOccurred());
@@ -189,7 +189,7 @@ TEST_F(ModuleCacheTest, CachedModuleNewPathAllowErrors) {
   CompilerInstance Instance2(std::move(Invocation2),
                              Instance.getPCHContainerOperations(),
                              &Instance.getModuleCache());
-  Instance2.setDiagnostics(Diags.get());
+  Instance2.setDiagnostics(Diags);
   SyntaxOnlyAction Action2;
   ASSERT_FALSE(Instance2.ExecuteAction(Action2));
   ASSERT_TRUE(Diags->hasErrorOccurred());
diff --git a/clang/unittests/Serialization/NoCommentsTest.cpp b/clang/unittests/Serialization/NoCommentsTest.cpp
index 05efeef..ed96c7c 100644
--- a/clang/unittests/Serialization/NoCommentsTest.cpp
+++ b/clang/unittests/Serialization/NoCommentsTest.cpp
@@ -99,7 +99,7 @@ void foo() {}
   ASSERT_TRUE(Invocation);
 
   CompilerInstance Instance(std::move(Invocation));
-  Instance.setDiagnostics(Diags.get());
+  Instance.setDiagnostics(Diags);
   Instance.getFrontendOpts().OutputFile = CacheBMIPath;
   GenerateReducedModuleInterfaceAction Action;
   ASSERT_TRUE(Instance.ExecuteAction(Action));
diff --git a/clang/unittests/Serialization/PreambleInNamedModulesTest.cpp b/clang/unittests/Serialization/PreambleInNamedModulesTest.cpp
index c43520f..f9d7736 100644
--- a/clang/unittests/Serialization/PreambleInNamedModulesTest.cpp
+++ b/clang/unittests/Serialization/PreambleInNamedModulesTest.cpp
@@ -101,7 +101,7 @@ export using ::E;
 
   PreambleCallbacks Callbacks;
   llvm::ErrorOr<PrecompiledPreamble> BuiltPreamble = PrecompiledPreamble::Build(
-      *Invocation, Buffer.get(), Bounds, *Diags, VFS,
+      *Invocation, Buffer.get(), Bounds, Diags, VFS,
       std::make_shared<PCHContainerOperations>(),
       /*StoreInMemory=*/false, /*StoragePath=*/TestDir, Callbacks);
 
@@ -112,7 +112,7 @@ export using ::E;
   BuiltPreamble->OverridePreamble(*Invocation, VFS, Buffer.get());
 
   auto Clang = std::make_unique<CompilerInstance>(std::move(Invocation));
-  Clang->setDiagnostics(Diags.get());
+  Clang->setDiagnostics(Diags);
 
   if (auto VFSWithRemapping = createVFSFromCompilerInvocation(
           Clang->getInvocation(), Clang->getDiagnostics(), VFS))
diff --git a/clang/unittests/Serialization/VarDeclConstantInitTest.cpp b/clang/unittests/Serialization/VarDeclConstantInitTest.cpp
index 5b2988e..743f851 100644
--- a/clang/unittests/Serialization/VarDeclConstantInitTest.cpp
+++ b/clang/unittests/Serialization/VarDeclConstantInitTest.cpp
@@ -106,7 +106,7 @@ export namespace Fibonacci
   Invocation->getFrontendOpts().DisableFree = false;
 
   CompilerInstance Instance(std::move(Invocation));
-  Instance.setDiagnostics(Diags.get());
+  Instance.setDiagnostics(Diags);
 
   std::string CacheBMIPath = llvm::Twine(TestDir + "/Cached.pcm").str();
   Instance.getFrontendOpts().OutputFile = CacheBMIPath;
diff --git a/clang/unittests/Support/TimeProfilerTest.cpp b/clang/unittests/Support/TimeProfilerTest.cpp
index 85d36b5..f70149d 100644
--- a/clang/unittests/Support/TimeProfilerTest.cpp
+++ b/clang/unittests/Support/TimeProfilerTest.cpp
@@ -46,8 +46,7 @@ std::string teardownProfiler() {
 bool compileFromString(StringRef Code, StringRef Standard, StringRef File,
                        llvm::StringMap<std::string> Headers = {}) {
 
-  llvm::IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> FS(
-      new llvm::vfs::InMemoryFileSystem());
+  auto FS = llvm::makeIntrusiveRefCnt<llvm::vfs::InMemoryFileSystem>();
   FS->addFile(File, 0, MemoryBuffer::getMemBuffer(Code));
   for (const auto &Header : Headers) {
     FS->addFile(Header.getKey(), 0,
diff --git a/clang/unittests/Tooling/CompilationDatabaseTest.cpp b/clang/unittests/Tooling/CompilationDatabaseTest.cpp
index c1febaf..2d68e09 100644
--- a/clang/unittests/Tooling/CompilationDatabaseTest.cpp
+++ b/clang/unittests/Tooling/CompilationDatabaseTest.cpp
@@ -971,7 +971,8 @@ TEST_F(TargetAndModeTest, TargetAndMode) {
 
 class ExpandResponseFilesTest : public MemDBTest {
 public:
-  ExpandResponseFilesTest() : FS(new llvm::vfs::InMemoryFileSystem) {}
+  ExpandResponseFilesTest()
+      : FS(llvm::makeIntrusiveRefCnt<llvm::vfs::InMemoryFileSystem>()) {}
 
 protected:
   void addFile(StringRef File, StringRef Content) {
diff --git a/clang/unittests/Tooling/DependencyScanning/DependencyScannerTest.cpp b/clang/unittests/Tooling/DependencyScanning/DependencyScannerTest.cpp
index 1cdb539..b16dd8e 100644
--- a/clang/unittests/Tooling/DependencyScanning/DependencyScannerTest.cpp
+++ b/clang/unittests/Tooling/DependencyScanning/DependencyScannerTest.cpp
@@ -85,7 +85,7 @@ TEST(DependencyScanner, ScanDepsReuseFilemanager) {
   StringRef CWD = "/root";
   FixedCompilationDatabase CDB(CWD, Compilation);
 
-  auto VFS = new llvm::vfs::InMemoryFileSystem();
+  auto VFS = llvm::makeIntrusiveRefCnt<llvm::vfs::InMemoryFileSystem>();
   VFS->setCurrentWorkingDirectory(CWD);
   auto Sept = llvm::sys::path::get_separator();
   std::string HeaderPath =
@@ -134,7 +134,7 @@ TEST(DependencyScanner, ScanDepsReuseFilemanagerSkippedFile) {
   StringRef CWD = "/root";
   FixedCompilationDatabase CDB(CWD, Compilation);
 
-  auto VFS = new llvm::vfs::InMemoryFileSystem();
+  auto VFS = llvm::makeIntrusiveRefCnt<llvm::vfs::InMemoryFileSystem>();
   VFS->setCurrentWorkingDirectory(CWD);
   auto Sept = llvm::sys::path::get_separator();
   std::string HeaderPath =
@@ -176,7 +176,7 @@ TEST(DependencyScanner, ScanDepsReuseFilemanagerHasInclude) {
   StringRef CWD = "/root";
   FixedCompilationDatabase CDB(CWD, Compilation);
 
-  auto VFS = new llvm::vfs::InMemoryFileSystem();
+  auto VFS = llvm::makeIntrusiveRefCnt<llvm::vfs::InMemoryFileSystem>();
   VFS->setCurrentWorkingDirectory(CWD);
   auto Sept = llvm::sys::path::get_separator();
   std::string HeaderPath =
@@ -218,7 +218,7 @@ TEST(DependencyScanner, ScanDepsWithFS) {
                                           "test.cpp.o"};
   StringRef CWD = "/root";
 
-  auto VFS = new llvm::vfs::InMemoryFileSystem();
+  auto VFS = llvm::makeIntrusiveRefCnt<llvm::vfs::InMemoryFileSystem>();
   VFS->setCurrentWorkingDirectory(CWD);
   auto Sept = llvm::sys::path::get_separator();
   std::string HeaderPath =
@@ -256,7 +256,7 @@ TEST(DependencyScanner, ScanDepsWithModuleLookup) {
   };
   StringRef CWD = "/root";
 
-  auto VFS = new llvm::vfs::InMemoryFileSystem();
+  auto VFS = llvm::makeIntrusiveRefCnt<llvm::vfs::InMemoryFileSystem>();
   VFS->setCurrentWorkingDirectory(CWD);
   auto Sept = llvm::sys::path::get_separator();
   std::string OtherPath =
@@ -306,7 +306,7 @@ TEST(DependencyScanner, ScanDepsWithModuleLookup) {
 TEST(DependencyScanner, ScanDepsWithDiagConsumer) {
   StringRef CWD = "/root";
 
-  auto VFS = new llvm::vfs::InMemoryFileSystem();
+  auto VFS = llvm::makeIntrusiveRefCnt<llvm::vfs::InMemoryFileSystem>();
   VFS->setCurrentWorkingDirectory(CWD);
   auto Sept = llvm::sys::path::get_separator();
   std::string HeaderPath =
diff --git a/clang/unittests/Tooling/RefactoringTest.cpp b/clang/unittests/Tooling/RefactoringTest.cpp
index 35d1143..aff7523e 100644
--- a/clang/unittests/Tooling/RefactoringTest.cpp
+++ b/clang/unittests/Tooling/RefactoringTest.cpp
@@ -1035,8 +1035,7 @@ static constexpr bool usesWindowsPaths() {
 
 TEST(DeduplicateByFileTest, PathsWithDots) {
   std::map<std::string, Replacements> FileToReplaces;
-  llvm::IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> VFS(
-      new llvm::vfs::InMemoryFileSystem());
+  auto VFS = llvm::makeIntrusiveRefCnt<llvm::vfs::InMemoryFileSystem>();
   FileManager FileMgr(FileSystemOptions(), VFS);
   StringRef Path1 = usesWindowsPaths() ? "a\\b\\..\\.\\c.h" : "a/b/.././c.h";
   StringRef Path2 = usesWindowsPaths() ? "a\\c.h" : "a/c.h";
@@ -1051,8 +1050,7 @@ TEST(DeduplicateByFileTest, PathsWithDots) {
 
 TEST(DeduplicateByFileTest, PathWithDotSlash) {
   std::map<std::string, Replacements> FileToReplaces;
-  llvm::IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> VFS(
-      new llvm::vfs::InMemoryFileSystem());
+  auto VFS = llvm::makeIntrusiveRefCnt<llvm::vfs::InMemoryFileSystem>();
   FileManager FileMgr(FileSystemOptions(), VFS);
   StringRef Path1 = usesWindowsPaths() ? ".\\a\\b\\c.h" : "./a/b/c.h";
   StringRef Path2 = usesWindowsPaths() ? "a\\b\\c.h" : "a/b/c.h";
@@ -1067,8 +1065,7 @@ TEST(DeduplicateByFileTest, PathWithDotSlash) {
 
 TEST(DeduplicateByFileTest, NonExistingFilePath) {
   std::map<std::string, Replacements> FileToReplaces;
-  llvm::IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> VFS(
-      new llvm::vfs::InMemoryFileSystem());
+  auto VFS = llvm::makeIntrusiveRefCnt<llvm::vfs::InMemoryFileSystem>();
   FileManager FileMgr(FileSystemOptions(), VFS);
   StringRef Path1 = usesWindowsPaths() ? ".\\a\\b\\c.h" : "./a/b/c.h";
   StringRef Path2 = usesWindowsPaths() ? "a\\b\\c.h" : "a/b/c.h";
diff --git a/clang/unittests/Tooling/RewriterTestContext.h b/clang/unittests/Tooling/RewriterTestContext.h
index 2d697e2..020ef60 100644
--- a/clang/unittests/Tooling/RewriterTestContext.h
+++ b/clang/unittests/Tooling/RewriterTestContext.h
@@ -49,11 +49,12 @@ struct RewriterDiagnosticConsumer : public DiagnosticConsumer {
 class RewriterTestContext {
  public:
    RewriterTestContext()
-       : Diagnostics(IntrusiveRefCntPtr<DiagnosticIDs>(new DiagnosticIDs),
-                     DiagOpts),
-         InMemoryFileSystem(new llvm::vfs::InMemoryFileSystem),
+       : Diagnostics(DiagnosticIDs::create(), DiagOpts),
+         InMemoryFileSystem(
+             llvm::makeIntrusiveRefCnt<llvm::vfs::InMemoryFileSystem>()),
          OverlayFileSystem(
-             new llvm::vfs::OverlayFileSystem(llvm::vfs::getRealFileSystem())),
+             llvm::makeIntrusiveRefCnt<llvm::vfs::OverlayFileSystem>(
+                 llvm::vfs::getRealFileSystem())),
          Files(FileSystemOptions(), OverlayFileSystem),
          Sources(Diagnostics, Files), Rewrite(Sources, Options) {
      Diagnostics.setClient(&DiagnosticPrinter, false);
diff --git a/clang/unittests/Tooling/Syntax/TokensTest.cpp b/clang/unittests/Tooling/Syntax/TokensTest.cpp
index b5f4445..e86793f 100644
--- a/clang/unittests/Tooling/Syntax/TokensTest.cpp
+++ b/clang/unittests/Tooling/Syntax/TokensTest.cpp
@@ -133,7 +133,7 @@ public:
     CI->getPreprocessorOpts().addRemappedFile(
         FileName, llvm::MemoryBuffer::getMemBufferCopy(Code).release());
     CompilerInstance Compiler(std::move(CI));
-    Compiler.setDiagnostics(Diags.get());
+    Compiler.setDiagnostics(Diags);
     Compiler.setFileManager(FileMgr.get());
     Compiler.setSourceManager(SourceMgr.get());
 
@@ -250,9 +250,10 @@ public:
   // Data fields.
   DiagnosticOptions DiagOpts;
   llvm::IntrusiveRefCntPtr<DiagnosticsEngine> Diags =
-      new DiagnosticsEngine(new DiagnosticIDs, DiagOpts);
+      llvm::makeIntrusiveRefCnt<DiagnosticsEngine>(DiagnosticIDs::create(),
+                                                   DiagOpts);
   IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> FS =
-      new llvm::vfs::InMemoryFileSystem;
+      llvm::makeIntrusiveRefCnt<llvm::vfs::InMemoryFileSystem>();
   llvm::IntrusiveRefCntPtr<FileManager> FileMgr =
       new FileManager(FileSystemOptions(), FS);
   llvm::IntrusiveRefCntPtr<SourceManager> SourceMgr =
diff --git a/clang/unittests/Tooling/Syntax/TreeTestBase.cpp b/clang/unittests/Tooling/Syntax/TreeTestBase.cpp
index 9f22b1d..4a25863 100644
--- a/clang/unittests/Tooling/Syntax/TreeTestBase.cpp
+++ b/clang/unittests/Tooling/Syntax/TreeTestBase.cpp
@@ -152,7 +152,7 @@ SyntaxTreeTest::buildTree(StringRef Code, const TestClangConfig &ClangConfig) {
   Invocation->getPreprocessorOpts().addRemappedFile(
       FileName, llvm::MemoryBuffer::getMemBufferCopy(Code).release());
   CompilerInstance Compiler(Invocation);
-  Compiler.setDiagnostics(Diags.get());
+  Compiler.setDiagnostics(Diags);
   Compiler.setFileManager(FileMgr.get());
   Compiler.setSourceManager(SourceMgr.get());
 
diff --git a/clang/unittests/Tooling/Syntax/TreeTestBase.h b/clang/unittests/Tooling/Syntax/TreeTestBase.h
index 6110cff..fce89e2 100644
--- a/clang/unittests/Tooling/Syntax/TreeTestBase.h
+++ b/clang/unittests/Tooling/Syntax/TreeTestBase.h
@@ -42,9 +42,10 @@ protected:
   // Data fields.
   DiagnosticOptions DiagOpts;
   IntrusiveRefCntPtr<DiagnosticsEngine> Diags =
-      new DiagnosticsEngine(new DiagnosticIDs, DiagOpts);
+      llvm::makeIntrusiveRefCnt<DiagnosticsEngine>(DiagnosticIDs::create(),
+                                                   DiagOpts);
   IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> FS =
-      new llvm::vfs::InMemoryFileSystem;
+      llvm::makeIntrusiveRefCnt<llvm::vfs::InMemoryFileSystem>();
   IntrusiveRefCntPtr<FileManager> FileMgr =
       new FileManager(FileSystemOptions(), FS);
   IntrusiveRefCntPtr<SourceManager> SourceMgr =
diff --git a/clang/unittests/Tooling/ToolingTest.cpp b/clang/unittests/Tooling/ToolingTest.cpp
index 32af4b6..c72676f 100644
--- a/clang/unittests/Tooling/ToolingTest.cpp
+++ b/clang/unittests/Tooling/ToolingTest.cpp
@@ -153,8 +153,8 @@ TEST(buildASTFromCode, ReportsErrors) {
 }
 
 TEST(buildASTFromCode, FileSystem) {
-  llvm::IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> InMemoryFileSystem(
-      new llvm::vfs::InMemoryFileSystem);
+  auto InMemoryFileSystem =
+      llvm::makeIntrusiveRefCnt<llvm::vfs::InMemoryFileSystem>();
   InMemoryFileSystem->addFile("included_file.h", 0,
                               llvm::MemoryBuffer::getMemBufferCopy("class X;"));
   std::unique_ptr<ASTUnit> AST = buildASTFromCodeWithArgs(
@@ -188,10 +188,11 @@ TEST(newFrontendActionFactory, CreatesFrontendActionFactoryFromFactoryType) {
 }
 
 TEST(ToolInvocation, TestMapVirtualFile) {
-  llvm::IntrusiveRefCntPtr<llvm::vfs::OverlayFileSystem> OverlayFileSystem(
-      new llvm::vfs::OverlayFileSystem(llvm::vfs::getRealFileSystem()));
-  llvm::IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> InMemoryFileSystem(
-      new llvm::vfs::InMemoryFileSystem);
+  auto OverlayFileSystem =
+      llvm::makeIntrusiveRefCnt<llvm::vfs::OverlayFileSystem>(
+          llvm::vfs::getRealFileSystem());
+  auto InMemoryFileSystem =
+      llvm::makeIntrusiveRefCnt<llvm::vfs::InMemoryFileSystem>();
   OverlayFileSystem->pushOverlay(InMemoryFileSystem);
   llvm::IntrusiveRefCntPtr<FileManager> Files(
       new FileManager(FileSystemOptions(), OverlayFileSystem));
@@ -214,10 +215,11 @@ TEST(ToolInvocation, TestVirtualModulesCompilation) {
   // mapped module.modulemap is found on the include path. In the future, expand
   // this test to run a full modules enabled compilation, so we make sure we can
   // rerun modules compilations with a virtual file system.
-  llvm::IntrusiveRefCntPtr<llvm::vfs::OverlayFileSystem> OverlayFileSystem(
-      new llvm::vfs::OverlayFileSystem(llvm::vfs::getRealFileSystem()));
-  llvm::IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> InMemoryFileSystem(
-      new llvm::vfs::InMemoryFileSystem);
+  auto OverlayFileSystem =
+      llvm::makeIntrusiveRefCnt<llvm::vfs::OverlayFileSystem>(
+          llvm::vfs::getRealFileSystem());
+  auto InMemoryFileSystem =
+      llvm::makeIntrusiveRefCnt<llvm::vfs::InMemoryFileSystem>();
   OverlayFileSystem->pushOverlay(InMemoryFileSystem);
   llvm::IntrusiveRefCntPtr<FileManager> Files(
       new FileManager(FileSystemOptions(), OverlayFileSystem));
@@ -240,10 +242,11 @@ TEST(ToolInvocation, TestVirtualModulesCompilation) {
 }
 
 TEST(ToolInvocation, DiagnosticsEngineProperlyInitializedForCC1Construction) {
-  llvm::IntrusiveRefCntPtr<llvm::vfs::OverlayFileSystem> OverlayFileSystem(
-      new llvm::vfs::OverlayFileSystem(llvm::vfs::getRealFileSystem()));
-  llvm::IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> InMemoryFileSystem(
-      new llvm::vfs::InMemoryFileSystem);
+  auto OverlayFileSystem =
+      llvm::makeIntrusiveRefCnt<llvm::vfs::OverlayFileSystem>(
+          llvm::vfs::getRealFileSystem());
+  auto InMemoryFileSystem =
+      llvm::makeIntrusiveRefCnt<llvm::vfs::InMemoryFileSystem>();
   OverlayFileSystem->pushOverlay(InMemoryFileSystem);
   llvm::IntrusiveRefCntPtr<FileManager> Files(
       new FileManager(FileSystemOptions(), OverlayFileSystem));
@@ -269,10 +272,11 @@ TEST(ToolInvocation, DiagnosticsEngineProperlyInitializedForCC1Construction) {
 }
 
 TEST(ToolInvocation, CustomDiagnosticOptionsOverwriteParsedOnes) {
-  llvm::IntrusiveRefCntPtr<llvm::vfs::OverlayFileSystem> OverlayFileSystem(
-      new llvm::vfs::OverlayFileSystem(llvm::vfs::getRealFileSystem()));
-  llvm::IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> InMemoryFileSystem(
-      new llvm::vfs::InMemoryFileSystem);
+  auto OverlayFileSystem =
+      llvm::makeIntrusiveRefCnt<llvm::vfs::OverlayFileSystem>(
+          llvm::vfs::getRealFileSystem());
+  auto InMemoryFileSystem =
+      llvm::makeIntrusiveRefCnt<llvm::vfs::InMemoryFileSystem>();
   OverlayFileSystem->pushOverlay(InMemoryFileSystem);
   llvm::IntrusiveRefCntPtr<FileManager> Files(
       new FileManager(FileSystemOptions(), OverlayFileSystem));
@@ -315,10 +319,11 @@ struct DiagnosticConsumerExpectingSourceManager : public DiagnosticConsumer {
 };
 
 TEST(ToolInvocation, DiagConsumerExpectingSourceManager) {
-  llvm::IntrusiveRefCntPtr<llvm::vfs::OverlayFileSystem> OverlayFileSystem(
-      new llvm::vfs::OverlayFileSystem(llvm::vfs::getRealFileSystem()));
-  llvm::IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> InMemoryFileSystem(
-      new llvm::vfs::InMemoryFileSystem);
+  auto OverlayFileSystem =
+      llvm::makeIntrusiveRefCnt<llvm::vfs::OverlayFileSystem>(
+          llvm::vfs::getRealFileSystem());
+  auto InMemoryFileSystem =
+      llvm::makeIntrusiveRefCnt<llvm::vfs::InMemoryFileSystem>();
   OverlayFileSystem->pushOverlay(InMemoryFileSystem);
   llvm::IntrusiveRefCntPtr<FileManager> Files(
       new FileManager(FileSystemOptions(), OverlayFileSystem));
@@ -341,10 +346,11 @@ TEST(ToolInvocation, DiagConsumerExpectingSourceManager) {
 }
 
 TEST(ToolInvocation, CC1Args) {
-  llvm::IntrusiveRefCntPtr<llvm::vfs::OverlayFileSystem> OverlayFileSystem(
-      new llvm::vfs::OverlayFileSystem(llvm::vfs::getRealFileSystem()));
-  llvm::IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> InMemoryFileSystem(
-      new llvm::vfs::InMemoryFileSystem);
+  auto OverlayFileSystem =
+      llvm::makeIntrusiveRefCnt<llvm::vfs::OverlayFileSystem>(
+          llvm::vfs::getRealFileSystem());
+  auto InMemoryFileSystem =
+      llvm::makeIntrusiveRefCnt<llvm::vfs::InMemoryFileSystem>();
   OverlayFileSystem->pushOverlay(InMemoryFileSystem);
   llvm::IntrusiveRefCntPtr<FileManager> Files(
       new FileManager(FileSystemOptions(), OverlayFileSystem));
@@ -361,10 +367,11 @@ TEST(ToolInvocation, CC1Args) {
 }
 
 TEST(ToolInvocation, CC1ArgsInvalid) {
-  llvm::IntrusiveRefCntPtr<llvm::vfs::OverlayFileSystem> OverlayFileSystem(
-      new llvm::vfs::OverlayFileSystem(llvm::vfs::getRealFileSystem()));
-  llvm::IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> InMemoryFileSystem(
-      new llvm::vfs::InMemoryFileSystem);
+  auto OverlayFileSystem =
+      llvm::makeIntrusiveRefCnt<llvm::vfs::OverlayFileSystem>(
+          llvm::vfs::getRealFileSystem());
+  auto InMemoryFileSystem =
+      llvm::makeIntrusiveRefCnt<llvm::vfs::InMemoryFileSystem>();
   OverlayFileSystem->pushOverlay(InMemoryFileSystem);
   llvm::IntrusiveRefCntPtr<FileManager> Files(
       new FileManager(FileSystemOptions(), OverlayFileSystem));
@@ -398,7 +405,7 @@ struct CommandLineExtractorTest : public ::testing::Test {
 
 public:
   CommandLineExtractorTest()
-      : InMemoryFS(new llvm::vfs::InMemoryFileSystem),
+      : InMemoryFS(llvm::makeIntrusiveRefCnt<llvm::vfs::InMemoryFileSystem>()),
         Diags(CompilerInstance::createDiagnostics(*InMemoryFS, DiagOpts)),
         Driver("clang", llvm::sys::getDefaultTargetTriple(), *Diags,
                "clang LLVM compiler", overlayRealFS(InMemoryFS)) {}
@@ -755,10 +762,11 @@ TEST(ClangToolTest, NoOutputCommands) {
 
 TEST(ClangToolTest, BaseVirtualFileSystemUsage) {
   FixedCompilationDatabase Compilations("/", std::vector<std::string>());
-  llvm::IntrusiveRefCntPtr<llvm::vfs::OverlayFileSystem> OverlayFileSystem(
-      new llvm::vfs::OverlayFileSystem(llvm::vfs::getRealFileSystem()));
-  llvm::IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> InMemoryFileSystem(
-      new llvm::vfs::InMemoryFileSystem);
+  auto OverlayFileSystem =
+      llvm::makeIntrusiveRefCnt<llvm::vfs::OverlayFileSystem>(
+          llvm::vfs::getRealFileSystem());
+  auto InMemoryFileSystem =
+      llvm::makeIntrusiveRefCnt<llvm::vfs::InMemoryFileSystem>();
   OverlayFileSystem->pushOverlay(InMemoryFileSystem);
 
   InMemoryFileSystem->addFile(
diff --git a/clang/utils/TableGen/NeonEmitter.cpp b/clang/utils/TableGen/NeonEmitter.cpp
index d4fb56e..89891fd 100644
--- a/clang/utils/TableGen/NeonEmitter.cpp
+++ b/clang/utils/TableGen/NeonEmitter.cpp
@@ -1401,14 +1401,12 @@ void Intrinsic::emitBodyAsBuiltinCall() {
       if (LocalCK == ClassB || (T.isHalf() && !T.isScalarForMangling())) {
         CastToType.makeInteger(8, true);
         Arg = "__builtin_bit_cast(" + CastToType.str() + ", " + Arg + ")";
-      } else if (LocalCK == ClassI) {
-        if (CastToType.isInteger()) {
-          CastToType.makeSigned();
-          Arg = "__builtin_bit_cast(" + CastToType.str() + ", " + Arg + ")";
-        }
+      } else if (LocalCK == ClassI &&
+                 (CastToType.isInteger() || CastToType.isPoly())) {
+        CastToType.makeSigned();
+        Arg = "__builtin_bit_cast(" + CastToType.str() + ", " + Arg + ")";
       }
     }
-
     S += Arg + ", ";
   }