Diffstat (limited to 'clang')
72 files changed, 1572 insertions, 922 deletions
diff --git a/clang/cmake/modules/CMakeLists.txt b/clang/cmake/modules/CMakeLists.txt index d2d6812..9ad2f98 100644 --- a/clang/cmake/modules/CMakeLists.txt +++ b/clang/cmake/modules/CMakeLists.txt @@ -8,15 +8,19 @@ include(FindPrefixFromConfig) # the usual CMake convention seems to be ${Project}Targets.cmake. set(CLANG_INSTALL_PACKAGE_DIR "${CMAKE_INSTALL_PACKAGEDIR}/clang" CACHE STRING "Path for CMake subdirectory for Clang (defaults to '${CMAKE_INSTALL_PACKAGEDIR}/clang')") -# CMAKE_INSTALL_PACKAGEDIR might be absolute, so don't reuse below. -set(clang_cmake_builddir "${CMAKE_BINARY_DIR}/lib${LLVM_LIBDIR_SUFFIX}/cmake/clang") # Keep this in sync with llvm/cmake/CMakeLists.txt! set(LLVM_INSTALL_PACKAGE_DIR "${CMAKE_INSTALL_PACKAGEDIR}/llvm" CACHE STRING "Path for CMake subdirectory for LLVM (defaults to '${CMAKE_INSTALL_PACKAGEDIR}/llvm')") # CMAKE_INSTALL_PACKAGEDIR might be absolute, so don't reuse below. -string(REPLACE "${CMAKE_CFG_INTDIR}" "." llvm_cmake_builddir "${LLVM_LIBRARY_DIR}") -set(llvm_cmake_builddir "${llvm_cmake_builddir}/cmake/llvm") +string(REPLACE "${CMAKE_CFG_INTDIR}" "." llvm_builddir "${LLVM_LIBRARY_DIR}") +set(llvm_cmake_builddir "${llvm_builddir}/cmake/llvm") +if(CLANG_BUILT_STANDALONE) + # CMAKE_INSTALL_PACKAGEDIR might be absolute, so don't reuse below. + set(clang_cmake_builddir "${CMAKE_BINARY_DIR}/lib${LLVM_LIBDIR_SUFFIX}/cmake/clang") +else() + set(clang_cmake_builddir "${llvm_builddir}/cmake/clang") +endif() get_property(CLANG_EXPORTS GLOBAL PROPERTY CLANG_EXPORTS) export(TARGETS ${CLANG_EXPORTS} FILE ${clang_cmake_builddir}/ClangTargets.cmake) diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index afe3d46..4f62a67 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -304,6 +304,8 @@ Attribute Changes in Clang Improvements to Clang's diagnostics ----------------------------------- +- Diagnostics messages now refer to ``structured binding`` instead of ``decomposition``, + to align with `P0615R0 <https://www.open-std.org/jtc1/sc22/wg21/docs/papers/2017/p0615r0.html>`_ changing the term. (#GH157880) - Added a separate diagnostic group ``-Wfunction-effect-redeclarations``, for the more pedantic diagnostics for function effects (``[[clang::nonblocking]]`` and ``[[clang::nonallocating]]``). Moved the warning for a missing (though implied) attribute on a redeclaration into this group. @@ -518,6 +520,7 @@ X86 Support - Remove `[no-]evex512` feature request from intrinsics and builtins. - Change features `avx10.x-[256,512]` to `avx10.x`. - `-march=wildcatlake` is now supported. +- `-march=novalake` is now supported. Arm and AArch64 Support ^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/clang/docs/analyzer/checkers.rst b/clang/docs/analyzer/checkers.rst index dcfa4e3..fd0b304 100644 --- a/clang/docs/analyzer/checkers.rst +++ b/clang/docs/analyzer/checkers.rst @@ -3465,12 +3465,6 @@ Check for an out-of-bound pointer being returned to callers. return x; // warn: undefined or garbage returned } - -alpha.security.cert -^^^^^^^^^^^^^^^^^^^ - -SEI CERT checkers which tries to find errors based on their `C coding rules <https://wiki.sei.cmu.edu/confluence/display/c/2+Rules>`_. 
- alpha.unix ^^^^^^^^^^ diff --git a/clang/include/clang/AST/TypeBase.h b/clang/include/clang/AST/TypeBase.h index 5892566..f07861f 100644 --- a/clang/include/clang/AST/TypeBase.h +++ b/clang/include/clang/AST/TypeBase.h @@ -4378,8 +4378,6 @@ protected: unsigned NumRows; unsigned NumColumns; - static constexpr unsigned MaxElementsPerDimension = (1 << 20) - 1; - ConstantMatrixType(QualType MatrixElementType, unsigned NRows, unsigned NColumns, QualType CanonElementType); @@ -4398,16 +4396,6 @@ public: return getNumRows() * getNumColumns(); } - /// Returns true if \p NumElements is a valid matrix dimension. - static constexpr bool isDimensionValid(size_t NumElements) { - return NumElements > 0 && NumElements <= MaxElementsPerDimension; - } - - /// Returns the maximum number of elements per dimension. - static constexpr unsigned getMaxElementsPerDimension() { - return MaxElementsPerDimension; - } - void Profile(llvm::FoldingSetNodeID &ID) { Profile(ID, getElementType(), getNumRows(), getNumColumns(), getTypeClass()); diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td index 279c0c7..62c70fba 100644 --- a/clang/include/clang/Basic/BuiltinsX86.td +++ b/clang/include/clang/Basic/BuiltinsX86.td @@ -1359,23 +1359,17 @@ let Features = "avx512bw", Attributes = [NoThrow, Const, Constexpr, RequiredVect def pshufb512 : X86Builtin<"_Vector<64, char>(_Vector<64, char>, _Vector<64, char>)">; } -let Features = "avx512cd,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { +let Features = "avx512cd,avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in { def vpconflictdi_128 : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>)">; -} - -let Features = "avx512cd,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { - def vpconflictdi_256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>)">; -} - -let Features = "avx512cd,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { def vpconflictsi_128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>)">; } -let Features = "avx512cd,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { +let Features = "avx512cd,avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in { + def vpconflictdi_256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>)">; def vpconflictsi_256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>)">; } -let Features = "avx512cd", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { +let Features = "avx512cd", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in { def vpconflictdi_512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>)">; def vpconflictsi_512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>)">; } diff --git a/clang/include/clang/Basic/DiagnosticParseKinds.td b/clang/include/clang/Basic/DiagnosticParseKinds.td index c724136..e5e071f 100644 --- a/clang/include/clang/Basic/DiagnosticParseKinds.td +++ b/clang/include/clang/Basic/DiagnosticParseKinds.td @@ -505,7 +505,7 @@ def err_expected_coloncolon_after_super : Error< "expected '::' after '__super'">; def ext_decomp_decl_empty : ExtWarn< - "ISO C++17 does not allow a decomposition group to be empty">, + "ISO C++17 does not allow a structured binding group to be empty">, InGroup<DiagGroup<"empty-decomposition">>; def err_function_parameter_limit_exceeded : Error< diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td 
b/clang/include/clang/Basic/DiagnosticSemaKinds.td index 40bc7b9..12fd7b08 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -31,12 +31,12 @@ defm constexpr_body_multiple_return : CXX14Compat< defm variable_template : CXX14Compat<"variable templates are">; // C++17 compatibility with C++14 and earlier. -defm decomp_decl : CXX17Compat<"decomposition declarations are">; +defm decomp_decl : CXX17Compat<"structured binding declarations are">; defm inline_variable : CXX17Compat<"inline variables are">; // C++20 compatibility with C++17 and earlier. defm decomp_decl_spec - : CXX20Compat<"decomposition declaration declared '%0' is">; + : CXX20Compat<"structured binding declaration declared '%0' is">; defm constexpr_local_var_no_init : CXX20Compat< "uninitialized variable in a constexpr %select{function|constructor}0 is">; defm constexpr_function_try_block : CXX20Compat< @@ -591,58 +591,58 @@ def warn_modifying_shadowing_decl : // C++ decomposition declarations def err_decomp_decl_context : Error< - "decomposition declaration not permitted in this context">; + "structured binding declaration not permitted in this context">; def err_decomp_decl_spec - : Error<"decomposition declaration cannot be declared '%0'">; + : Error<"structured binding declaration cannot be declared '%0'">; def err_decomp_decl_type : Error< - "decomposition declaration cannot be declared with type %0; " + "structured binding declaration cannot be declared with type %0; " "declared type must be 'auto' or reference to 'auto'">; def err_decomp_decl_constraint : Error< - "decomposition declaration cannot be declared with constrained 'auto'">; + "structured binding declaration cannot be declared with constrained 'auto'">; def err_decomp_decl_parens : Error< - "decomposition declaration cannot be declared with parentheses">; + "structured binding declaration cannot be declared with parentheses">; def err_decomp_decl_template : Error< - "decomposition declaration cannot be a template">; + "structured binding declaration cannot be a template">; def err_decomp_decl_not_alone : Error< - "decomposition declaration must be the only declaration in its group">; + "structured binding declaration must be the only declaration in its group">; def err_decomp_decl_requires_init : Error< - "decomposition declaration %0 requires an initializer">; + "structured binding declaration %0 requires an initializer">; def err_decomp_decl_wrong_number_bindings : Error< - "type %0 decomposes into %3 %plural{1:element|:elements}2, but " + "type %0 binds to %3 %plural{1:element|:elements}2, but " "%select{%plural{0:no|:only %1}1|%1}4 " "%plural{1:name was|:names were}1 provided">; def err_decomp_decl_unbindable_type : Error< - "cannot decompose %select{union|non-class, non-array}1 type %2">; + "cannot bind %select{union|non-class, non-array}1 type %2">; def err_decomp_decl_multiple_bases_with_members : Error< - "cannot decompose class type %1: " + "cannot bind class type %1: " "%select{its base classes %2 and|both it and its base class}0 %3 " "have non-static data members">; def err_decomp_decl_ambiguous_base : Error< - "cannot decompose members of ambiguous base class %1 of %0:%2">; + "cannot bind members of ambiguous base class %1 of %0:%2">; def err_decomp_decl_inaccessible_base : Error< - "cannot decompose members of inaccessible base class %1 of %0">, + "cannot bind members of inaccessible base class %1 of %0">, AccessControl; def err_decomp_decl_inaccessible_field : Error< - "cannot 
decompose %select{private|protected}0 member %1 of %3">, + "cannot bind %select{private|protected}0 member %1 of %3">, AccessControl; def err_decomp_decl_lambda : Error< - "cannot decompose lambda closure type">; + "cannot bind lambda closure type">; def err_decomp_decl_anon_union_member : Error< - "cannot decompose class type %0 because it has an anonymous " + "cannot bind class type %0 because it has an anonymous " "%select{struct|union}1 member">; def err_decomp_decl_std_tuple_element_not_specialized : Error< - "cannot decompose this type; 'std::tuple_element<%0>::type' " + "cannot bind this type; 'std::tuple_element<%0>::type' " "does not name a type">; def err_decomp_decl_std_tuple_size_not_constant : Error< - "cannot decompose this type; 'std::tuple_size<%0>::value' " + "cannot bind this type; 'std::tuple_size<%0>::value' " "is not a valid integral constant expression">; def err_decomp_decl_std_tuple_size_invalid - : Error<"cannot decompose this type; 'std::tuple_size<%0>::value' " + : Error<"cannot bind this type; 'std::tuple_size<%0>::value' " "is not a valid size: %1">; def note_in_binding_decl_init : Note< "in implicit initialization of binding declaration %0">; def err_arg_is_not_destructurable : Error< - "type %0 cannot be decomposed">; + "type %0 cannot be bound">; def err_std_type_trait_not_class_template : Error< "unsupported standard library implementation: " @@ -2620,7 +2620,7 @@ def err_auto_variable_cannot_appear_in_own_initializer "declared with deduced type %2 cannot appear in its own initializer">; def err_binding_cannot_appear_in_own_initializer : Error< "binding %0 cannot appear in the initializer of its own " - "decomposition declaration">; + "structured binding declaration">; def err_new_array_of_auto : Error< "cannot allocate array of 'auto'">; def err_auto_not_allowed : Error< diff --git a/clang/include/clang/Basic/LangOptions.def b/clang/include/clang/Basic/LangOptions.def index 5f70b51..8d6b8a1 100644 --- a/clang/include/clang/Basic/LangOptions.def +++ b/clang/include/clang/Basic/LangOptions.def @@ -433,6 +433,7 @@ ENUM_LANGOPT(RegisterStaticDestructors, RegisterStaticDestructorsKind, 2, LANGOPT(RegCall4, 1, 0, NotCompatible, "Set __regcall4 as a default calling convention to respect __regcall ABI v.4") LANGOPT(MatrixTypes, 1, 0, NotCompatible, "Enable or disable the builtin matrix type") +VALUE_LANGOPT(MaxMatrixDimension, 32, (1 << 20) - 1, NotCompatible, "maximum allowed matrix dimension") LANGOPT(CXXAssumptions, 1, 1, NotCompatible, "Enable or disable codegen and compile-time checks for C++23's [[assume]] attribute") diff --git a/clang/include/clang/CIR/Dialect/IR/CIRAttrConstraints.td b/clang/include/clang/CIR/Dialect/IR/CIRAttrConstraints.td index 8f72ff4..2548d46 100644 --- a/clang/include/clang/CIR/Dialect/IR/CIRAttrConstraints.td +++ b/clang/include/clang/CIR/Dialect/IR/CIRAttrConstraints.td @@ -39,13 +39,31 @@ def CIR_AnyIntOrFloatAttr : AnyAttrOf<[CIR_AnyIntAttr, CIR_AnyFPAttr], } //===----------------------------------------------------------------------===// +// Exceptions constraints +//===----------------------------------------------------------------------===// + +def CIR_AnyCatchAllAttr + : CIR_AttrConstraint<"::cir::CatchAllAttr", "catch all attribute">; + +def CIR_AnyUnwindAttr + : CIR_AttrConstraint<"::cir::UnwindAttr", "unwind attribute">; + +//===----------------------------------------------------------------------===// // GlobalViewAttr constraints //===----------------------------------------------------------------------===// -def 
CIR_AnyGlobalViewAttr : CIR_AttrConstraint<"::cir::GlobalViewAttr", "GlobalView attribute">; +def CIR_AnyGlobalViewAttr + : CIR_AttrConstraint<"::cir::GlobalViewAttr", "GlobalView attribute">; -def CIR_AnyIntOrGlobalViewAttr : AnyAttrOf<[CIR_AnyIntAttr, CIR_AnyGlobalViewAttr], - "integer or global view attribute"> { +def CIR_AnyIntOrGlobalViewAttr + : AnyAttrOf<[CIR_AnyIntAttr, CIR_AnyGlobalViewAttr], + "integer or global view attribute"> { + string cppType = "::mlir::TypedAttr"; +} + +def CIR_TryHandlerAttr + : AnyAttrOf<[CIR_AnyGlobalViewAttr, CIR_AnyCatchAllAttr, CIR_AnyUnwindAttr], + "catch all or unwind or global view attribute"> { string cppType = "::mlir::TypedAttr"; } @@ -61,4 +79,7 @@ def CIR_IntOrGlobalViewArrayAttr : TypedArrayAttrBase<CIR_AnyIntOrGlobalViewAttr string cppType = "::mlir::ArrayAttr"; } -#endif // CLANG_CIR_DIALECT_IR_CIRATTRCONSTRAINTS_TD
\ No newline at end of file +def CIR_TryHandlerArrayAttr : TypedArrayAttrBase<CIR_TryHandlerAttr, + "catch all or unwind or global view array attribute">; + +#endif // CLANG_CIR_DIALECT_IR_CIRATTRCONSTRAINTS_TD diff --git a/clang/include/clang/CIR/Dialect/IR/CIRAttrs.td b/clang/include/clang/CIR/Dialect/IR/CIRAttrs.td index 610e349..69dbad3 100644 --- a/clang/include/clang/CIR/Dialect/IR/CIRAttrs.td +++ b/clang/include/clang/CIR/Dialect/IR/CIRAttrs.td @@ -968,4 +968,19 @@ def CIR_TypeInfoAttr : CIR_Attr<"TypeInfo", "typeinfo", [TypedAttrInterface]> { }]; } +//===----------------------------------------------------------------------===// +// CatchAllAttr & UnwindAttr +//===----------------------------------------------------------------------===// + +// Represents the catch_all region. +def CIR_CatchAllAttr : CIR_UnitAttr<"CatchAll", "all"> { + let storageType = [{ CatchAllAttr }]; +} + +// Represents the unwind region where unwind continues or +// the program std::terminate's. +def CIR_UnwindAttr : CIR_UnitAttr<"Unwind", "unwind"> { + let storageType = [{ CatchUnwind }]; +} + #endif // CLANG_CIR_DIALECT_IR_CIRATTRS_TD diff --git a/clang/include/clang/CIR/Dialect/IR/CIROps.td b/clang/include/clang/CIR/Dialect/IR/CIROps.td index baab156..4b26f81 100644 --- a/clang/include/clang/CIR/Dialect/IR/CIROps.td +++ b/clang/include/clang/CIR/Dialect/IR/CIROps.td @@ -644,7 +644,7 @@ def CIR_StoreOp : CIR_Op<"store", [ defvar CIR_ReturnableScopes = [ "FuncOp", "ScopeOp", "IfOp", "SwitchOp", "CaseOp", - "DoWhileOp", "WhileOp", "ForOp" + "DoWhileOp", "WhileOp", "ForOp", "TryOp" ]; def CIR_ReturnOp : CIR_Op<"return", [ @@ -791,7 +791,7 @@ def CIR_ConditionOp : CIR_Op<"condition", [ defvar CIR_YieldableScopes = [ "ArrayCtor", "ArrayDtor", "CaseOp", "DoWhileOp", "ForOp", "GlobalOp", "IfOp", - "ScopeOp", "SwitchOp", "TernaryOp", "WhileOp" + "ScopeOp", "SwitchOp", "TernaryOp", "WhileOp", "TryOp" ]; def CIR_YieldOp : CIR_Op<"yield", [ @@ -4326,6 +4326,89 @@ def CIR_AllocExceptionOp : CIR_Op<"alloc.exception"> { } //===----------------------------------------------------------------------===// +// TryOp +//===----------------------------------------------------------------------===// + +def CIR_TryOp : CIR_Op<"try",[ + DeclareOpInterfaceMethods<RegionBranchOpInterface>, + RecursivelySpeculatable, AutomaticAllocationScope, NoRegionArguments +]> { + let summary = "C++ try block"; + let description = [{ + Holds the lexical scope of `try {}`. Note that resources used on catch + clauses are usually allocated in the same parent as `cir.try`. + + `synthetic`: use `cir.try` to represent try/catches not originally + present in the source code. For example, a synthetic `cir.try` region + is created around the constructor call when `operator new` is used + so that the memory allocated will be freed if the constructor throws + an exception. + + `cleanup`: indicates that there are cleanups that must be performed + when exiting the try region via exception, even if the exception is not + caught. + + Example: + + ```mlir + cir.try { + cir.call exception @function() : () -> () + cir.yield + } catch [type #cir.global_view<@_ZTIPf> : !cir.ptr<!u8i>] { + ... + cir.yield + } unwind { + cir.resume + } + ``` + }]; + + let arguments = (ins + UnitAttr:$synthetic, + UnitAttr:$cleanup, + CIR_TryHandlerArrayAttr:$handler_types + ); + + let regions = (region + AnyRegion:$try_region, + VariadicRegion<MinSizedRegion<1>>:$handler_regions + ); + + let assemblyFormat = [{ + (`synthetic` $synthetic^)? + (`cleanup` $cleanup^)? 
+ $try_region + custom<TryHandlerRegions>($handler_regions, $handler_types) + attr-dict + }]; + + let builders = [ + OpBuilder<(ins + "llvm::function_ref<void(mlir::OpBuilder &, " + "mlir::Location)>":$tryBuilder, + "llvm::function_ref<void(mlir::OpBuilder &, mlir::Location, " + "mlir::OperationState &)>":$handlersBuilder), + [{ + assert(tryBuilder && "expected builder callback for 'cir.try' body"); + assert(handlersBuilder + && "expected builder callback for 'handlers' body"); + + OpBuilder::InsertionGuard guard($_builder); + + // Try body region + mlir::Region *tryBodyRegion = $_state.addRegion(); + + // Create try body region and set insertion point + $_builder.createBlock(tryBodyRegion); + tryBuilder($_builder, $_state.location); + handlersBuilder($_builder, $_state.location, $_state); + }]> + ]; + + let hasLLVMLowering = false; +} + +//===----------------------------------------------------------------------===// // Atomic operations //===----------------------------------------------------------------------===// diff --git a/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td b/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td index b83bbcd..ffae3b9 100644 --- a/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td +++ b/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td @@ -76,9 +76,6 @@ def SecurityAlpha : Package<"security">, ParentPackage<Alpha>; def CERT : Package<"cert">, ParentPackage<Security>; def ENV : Package<"env">, ParentPackage<CERT>; -def CERTAlpha : Package<"cert">, ParentPackage<SecurityAlpha>; -def POSAlpha : Package<"pos">, ParentPackage<CERTAlpha>; - def Unix : Package<"unix">; def UnixAlpha : Package<"unix">, ParentPackage<Alpha>; def CString : Package<"cstring">, ParentPackage<Unix>; diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp index e403b3e..32c8f62 100644 --- a/clang/lib/AST/ASTContext.cpp +++ b/clang/lib/AST/ASTContext.cpp @@ -4712,8 +4712,8 @@ QualType ASTContext::getConstantMatrixType(QualType ElementTy, unsigned NumRows, assert(MatrixType::isValidElementType(ElementTy) && "need a valid element type"); - assert(ConstantMatrixType::isDimensionValid(NumRows) && - ConstantMatrixType::isDimensionValid(NumColumns) && + assert(NumRows > 0 && NumRows <= LangOpts.MaxMatrixDimension && + NumColumns > 0 && NumColumns <= LangOpts.MaxMatrixDimension && "need valid matrix dimensions"); void *InsertPos = nullptr; if (ConstantMatrixType *MTP = MatrixTypes.FindNodeOrInsertPos(ID, InsertPos)) diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index b69f360..a0d2c76 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -3101,6 +3101,33 @@ static bool interp__builtin_vec_set(InterpState &S, CodePtr OpPC, return true; } +static bool interp__builtin_ia32_vpconflict(InterpState &S, CodePtr OpPC, + const CallExpr *Call) { + assert(Call->getNumArgs() == 1); + + QualType Arg0Type = Call->getArg(0)->getType(); + const auto *VecT = Arg0Type->castAs<VectorType>(); + PrimType ElemT = *S.getContext().classify(VecT->getElementType()); + unsigned NumElems = VecT->getNumElements(); + bool DestUnsigned = Call->getType()->isUnsignedIntegerOrEnumerationType(); + const Pointer &Src = S.Stk.pop<Pointer>(); + const Pointer &Dst = S.Stk.peek<Pointer>(); + + for (unsigned I = 0; I != NumElems; ++I) { + INT_TYPE_SWITCH_NO_BOOL(ElemT, { + APSInt ElemI = Src.elem<T>(I).toAPSInt(); + APInt ConflictMask(ElemI.getBitWidth(), 0); + for (unsigned J = 0; J != I; ++J) 
{ + APSInt ElemJ = Src.elem<T>(J).toAPSInt(); + ConflictMask.setBitVal(J, ElemI == ElemJ); + } + Dst.elem<T>(I) = static_cast<T>(APSInt(ConflictMask, DestUnsigned)); + }); + } + Dst.initializeAllElements(); + return true; +} + bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, uint32_t BuiltinID) { if (!S.getASTContext().BuiltinInfo.isConstantEvaluated(BuiltinID)) @@ -3891,7 +3918,13 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, [](const APSInt &Lo, const APSInt &Hi, const APSInt &Amt) { return llvm::APIntOps::fshr(Hi, Lo, Amt); }); - + case X86::BI__builtin_ia32_vpconflictsi_128: + case X86::BI__builtin_ia32_vpconflictsi_256: + case X86::BI__builtin_ia32_vpconflictsi_512: + case X86::BI__builtin_ia32_vpconflictdi_128: + case X86::BI__builtin_ia32_vpconflictdi_256: + case X86::BI__builtin_ia32_vpconflictdi_512: + return interp__builtin_ia32_vpconflict(S, OpPC, Call); case clang::X86::BI__builtin_ia32_blendpd: case clang::X86::BI__builtin_ia32_blendpd256: case clang::X86::BI__builtin_ia32_blendps: diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index a07eb22..16141b2 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -12179,6 +12179,37 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { return Success(APValue(ResultElements.data(), ResultElements.size()), E); } + case X86::BI__builtin_ia32_vpconflictsi_128: + case X86::BI__builtin_ia32_vpconflictsi_256: + case X86::BI__builtin_ia32_vpconflictsi_512: + case X86::BI__builtin_ia32_vpconflictdi_128: + case X86::BI__builtin_ia32_vpconflictdi_256: + case X86::BI__builtin_ia32_vpconflictdi_512: { + APValue Source; + + if (!EvaluateAsRValue(Info, E->getArg(0), Source)) + return false; + + unsigned SourceLen = Source.getVectorLength(); + SmallVector<APValue, 32> ResultElements; + ResultElements.reserve(SourceLen); + + const auto *VecT = E->getType()->castAs<VectorType>(); + bool DestUnsigned = + VecT->getElementType()->isUnsignedIntegerOrEnumerationType(); + + for (unsigned I = 0; I != SourceLen; ++I) { + const APValue &EltI = Source.getVectorElt(I); + + APInt ConflictMask(EltI.getInt().getBitWidth(), 0); + for (unsigned J = 0; J != I; ++J) { + const APValue &EltJ = Source.getVectorElt(J); + ConflictMask.setBitVal(J, EltI.getInt() == EltJ.getInt()); + } + ResultElements.push_back(APValue(APSInt(ConflictMask, DestUnsigned))); + } + return Success(APValue(ResultElements.data(), ResultElements.size()), E); + } case X86::BI__builtin_ia32_blendpd: case X86::BI__builtin_ia32_blendpd256: case X86::BI__builtin_ia32_blendps: diff --git a/clang/lib/Basic/LangOptions.cpp b/clang/lib/Basic/LangOptions.cpp index 641a3db..19b5576 100644 --- a/clang/lib/Basic/LangOptions.cpp +++ b/clang/lib/Basic/LangOptions.cpp @@ -132,8 +132,12 @@ void LangOptions::setLangDefaults(LangOptions &Opts, Language Lang, Opts.NamedLoops = Std.isC2y(); Opts.HLSL = Lang == Language::HLSL; - if (Opts.HLSL && Opts.IncludeDefaultHeader) - Includes.push_back("hlsl.h"); + if (Opts.HLSL) { + if (Opts.IncludeDefaultHeader) + Includes.push_back("hlsl.h"); + // Set maximum matrix dimension to 4 for HLSL + Opts.MaxMatrixDimension = 4; + } // Set OpenCL Version. 
Opts.OpenCL = Std.isOpenCL(); diff --git a/clang/lib/Basic/Targets/X86.cpp b/clang/lib/Basic/Targets/X86.cpp index ef4973c..e71f10c 100644 --- a/clang/lib/Basic/Targets/X86.cpp +++ b/clang/lib/Basic/Targets/X86.cpp @@ -626,6 +626,7 @@ void X86TargetInfo::getTargetDefines(const LangOptions &Opts, case CK_Lunarlake: case CK_Pantherlake: case CK_Wildcatlake: + case CK_Novalake: case CK_Sierraforest: case CK_Grandridge: case CK_Graniterapids: @@ -1615,6 +1616,7 @@ std::optional<unsigned> X86TargetInfo::getCPUCacheLineSize() const { case CK_Lunarlake: case CK_Pantherlake: case CK_Wildcatlake: + case CK_Novalake: case CK_Sierraforest: case CK_Grandridge: case CK_Graniterapids: diff --git a/clang/lib/CIR/Dialect/IR/CIRDialect.cpp b/clang/lib/CIR/Dialect/IR/CIRDialect.cpp index 7af3dc1..0712de2 100644 --- a/clang/lib/CIR/Dialect/IR/CIRDialect.cpp +++ b/clang/lib/CIR/Dialect/IR/CIRDialect.cpp @@ -2915,6 +2915,130 @@ LogicalResult cir::TypeInfoAttr::verify( } //===----------------------------------------------------------------------===// +// TryOp +//===----------------------------------------------------------------------===// + +void cir::TryOp::getSuccessorRegions( + mlir::RegionBranchPoint point, + llvm::SmallVectorImpl<mlir::RegionSuccessor> ®ions) { + // The `try` and the `catchers` region branch back to the parent operation. + if (!point.isParent()) { + regions.push_back(mlir::RegionSuccessor()); + return; + } + + regions.push_back(mlir::RegionSuccessor(&getTryRegion())); + + // TODO(CIR): If we know a target function never throws a specific type, we + // can remove the catch handler. + for (mlir::Region &handlerRegion : this->getHandlerRegions()) + regions.push_back(mlir::RegionSuccessor(&handlerRegion)); +} + +static void +printTryHandlerRegions(mlir::OpAsmPrinter &printer, cir::TryOp op, + mlir::MutableArrayRef<mlir::Region> handlerRegions, + mlir::ArrayAttr handlerTypes) { + if (!handlerTypes) + return; + + for (const auto [typeIdx, typeAttr] : llvm::enumerate(handlerTypes)) { + if (typeIdx) + printer << " "; + + if (mlir::isa<cir::CatchAllAttr>(typeAttr)) { + printer << "catch all "; + } else if (mlir::isa<cir::UnwindAttr>(typeAttr)) { + printer << "unwind "; + } else { + printer << "catch [type "; + printer.printAttribute(typeAttr); + printer << "] "; + } + + printer.printRegion(handlerRegions[typeIdx], + /*printEntryBLockArgs=*/false, + /*printBlockTerminators=*/true); + } +} + +static mlir::ParseResult parseTryHandlerRegions( + mlir::OpAsmParser &parser, + llvm::SmallVectorImpl<std::unique_ptr<mlir::Region>> &handlerRegions, + mlir::ArrayAttr &handlerTypes) { + + auto parseCheckedCatcherRegion = [&]() -> mlir::ParseResult { + handlerRegions.emplace_back(new mlir::Region); + + mlir::Region &currRegion = *handlerRegions.back(); + mlir::SMLoc regionLoc = parser.getCurrentLocation(); + if (parser.parseRegion(currRegion)) { + handlerRegions.clear(); + return failure(); + } + + if (!currRegion.empty() && !(currRegion.back().mightHaveTerminator() && + currRegion.back().getTerminator())) + return parser.emitError( + regionLoc, "blocks are expected to be explicitly terminated"); + + return success(); + }; + + bool hasCatchAll = false; + llvm::SmallVector<mlir::Attribute, 4> catcherAttrs; + while (parser.parseOptionalKeyword("catch").succeeded()) { + bool hasLSquare = parser.parseOptionalLSquare().succeeded(); + + llvm::StringRef attrStr; + if (parser.parseOptionalKeyword(&attrStr, {"all", "type"}).failed()) + return parser.emitError(parser.getCurrentLocation(), + "expected 'all' or 'type' 
keyword"); + + bool isCatchAll = attrStr == "all"; + if (isCatchAll) { + if (hasCatchAll) + return parser.emitError(parser.getCurrentLocation(), + "can't have more than one catch all"); + hasCatchAll = true; + } + + mlir::Attribute exceptionRTTIAttr; + if (!isCatchAll && parser.parseAttribute(exceptionRTTIAttr).failed()) + return parser.emitError(parser.getCurrentLocation(), + "expected valid RTTI info attribute"); + + catcherAttrs.push_back(isCatchAll + ? cir::CatchAllAttr::get(parser.getContext()) + : exceptionRTTIAttr); + + if (hasLSquare && isCatchAll) + return parser.emitError(parser.getCurrentLocation(), + "catch all dosen't need RTTI info attribute"); + + if (hasLSquare && parser.parseRSquare().failed()) + return parser.emitError(parser.getCurrentLocation(), + "expected `]` after RTTI info attribute"); + + if (parseCheckedCatcherRegion().failed()) + return mlir::failure(); + } + + if (parser.parseOptionalKeyword("unwind").succeeded()) { + if (hasCatchAll) + return parser.emitError(parser.getCurrentLocation(), + "unwind can't be used with catch all"); + + catcherAttrs.push_back(cir::UnwindAttr::get(parser.getContext())); + if (parseCheckedCatcherRegion().failed()) + return mlir::failure(); + } + + handlerTypes = parser.getBuilder().getArrayAttr(catcherAttrs); + return mlir::success(); +} + +//===----------------------------------------------------------------------===// // TableGen'd op method definitions //===----------------------------------------------------------------------===// diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 9ee810c..92dba32 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -4283,15 +4283,11 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, CharUnits Align = CGM.getNaturalTypeAlignment( E->getType()->getAs<VectorType>()->getElementType(), nullptr); - llvm::Value *AlignVal = - llvm::ConstantInt::get(Int32Ty, Align.getQuantity()); llvm::Value *Result; if (BuiltinID == Builtin::BI__builtin_masked_load) { - Function *F = - CGM.getIntrinsic(Intrinsic::masked_load, {RetTy, Ptr->getType()}); - Result = - Builder.CreateCall(F, {Ptr, AlignVal, Mask, PassThru}, "masked_load"); + Result = Builder.CreateMaskedLoad(RetTy, Ptr, Align.getAsAlign(), Mask, + PassThru, "masked_load"); } else { Function *F = CGM.getIntrinsic(Intrinsic::masked_expandload, {RetTy}); Result = @@ -4307,8 +4303,6 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, llvm::Type *RetTy = CGM.getTypes().ConvertType(E->getType()); CharUnits Align = CGM.getNaturalTypeAlignment( E->getType()->getAs<VectorType>()->getElementType(), nullptr); - llvm::Value *AlignVal = - llvm::ConstantInt::get(Int32Ty, Align.getQuantity()); llvm::Value *PassThru = llvm::PoisonValue::get(RetTy); if (E->getNumArgs() > 3) @@ -4318,12 +4312,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, E->getType()->getAs<VectorType>()->getElementType()); llvm::Value *PtrVec = Builder.CreateGEP(ElemTy, Ptr, Idx); - llvm::Value *Result; - Function *F = - CGM.getIntrinsic(Intrinsic::masked_gather, {RetTy, PtrVec->getType()}); - - Result = Builder.CreateCall(F, {PtrVec, AlignVal, Mask, PassThru}, - "masked_gather"); + llvm::Value *Result = Builder.CreateMaskedGather( + RetTy, PtrVec, Align.getAsAlign(), Mask, PassThru, "masked_gather"); return RValue::get(Result); } case Builtin::BI__builtin_masked_store: @@ -4338,13 +4328,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(const 
GlobalDecl GD, unsigned BuiltinID, CharUnits Align = CGM.getNaturalTypeAlignment( E->getArg(1)->getType()->getAs<VectorType>()->getElementType(), nullptr); - llvm::Value *AlignVal = - llvm::ConstantInt::get(Int32Ty, Align.getQuantity()); if (BuiltinID == Builtin::BI__builtin_masked_store) { - llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::masked_store, - {ValLLTy, Ptr->getType()}); - Builder.CreateCall(F, {Val, Ptr, AlignVal, Mask}); + Builder.CreateMaskedStore(Val, Ptr, Align.getAsAlign(), Mask); } else { llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::masked_compressstore, {ValLLTy}); @@ -4361,17 +4347,12 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, CharUnits Align = CGM.getNaturalTypeAlignment( E->getArg(2)->getType()->getAs<VectorType>()->getElementType(), nullptr); - llvm::Value *AlignVal = - llvm::ConstantInt::get(Int32Ty, Align.getQuantity()); llvm::Type *ElemTy = CGM.getTypes().ConvertType( E->getArg(1)->getType()->getAs<VectorType>()->getElementType()); llvm::Value *PtrVec = Builder.CreateGEP(ElemTy, Ptr, Idx); - Function *F = CGM.getIntrinsic(Intrinsic::masked_scatter, - {Val->getType(), PtrVec->getType()}); - - Builder.CreateCall(F, {Val, PtrVec, AlignVal, Mask}); + Builder.CreateMaskedScatter(Val, PtrVec, Align.getAsAlign(), Mask); return RValue(); } case Builtin::BI__builtin_isinf_sign: { diff --git a/clang/lib/Headers/avx512cdintrin.h b/clang/lib/Headers/avx512cdintrin.h index b161440..fb6dcb6 100644 --- a/clang/lib/Headers/avx512cdintrin.h +++ b/clang/lib/Headers/avx512cdintrin.h @@ -15,111 +15,98 @@ #define __AVX512CDINTRIN_H /* Define the default attributes for the functions in this file. */ +#if defined(__cplusplus) && (__cplusplus >= 201103L) +#define __DEFAULT_FN_ATTRS \ + constexpr __attribute__((__always_inline__, __nodebug__, \ + __target__("avx512cd"), __min_vector_width__(512))) +#else #define __DEFAULT_FN_ATTRS \ __attribute__((__always_inline__, __nodebug__, __target__("avx512cd"), \ __min_vector_width__(512))) - -#if defined(__cplusplus) && (__cplusplus >= 201103L) -#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS constexpr -#else -#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS #endif static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_conflict_epi64 (__m512i __A) -{ - return (__m512i) __builtin_ia32_vpconflictdi_512 ((__v8di) __A); +_mm512_conflict_epi64(__m512i __A) { + return (__m512i)__builtin_ia32_vpconflictdi_512((__v8di)__A); } static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_mask_conflict_epi64 (__m512i __W, __mmask8 __U, __m512i __A) -{ - return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, - (__v8di)_mm512_conflict_epi64(__A), - (__v8di)__W); +_mm512_mask_conflict_epi64(__m512i __W, __mmask8 __U, __m512i __A) { + return (__m512i)__builtin_ia32_selectq_512( + (__mmask8)__U, (__v8di)_mm512_conflict_epi64(__A), (__v8di)__W); } static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_maskz_conflict_epi64 (__mmask8 __U, __m512i __A) -{ +_mm512_maskz_conflict_epi64(__mmask8 __U, __m512i __A) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, (__v8di)_mm512_conflict_epi64(__A), - (__v8di)_mm512_setzero_si512 ()); + (__v8di)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_conflict_epi32 (__m512i __A) -{ - return (__m512i) __builtin_ia32_vpconflictsi_512 ((__v16si) __A); +_mm512_conflict_epi32(__m512i __A) { + return (__m512i)__builtin_ia32_vpconflictsi_512((__v16si)__A); } static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_mask_conflict_epi32 
(__m512i __W, __mmask16 __U, __m512i __A) -{ - return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, - (__v16si)_mm512_conflict_epi32(__A), - (__v16si)__W); +_mm512_mask_conflict_epi32(__m512i __W, __mmask16 __U, __m512i __A) { + return (__m512i)__builtin_ia32_selectd_512( + (__mmask16)__U, (__v16si)_mm512_conflict_epi32(__A), (__v16si)__W); } static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_maskz_conflict_epi32 (__mmask16 __U, __m512i __A) -{ - return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, - (__v16si)_mm512_conflict_epi32(__A), - (__v16si)_mm512_setzero_si512()); +_mm512_maskz_conflict_epi32(__mmask16 __U, __m512i __A) { + return (__m512i)__builtin_ia32_selectd_512( + (__mmask16)__U, (__v16si)_mm512_conflict_epi32(__A), + (__v16si)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR -_mm512_lzcnt_epi32(__m512i __A) { +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_lzcnt_epi32(__m512i __A) { return (__m512i)__builtin_elementwise_clzg((__v16si)__A, (__v16si)_mm512_set1_epi32(32)); } -static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_lzcnt_epi32(__m512i __W, __mmask16 __U, __m512i __A) { - return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, - (__v16si)_mm512_lzcnt_epi32(__A), - (__v16si)__W); + return (__m512i)__builtin_ia32_selectd_512( + (__mmask16)__U, (__v16si)_mm512_lzcnt_epi32(__A), (__v16si)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_lzcnt_epi32(__mmask16 __U, __m512i __A) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, (__v16si)_mm512_lzcnt_epi32(__A), (__v16si)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR -_mm512_lzcnt_epi64(__m512i __A) { +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_lzcnt_epi64(__m512i __A) { return (__m512i)__builtin_elementwise_clzg( (__v8di)__A, (__v8di)_mm512_set1_epi64((long long)64)); } -static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_lzcnt_epi64(__m512i __W, __mmask8 __U, __m512i __A) { - return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, - (__v8di)_mm512_lzcnt_epi64(__A), - (__v8di)__W); + return (__m512i)__builtin_ia32_selectq_512( + (__mmask8)__U, (__v8di)_mm512_lzcnt_epi64(__A), (__v8di)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_lzcnt_epi64(__mmask8 __U, __m512i __A) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, (__v8di)_mm512_lzcnt_epi64(__A), (__v8di)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_broadcastmb_epi64(__mmask8 __A) { - return (__m512i) _mm512_set1_epi64((long long) __A); + return (__m512i)_mm512_set1_epi64((long long)__A); } -static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_broadcastmw_epi32(__mmask16 __A) { return (__m512i)_mm512_set1_epi32((int)__A); } #undef __DEFAULT_FN_ATTRS -#undef __DEFAULT_FN_ATTRS_CONSTEXPR #endif diff --git a/clang/lib/Headers/avx512dqintrin.h b/clang/lib/Headers/avx512dqintrin.h index fb65bf9..3681cca 100644 --- a/clang/lib/Headers/avx512dqintrin.h +++ b/clang/lib/Headers/avx512dqintrin.h @@ -1083,17 +1083,15 @@ _mm512_broadcast_f32x2(__m128 __A) { 0, 1, 0, 1, 0, 1, 0, 1); } -static __inline__ __m512 __DEFAULT_FN_ATTRS512 
-_mm512_mask_broadcast_f32x2 (__m512 __O, __mmask16 __M, __m128 __A) -{ +static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_broadcast_f32x2(__m512 __O, __mmask16 __M, __m128 __A) { return (__m512)__builtin_ia32_selectps_512((__mmask16)__M, (__v16sf)_mm512_broadcast_f32x2(__A), (__v16sf)__O); } -static __inline__ __m512 __DEFAULT_FN_ATTRS512 -_mm512_maskz_broadcast_f32x2 (__mmask16 __M, __m128 __A) -{ +static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_broadcast_f32x2(__mmask16 __M, __m128 __A) { return (__m512)__builtin_ia32_selectps_512((__mmask16)__M, (__v16sf)_mm512_broadcast_f32x2(__A), (__v16sf)_mm512_setzero_ps()); @@ -1106,17 +1104,15 @@ _mm512_broadcast_f32x8(__m256 __A) { 0, 1, 2, 3, 4, 5, 6, 7); } -static __inline__ __m512 __DEFAULT_FN_ATTRS512 -_mm512_mask_broadcast_f32x8(__m512 __O, __mmask16 __M, __m256 __A) -{ +static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_broadcast_f32x8(__m512 __O, __mmask16 __M, __m256 __A) { return (__m512)__builtin_ia32_selectps_512((__mmask16)__M, (__v16sf)_mm512_broadcast_f32x8(__A), (__v16sf)__O); } -static __inline__ __m512 __DEFAULT_FN_ATTRS512 -_mm512_maskz_broadcast_f32x8(__mmask16 __M, __m256 __A) -{ +static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_broadcast_f32x8(__mmask16 __M, __m256 __A) { return (__m512)__builtin_ia32_selectps_512((__mmask16)__M, (__v16sf)_mm512_broadcast_f32x8(__A), (__v16sf)_mm512_setzero_ps()); @@ -1128,17 +1124,15 @@ _mm512_broadcast_f64x2(__m128d __A) { 0, 1, 0, 1, 0, 1, 0, 1); } -static __inline__ __m512d __DEFAULT_FN_ATTRS512 -_mm512_mask_broadcast_f64x2(__m512d __O, __mmask8 __M, __m128d __A) -{ +static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_broadcast_f64x2(__m512d __O, __mmask8 __M, __m128d __A) { return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__M, (__v8df)_mm512_broadcast_f64x2(__A), (__v8df)__O); } -static __inline__ __m512d __DEFAULT_FN_ATTRS512 -_mm512_maskz_broadcast_f64x2(__mmask8 __M, __m128d __A) -{ +static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_broadcast_f64x2(__mmask8 __M, __m128d __A) { return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__M, (__v8df)_mm512_broadcast_f64x2(__A), (__v8df)_mm512_setzero_pd()); @@ -1151,17 +1145,15 @@ _mm512_broadcast_i32x2(__m128i __A) { 0, 1, 0, 1, 0, 1, 0, 1); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_mask_broadcast_i32x2 (__m512i __O, __mmask16 __M, __m128i __A) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_broadcast_i32x2(__m512i __O, __mmask16 __M, __m128i __A) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M, (__v16si)_mm512_broadcast_i32x2(__A), (__v16si)__O); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_maskz_broadcast_i32x2 (__mmask16 __M, __m128i __A) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_broadcast_i32x2(__mmask16 __M, __m128i __A) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M, (__v16si)_mm512_broadcast_i32x2(__A), (__v16si)_mm512_setzero_si512()); @@ -1174,17 +1166,15 @@ _mm512_broadcast_i32x8(__m256i __A) { 0, 1, 2, 3, 4, 5, 6, 7); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_mask_broadcast_i32x8(__m512i __O, __mmask16 __M, __m256i __A) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_broadcast_i32x8(__m512i __O, __mmask16 __M, __m256i __A) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M, (__v16si)_mm512_broadcast_i32x8(__A), (__v16si)__O); } 
-static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_maskz_broadcast_i32x8(__mmask16 __M, __m256i __A) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_broadcast_i32x8(__mmask16 __M, __m256i __A) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M, (__v16si)_mm512_broadcast_i32x8(__A), (__v16si)_mm512_setzero_si512()); @@ -1196,17 +1186,15 @@ _mm512_broadcast_i64x2(__m128i __A) { 0, 1, 0, 1, 0, 1, 0, 1); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_mask_broadcast_i64x2(__m512i __O, __mmask8 __M, __m128i __A) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_broadcast_i64x2(__m512i __O, __mmask8 __M, __m128i __A) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M, (__v8di)_mm512_broadcast_i64x2(__A), (__v8di)__O); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_maskz_broadcast_i64x2(__mmask8 __M, __m128i __A) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_broadcast_i64x2(__mmask8 __M, __m128i __A) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M, (__v8di)_mm512_broadcast_i64x2(__A), (__v8di)_mm512_setzero_si512()); diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h index 80e5842..07de036 100644 --- a/clang/lib/Headers/avx512fintrin.h +++ b/clang/lib/Headers/avx512fintrin.h @@ -225,17 +225,15 @@ _mm512_broadcastd_epi32(__m128i __A) { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_mask_broadcastd_epi32 (__m512i __O, __mmask16 __M, __m128i __A) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_broadcastd_epi32(__m512i __O, __mmask16 __M, __m128i __A) { return (__m512i)__builtin_ia32_selectd_512(__M, (__v16si) _mm512_broadcastd_epi32(__A), (__v16si) __O); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_maskz_broadcastd_epi32 (__mmask16 __M, __m128i __A) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_broadcastd_epi32(__mmask16 __M, __m128i __A) { return (__m512i)__builtin_ia32_selectd_512(__M, (__v16si) _mm512_broadcastd_epi32(__A), (__v16si) _mm512_setzero_si512()); @@ -247,18 +245,14 @@ _mm512_broadcastq_epi64(__m128i __A) { 0, 0, 0, 0, 0, 0, 0, 0); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_mask_broadcastq_epi64 (__m512i __O, __mmask8 __M, __m128i __A) -{ - return (__m512i)__builtin_ia32_selectq_512(__M, - (__v8di) _mm512_broadcastq_epi64(__A), - (__v8di) __O); - +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_broadcastq_epi64(__m512i __O, __mmask8 __M, __m128i __A) { + return (__m512i)__builtin_ia32_selectq_512( + __M, (__v8di)_mm512_broadcastq_epi64(__A), (__v8di)__O); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_broadcastq_epi64(__mmask8 __M, __m128i __A) { return (__m512i)__builtin_ia32_selectq_512(__M, (__v8di) _mm512_broadcastq_epi64(__A), (__v8di) _mm512_setzero_si512()); @@ -321,9 +315,8 @@ _mm512_set1_epi32(int __s) __s, __s, __s, __s, __s, __s, __s, __s }; } -static __inline __m512i __DEFAULT_FN_ATTRS512 -_mm512_maskz_set1_epi32(__mmask16 __M, int __A) -{ +static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_set1_epi32(__mmask16 __M, int __A) { return (__m512i)__builtin_ia32_selectd_512(__M, (__v16si)_mm512_set1_epi32(__A), (__v16si)_mm512_setzero_si512()); @@ -335,9 +328,8 @@ _mm512_set1_epi64(long 
long __d) return __extension__(__m512i)(__v8di){ __d, __d, __d, __d, __d, __d, __d, __d }; } -static __inline __m512i __DEFAULT_FN_ATTRS512 -_mm512_maskz_set1_epi64(__mmask8 __M, long long __A) -{ +static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_set1_epi64(__mmask8 __M, long long __A) { return (__m512i)__builtin_ia32_selectq_512(__M, (__v8di)_mm512_set1_epi64(__A), (__v8di)_mm512_setzero_si512()); @@ -6552,17 +6544,15 @@ _mm512_broadcast_f32x4(__m128 __A) { 0, 1, 2, 3, 0, 1, 2, 3); } -static __inline__ __m512 __DEFAULT_FN_ATTRS512 -_mm512_mask_broadcast_f32x4(__m512 __O, __mmask16 __M, __m128 __A) -{ +static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_broadcast_f32x4(__m512 __O, __mmask16 __M, __m128 __A) { return (__m512)__builtin_ia32_selectps_512((__mmask16)__M, (__v16sf)_mm512_broadcast_f32x4(__A), (__v16sf)__O); } -static __inline__ __m512 __DEFAULT_FN_ATTRS512 -_mm512_maskz_broadcast_f32x4(__mmask16 __M, __m128 __A) -{ +static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_broadcast_f32x4(__mmask16 __M, __m128 __A) { return (__m512)__builtin_ia32_selectps_512((__mmask16)__M, (__v16sf)_mm512_broadcast_f32x4(__A), (__v16sf)_mm512_setzero_ps()); @@ -6597,17 +6587,15 @@ _mm512_broadcast_i32x4(__m128i __A) { 0, 1, 2, 3, 0, 1, 2, 3); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_mask_broadcast_i32x4(__m512i __O, __mmask16 __M, __m128i __A) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_broadcast_i32x4(__m512i __O, __mmask16 __M, __m128i __A) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M, (__v16si)_mm512_broadcast_i32x4(__A), (__v16si)__O); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_maskz_broadcast_i32x4(__mmask16 __M, __m128i __A) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_broadcast_i32x4(__mmask16 __M, __m128i __A) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M, (__v16si)_mm512_broadcast_i32x4(__A), (__v16si)_mm512_setzero_si512()); @@ -6635,33 +6623,29 @@ _mm512_maskz_broadcast_i64x4(__mmask8 __M, __m256i __A) (__v8di)_mm512_setzero_si512()); } -static __inline__ __m512d __DEFAULT_FN_ATTRS512 -_mm512_mask_broadcastsd_pd (__m512d __O, __mmask8 __M, __m128d __A) -{ +static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_broadcastsd_pd(__m512d __O, __mmask8 __M, __m128d __A) { return (__m512d)__builtin_ia32_selectpd_512(__M, (__v8df) _mm512_broadcastsd_pd(__A), (__v8df) __O); } -static __inline__ __m512d __DEFAULT_FN_ATTRS512 -_mm512_maskz_broadcastsd_pd (__mmask8 __M, __m128d __A) -{ +static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_broadcastsd_pd(__mmask8 __M, __m128d __A) { return (__m512d)__builtin_ia32_selectpd_512(__M, (__v8df) _mm512_broadcastsd_pd(__A), (__v8df) _mm512_setzero_pd()); } -static __inline__ __m512 __DEFAULT_FN_ATTRS512 -_mm512_mask_broadcastss_ps (__m512 __O, __mmask16 __M, __m128 __A) -{ +static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_broadcastss_ps(__m512 __O, __mmask16 __M, __m128 __A) { return (__m512)__builtin_ia32_selectps_512(__M, (__v16sf) _mm512_broadcastss_ps(__A), (__v16sf) __O); } -static __inline__ __m512 __DEFAULT_FN_ATTRS512 -_mm512_maskz_broadcastss_ps (__mmask16 __M, __m128 __A) -{ +static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_broadcastss_ps(__mmask16 __M, __m128 __A) { return (__m512)__builtin_ia32_selectps_512(__M, (__v16sf) _mm512_broadcastss_ps(__A), (__v16sf) _mm512_setzero_ps()); @@ 
-8381,17 +8365,15 @@ _mm512_movehdup_ps (__m512 __A) 1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15); } -static __inline__ __m512 __DEFAULT_FN_ATTRS512 -_mm512_mask_movehdup_ps (__m512 __W, __mmask16 __U, __m512 __A) -{ +static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_movehdup_ps(__m512 __W, __mmask16 __U, __m512 __A) { return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, (__v16sf)_mm512_movehdup_ps(__A), (__v16sf)__W); } -static __inline__ __m512 __DEFAULT_FN_ATTRS512 -_mm512_maskz_movehdup_ps (__mmask16 __U, __m512 __A) -{ +static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_movehdup_ps(__mmask16 __U, __m512 __A) { return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, (__v16sf)_mm512_movehdup_ps(__A), (__v16sf)_mm512_setzero_ps()); @@ -8404,44 +8386,38 @@ _mm512_moveldup_ps (__m512 __A) 0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14); } -static __inline__ __m512 __DEFAULT_FN_ATTRS512 -_mm512_mask_moveldup_ps (__m512 __W, __mmask16 __U, __m512 __A) -{ +static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_moveldup_ps(__m512 __W, __mmask16 __U, __m512 __A) { return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, (__v16sf)_mm512_moveldup_ps(__A), (__v16sf)__W); } -static __inline__ __m512 __DEFAULT_FN_ATTRS512 -_mm512_maskz_moveldup_ps (__mmask16 __U, __m512 __A) -{ +static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_maskz_moveldup_ps(__mmask16 __U, __m512 __A) { return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, (__v16sf)_mm512_moveldup_ps(__A), (__v16sf)_mm512_setzero_ps()); } -static __inline__ __m128 __DEFAULT_FN_ATTRS128 -_mm_mask_move_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) -{ +static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_move_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { return __builtin_ia32_selectss_128(__U, _mm_move_ss(__A, __B), __W); } -static __inline__ __m128 __DEFAULT_FN_ATTRS128 -_mm_maskz_move_ss (__mmask8 __U, __m128 __A, __m128 __B) -{ +static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_maskz_move_ss(__mmask8 __U, __m128 __A, __m128 __B) { return __builtin_ia32_selectss_128(__U, _mm_move_ss(__A, __B), _mm_setzero_ps()); } -static __inline__ __m128d __DEFAULT_FN_ATTRS128 -_mm_mask_move_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) -{ +static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_move_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { return __builtin_ia32_selectsd_128(__U, _mm_move_sd(__A, __B), __W); } -static __inline__ __m128d __DEFAULT_FN_ATTRS128 -_mm_maskz_move_sd (__mmask8 __U, __m128d __A, __m128d __B) -{ +static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_maskz_move_sd(__mmask8 __U, __m128d __A, __m128d __B) { return __builtin_ia32_selectsd_128(__U, _mm_move_sd(__A, __B), _mm_setzero_pd()); } @@ -8884,17 +8860,15 @@ _mm_cvtu64_ss (__m128 __A, unsigned long long __B) } #endif -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_mask_set1_epi32 (__m512i __O, __mmask16 __M, int __A) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_set1_epi32(__m512i __O, __mmask16 __M, int __A) { return (__m512i) __builtin_ia32_selectd_512(__M, (__v16si) _mm512_set1_epi32(__A), (__v16si) __O); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_mask_set1_epi64 (__m512i __O, __mmask8 __M, long long __A) -{ +static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_mask_set1_epi64(__m512i __O, __mmask8 __M, long long __A) { return 
(__m512i) __builtin_ia32_selectq_512(__M, (__v8di) _mm512_set1_epi64(__A), (__v8di) __O); diff --git a/clang/lib/Headers/avx512vlcdintrin.h b/clang/lib/Headers/avx512vlcdintrin.h index cb98e7c..7719680f 100644 --- a/clang/lib/Headers/avx512vlcdintrin.h +++ b/clang/lib/Headers/avx512vlcdintrin.h @@ -14,203 +14,182 @@ #define __AVX512VLCDINTRIN_H /* Define the default attributes for the functions in this file. */ +#if defined(__cplusplus) && (__cplusplus >= 201103L) +#define __DEFAULT_FN_ATTRS128 \ + constexpr __attribute__((__always_inline__, __nodebug__, \ + __target__("avx512vl,avx512cd"), \ + __min_vector_width__(128))) +#define __DEFAULT_FN_ATTRS256 \ + constexpr __attribute__((__always_inline__, __nodebug__, \ + __target__("avx512vl,avx512cd"), \ + __min_vector_width__(256))) +#else #define __DEFAULT_FN_ATTRS128 \ __attribute__((__always_inline__, __nodebug__, \ __target__("avx512vl,avx512cd"), __min_vector_width__(128))) #define __DEFAULT_FN_ATTRS256 \ __attribute__((__always_inline__, __nodebug__, \ __target__("avx512vl,avx512cd"), __min_vector_width__(256))) - -#if defined(__cplusplus) && (__cplusplus >= 201103L) -#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256 constexpr -#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128 constexpr -#else -#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256 -#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128 #endif -static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_broadcastmb_epi64(__mmask8 __A) { return (__m128i) _mm_set1_epi64x((long long) __A); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_broadcastmb_epi64(__mmask8 __A) { - return (__m256i) _mm256_set1_epi64x((long long)__A); + return (__m256i)_mm256_set1_epi64x((long long)__A); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_broadcastmw_epi32(__mmask16 __A) { return (__m128i) _mm_set1_epi32((int)__A); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_broadcastmw_epi32(__mmask16 __A) { return (__m256i) _mm256_set1_epi32((int)__A); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_conflict_epi64 (__m128i __A) -{ - return (__m128i) __builtin_ia32_vpconflictdi_128 ((__v2di) __A); +_mm_conflict_epi64(__m128i __A) { + return (__m128i)__builtin_ia32_vpconflictdi_128((__v2di)__A); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_mask_conflict_epi64 (__m128i __W, __mmask8 __U, __m128i __A) -{ - return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, - (__v2di)_mm_conflict_epi64(__A), - (__v2di)__W); +_mm_mask_conflict_epi64(__m128i __W, __mmask8 __U, __m128i __A) { + return (__m128i)__builtin_ia32_selectq_128( + (__mmask8)__U, (__v2di)_mm_conflict_epi64(__A), (__v2di)__W); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_maskz_conflict_epi64 (__mmask8 __U, __m128i __A) -{ +_mm_maskz_conflict_epi64(__mmask8 __U, __m128i __A) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, (__v2di)_mm_conflict_epi64(__A), (__v2di)_mm_setzero_si128()); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_conflict_epi64 (__m256i __A) -{ - return (__m256i) __builtin_ia32_vpconflictdi_256 ((__v4di) __A); +_mm256_conflict_epi64(__m256i __A) { + return (__m256i)__builtin_ia32_vpconflictdi_256((__v4di)__A); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mask_conflict_epi64 
(__m256i __W, __mmask8 __U, __m256i __A) -{ - return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, - (__v4di)_mm256_conflict_epi64(__A), - (__v4di)__W); +_mm256_mask_conflict_epi64(__m256i __W, __mmask8 __U, __m256i __A) { + return (__m256i)__builtin_ia32_selectq_256( + (__mmask8)__U, (__v4di)_mm256_conflict_epi64(__A), (__v4di)__W); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_maskz_conflict_epi64 (__mmask8 __U, __m256i __A) -{ +_mm256_maskz_conflict_epi64(__mmask8 __U, __m256i __A) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, (__v4di)_mm256_conflict_epi64(__A), (__v4di)_mm256_setzero_si256()); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_conflict_epi32 (__m128i __A) -{ - return (__m128i) __builtin_ia32_vpconflictsi_128 ((__v4si) __A); +_mm_conflict_epi32(__m128i __A) { + return (__m128i)__builtin_ia32_vpconflictsi_128((__v4si)__A); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_mask_conflict_epi32 (__m128i __W, __mmask8 __U, __m128i __A) -{ - return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, - (__v4si)_mm_conflict_epi32(__A), - (__v4si)__W); +_mm_mask_conflict_epi32(__m128i __W, __mmask8 __U, __m128i __A) { + return (__m128i)__builtin_ia32_selectd_128( + (__mmask8)__U, (__v4si)_mm_conflict_epi32(__A), (__v4si)__W); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_maskz_conflict_epi32 (__mmask8 __U, __m128i __A) -{ +_mm_maskz_conflict_epi32(__mmask8 __U, __m128i __A) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, (__v4si)_mm_conflict_epi32(__A), (__v4si)_mm_setzero_si128()); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_conflict_epi32 (__m256i __A) -{ - return (__m256i) __builtin_ia32_vpconflictsi_256 ((__v8si) __A); +_mm256_conflict_epi32(__m256i __A) { + return (__m256i)__builtin_ia32_vpconflictsi_256((__v8si)__A); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mask_conflict_epi32 (__m256i __W, __mmask8 __U, __m256i __A) -{ - return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, - (__v8si)_mm256_conflict_epi32(__A), - (__v8si)__W); +_mm256_mask_conflict_epi32(__m256i __W, __mmask8 __U, __m256i __A) { + return (__m256i)__builtin_ia32_selectd_256( + (__mmask8)__U, (__v8si)_mm256_conflict_epi32(__A), (__v8si)__W); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_maskz_conflict_epi32 (__mmask8 __U, __m256i __A) -{ +_mm256_maskz_conflict_epi32(__mmask8 __U, __m256i __A) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, (__v8si)_mm256_conflict_epi32(__A), (__v8si)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR -_mm_lzcnt_epi32(__m128i __A) { +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_lzcnt_epi32(__m128i __A) { return (__m128i)__builtin_elementwise_clzg((__v4si)__A, (__v4si)_mm_set1_epi32(32)); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_lzcnt_epi32(__m128i __W, __mmask8 __U, __m128i __A) { - return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, - (__v4si)_mm_lzcnt_epi32(__A), - (__v4si)__W); + return (__m128i)__builtin_ia32_selectd_128( + (__mmask8)__U, (__v4si)_mm_lzcnt_epi32(__A), (__v4si)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_lzcnt_epi32(__mmask8 __U, __m128i __A) { - return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, - (__v4si)_mm_lzcnt_epi32(__A), - (__v4si)_mm_setzero_si128()); + return (__m128i)__builtin_ia32_selectd_128( + (__mmask8)__U, 
(__v4si)_mm_lzcnt_epi32(__A), (__v4si)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_lzcnt_epi32(__m256i __A) { return (__m256i)__builtin_elementwise_clzg((__v8si)__A, (__v8si)_mm256_set1_epi32(32)); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_lzcnt_epi32(__m256i __W, __mmask8 __U, __m256i __A) { - return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, - (__v8si)_mm256_lzcnt_epi32(__A), - (__v8si)__W); + return (__m256i)__builtin_ia32_selectd_256( + (__mmask8)__U, (__v8si)_mm256_lzcnt_epi32(__A), (__v8si)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_lzcnt_epi32(__mmask8 __U, __m256i __A) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, (__v8si)_mm256_lzcnt_epi32(__A), (__v8si)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR -_mm_lzcnt_epi64(__m128i __A) { +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_lzcnt_epi64(__m128i __A) { return (__m128i)__builtin_elementwise_clzg( (__v2di)__A, (__v2di)_mm_set1_epi64x((long long)64)); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_lzcnt_epi64(__m128i __W, __mmask8 __U, __m128i __A) { - return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, - (__v2di)_mm_lzcnt_epi64(__A), - (__v2di)__W); + return (__m128i)__builtin_ia32_selectq_128( + (__mmask8)__U, (__v2di)_mm_lzcnt_epi64(__A), (__v2di)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_lzcnt_epi64(__mmask8 __U, __m128i __A) { - return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, - (__v2di)_mm_lzcnt_epi64(__A), - (__v2di)_mm_setzero_si128()); + return (__m128i)__builtin_ia32_selectq_128( + (__mmask8)__U, (__v2di)_mm_lzcnt_epi64(__A), (__v2di)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_lzcnt_epi64(__m256i __A) { return (__m256i)__builtin_elementwise_clzg( (__v4di)__A, (__v4di)_mm256_set1_epi64x((long long)64)); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_lzcnt_epi64(__m256i __W, __mmask8 __U, __m256i __A) { - return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, - (__v4di)_mm256_lzcnt_epi64(__A), - (__v4di)__W); + return (__m256i)__builtin_ia32_selectq_256( + (__mmask8)__U, (__v4di)_mm256_lzcnt_epi64(__A), (__v4di)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_lzcnt_epi64(__mmask8 __U, __m256i __A) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, (__v4di)_mm256_lzcnt_epi64(__A), @@ -219,7 +198,5 @@ _mm256_maskz_lzcnt_epi64(__mmask8 __U, __m256i __A) { #undef __DEFAULT_FN_ATTRS128 #undef __DEFAULT_FN_ATTRS256 -#undef __DEFAULT_FN_ATTRS128_CONSTEXPR -#undef __DEFAULT_FN_ATTRS256_CONSTEXPR #endif /* __AVX512VLCDINTRIN_H */ diff --git a/clang/lib/Headers/avx512vldqintrin.h b/clang/lib/Headers/avx512vldqintrin.h index 68bd52e..ee7974e 100644 --- a/clang/lib/Headers/avx512vldqintrin.h +++ b/clang/lib/Headers/avx512vldqintrin.h @@ -968,17 +968,15 @@ _mm256_broadcast_f32x2(__m128 __A) { 0, 1, 0, 1, 0, 1, 0, 1); } -static __inline__ __m256 __DEFAULT_FN_ATTRS256 -_mm256_mask_broadcast_f32x2 
(__m256 __O, __mmask8 __M, __m128 __A) -{ +static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_broadcast_f32x2(__m256 __O, __mmask8 __M, __m128 __A) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__M, (__v8sf)_mm256_broadcast_f32x2(__A), (__v8sf)__O); } -static __inline__ __m256 __DEFAULT_FN_ATTRS256 -_mm256_maskz_broadcast_f32x2 (__mmask8 __M, __m128 __A) -{ +static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_broadcast_f32x2(__mmask8 __M, __m128 __A) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__M, (__v8sf)_mm256_broadcast_f32x2(__A), (__v8sf)_mm256_setzero_ps()); @@ -990,17 +988,15 @@ _mm256_broadcast_f64x2(__m128d __A) { 0, 1, 0, 1); } -static __inline__ __m256d __DEFAULT_FN_ATTRS256 -_mm256_mask_broadcast_f64x2(__m256d __O, __mmask8 __M, __m128d __A) -{ +static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_broadcast_f64x2(__m256d __O, __mmask8 __M, __m128d __A) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__M, (__v4df)_mm256_broadcast_f64x2(__A), (__v4df)__O); } -static __inline__ __m256d __DEFAULT_FN_ATTRS256 -_mm256_maskz_broadcast_f64x2 (__mmask8 __M, __m128d __A) -{ +static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_broadcast_f64x2(__mmask8 __M, __m128d __A) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__M, (__v4df)_mm256_broadcast_f64x2(__A), (__v4df)_mm256_setzero_pd()); @@ -1012,17 +1008,15 @@ _mm_broadcast_i32x2(__m128i __A) { 0, 1, 0, 1); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_mask_broadcast_i32x2 (__m128i __O, __mmask8 __M, __m128i __A) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_broadcast_i32x2(__m128i __O, __mmask8 __M, __m128i __A) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, (__v4si)_mm_broadcast_i32x2(__A), (__v4si)__O); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_maskz_broadcast_i32x2 (__mmask8 __M, __m128i __A) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_maskz_broadcast_i32x2(__mmask8 __M, __m128i __A) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, (__v4si)_mm_broadcast_i32x2(__A), (__v4si)_mm_setzero_si128()); @@ -1034,17 +1028,15 @@ _mm256_broadcast_i32x2(__m128i __A) { 0, 1, 0, 1, 0, 1, 0, 1); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mask_broadcast_i32x2 (__m256i __O, __mmask8 __M, __m128i __A) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_broadcast_i32x2(__m256i __O, __mmask8 __M, __m128i __A) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, (__v8si)_mm256_broadcast_i32x2(__A), (__v8si)__O); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_maskz_broadcast_i32x2 (__mmask8 __M, __m128i __A) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_broadcast_i32x2(__mmask8 __M, __m128i __A) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, (__v8si)_mm256_broadcast_i32x2(__A), (__v8si)_mm256_setzero_si256()); @@ -1056,17 +1048,15 @@ _mm256_broadcast_i64x2(__m128i __A) { 0, 1, 0, 1); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mask_broadcast_i64x2(__m256i __O, __mmask8 __M, __m128i __A) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_broadcast_i64x2(__m256i __O, __mmask8 __M, __m128i __A) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, (__v4di)_mm256_broadcast_i64x2(__A), (__v4di)__O); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_maskz_broadcast_i64x2 (__mmask8 __M, __m128i __A) -{ 
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_broadcast_i64x2(__mmask8 __M, __m128i __A) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, (__v4di)_mm256_broadcast_i64x2(__A), (__v4di)_mm256_setzero_si256()); diff --git a/clang/lib/Headers/avx512vlintrin.h b/clang/lib/Headers/avx512vlintrin.h index 965741f..676b5a0 100644 --- a/clang/lib/Headers/avx512vlintrin.h +++ b/clang/lib/Headers/avx512vlintrin.h @@ -5101,69 +5101,55 @@ _mm256_maskz_movedup_pd (__mmask8 __U, __m256d __A) (__v4df)_mm256_setzero_pd()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_mask_set1_epi32(__m128i __O, __mmask8 __M, int __A) -{ - return (__m128i)__builtin_ia32_selectd_128(__M, - (__v4si) _mm_set1_epi32(__A), - (__v4si)__O); +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_set1_epi32(__m128i __O, __mmask8 __M, int __A) { + return (__m128i)__builtin_ia32_selectd_128(__M, (__v4si)_mm_set1_epi32(__A), + (__v4si)__O); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_maskz_set1_epi32( __mmask8 __M, int __A) -{ - return (__m128i)__builtin_ia32_selectd_128(__M, - (__v4si) _mm_set1_epi32(__A), - (__v4si)_mm_setzero_si128()); +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_maskz_set1_epi32(__mmask8 __M, int __A) { + return (__m128i)__builtin_ia32_selectd_128(__M, (__v4si)_mm_set1_epi32(__A), + (__v4si)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mask_set1_epi32(__m256i __O, __mmask8 __M, int __A) -{ - return (__m256i)__builtin_ia32_selectd_256(__M, - (__v8si) _mm256_set1_epi32(__A), - (__v8si)__O); +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_set1_epi32(__m256i __O, __mmask8 __M, int __A) { + return (__m256i)__builtin_ia32_selectd_256( + __M, (__v8si)_mm256_set1_epi32(__A), (__v8si)__O); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_maskz_set1_epi32( __mmask8 __M, int __A) -{ - return (__m256i)__builtin_ia32_selectd_256(__M, - (__v8si) _mm256_set1_epi32(__A), - (__v8si)_mm256_setzero_si256()); +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_set1_epi32(__mmask8 __M, int __A) { + return (__m256i)__builtin_ia32_selectd_256( + __M, (__v8si)_mm256_set1_epi32(__A), (__v8si)_mm256_setzero_si256()); } - -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_mask_set1_epi64 (__m128i __O, __mmask8 __M, long long __A) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_set1_epi64(__m128i __O, __mmask8 __M, long long __A) { return (__m128i) __builtin_ia32_selectq_128(__M, (__v2di) _mm_set1_epi64x(__A), (__v2di) __O); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_maskz_set1_epi64 (__mmask8 __M, long long __A) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_maskz_set1_epi64(__mmask8 __M, long long __A) { return (__m128i) __builtin_ia32_selectq_128(__M, (__v2di) _mm_set1_epi64x(__A), (__v2di) _mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mask_set1_epi64 (__m256i __O, __mmask8 __M, long long __A) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_set1_epi64(__m256i __O, __mmask8 __M, long long __A) { return (__m256i) __builtin_ia32_selectq_256(__M, (__v4di) _mm256_set1_epi64x(__A), (__v4di) __O) ; } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_maskz_set1_epi64 (__mmask8 __M, long long __A) -{ - return (__m256i) __builtin_ia32_selectq_256(__M, - (__v4di) _mm256_set1_epi64x(__A), - (__v4di) _mm256_setzero_si256()); +static __inline__ 
__m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_set1_epi64(__mmask8 __M, long long __A) { + return (__m256i)__builtin_ia32_selectq_256( + __M, (__v4di)_mm256_set1_epi64x(__A), (__v4di)_mm256_setzero_si256()); } #define _mm_fixupimm_pd(A, B, C, imm) \ @@ -5610,130 +5596,113 @@ _mm256_mask_storeu_ps (void *__P, __mmask8 __U, __m256 __A) (__mmask8) __U); } - -static __inline__ __m128d __DEFAULT_FN_ATTRS128 -_mm_mask_unpackhi_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) -{ +static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_unpackhi_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_unpackhi_pd(__A, __B), (__v2df)__W); } -static __inline__ __m128d __DEFAULT_FN_ATTRS128 -_mm_maskz_unpackhi_pd(__mmask8 __U, __m128d __A, __m128d __B) -{ +static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_maskz_unpackhi_pd(__mmask8 __U, __m128d __A, __m128d __B) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_unpackhi_pd(__A, __B), (__v2df)_mm_setzero_pd()); } -static __inline__ __m256d __DEFAULT_FN_ATTRS256 -_mm256_mask_unpackhi_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) -{ +static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_unpackhi_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_unpackhi_pd(__A, __B), (__v4df)__W); } -static __inline__ __m256d __DEFAULT_FN_ATTRS256 -_mm256_maskz_unpackhi_pd(__mmask8 __U, __m256d __A, __m256d __B) -{ +static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_unpackhi_pd(__mmask8 __U, __m256d __A, __m256d __B) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_unpackhi_pd(__A, __B), (__v4df)_mm256_setzero_pd()); } -static __inline__ __m128 __DEFAULT_FN_ATTRS128 -_mm_mask_unpackhi_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) -{ +static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_unpackhi_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_unpackhi_ps(__A, __B), (__v4sf)__W); } -static __inline__ __m128 __DEFAULT_FN_ATTRS128 -_mm_maskz_unpackhi_ps(__mmask8 __U, __m128 __A, __m128 __B) -{ +static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_maskz_unpackhi_ps(__mmask8 __U, __m128 __A, __m128 __B) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_unpackhi_ps(__A, __B), (__v4sf)_mm_setzero_ps()); } -static __inline__ __m256 __DEFAULT_FN_ATTRS256 -_mm256_mask_unpackhi_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) -{ +static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_unpackhi_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_unpackhi_ps(__A, __B), (__v8sf)__W); } -static __inline__ __m256 __DEFAULT_FN_ATTRS256 -_mm256_maskz_unpackhi_ps(__mmask8 __U, __m256 __A, __m256 __B) -{ +static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_unpackhi_ps(__mmask8 __U, __m256 __A, __m256 __B) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_unpackhi_ps(__A, __B), (__v8sf)_mm256_setzero_ps()); } -static __inline__ __m128d __DEFAULT_FN_ATTRS128 -_mm_mask_unpacklo_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) -{ +static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_unpacklo_pd(__m128d __W, __mmask8 __U, 
__m128d __A, __m128d __B) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_unpacklo_pd(__A, __B), (__v2df)__W); } -static __inline__ __m128d __DEFAULT_FN_ATTRS128 -_mm_maskz_unpacklo_pd(__mmask8 __U, __m128d __A, __m128d __B) -{ +static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_maskz_unpacklo_pd(__mmask8 __U, __m128d __A, __m128d __B) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_unpacklo_pd(__A, __B), (__v2df)_mm_setzero_pd()); } -static __inline__ __m256d __DEFAULT_FN_ATTRS256 -_mm256_mask_unpacklo_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) -{ +static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_unpacklo_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_unpacklo_pd(__A, __B), (__v4df)__W); } -static __inline__ __m256d __DEFAULT_FN_ATTRS256 -_mm256_maskz_unpacklo_pd(__mmask8 __U, __m256d __A, __m256d __B) -{ +static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_unpacklo_pd(__mmask8 __U, __m256d __A, __m256d __B) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_unpacklo_pd(__A, __B), (__v4df)_mm256_setzero_pd()); } -static __inline__ __m128 __DEFAULT_FN_ATTRS128 -_mm_mask_unpacklo_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) -{ +static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_unpacklo_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_unpacklo_ps(__A, __B), (__v4sf)__W); } -static __inline__ __m128 __DEFAULT_FN_ATTRS128 -_mm_maskz_unpacklo_ps(__mmask8 __U, __m128 __A, __m128 __B) -{ +static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_maskz_unpacklo_ps(__mmask8 __U, __m128 __A, __m128 __B) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_unpacklo_ps(__A, __B), (__v4sf)_mm_setzero_ps()); } -static __inline__ __m256 __DEFAULT_FN_ATTRS256 -_mm256_mask_unpacklo_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) -{ +static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_unpacklo_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_unpacklo_ps(__A, __B), (__v8sf)__W); } -static __inline__ __m256 __DEFAULT_FN_ATTRS256 -_mm256_maskz_unpacklo_ps(__mmask8 __U, __m256 __A, __m256 __B) -{ +static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_unpacklo_ps(__mmask8 __U, __m256 __A, __m256 __B) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_unpacklo_ps(__A, __B), (__v8sf)_mm256_setzero_ps()); @@ -6055,129 +6024,117 @@ _mm256_mask_testn_epi64_mask (__mmask8 __U, __m256i __A, __m256i __B) _mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_mask_unpackhi_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_unpackhi_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, (__v4si)_mm_unpackhi_epi32(__A, __B), (__v4si)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_maskz_unpackhi_epi32(__mmask8 __U, __m128i __A, __m128i __B) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_maskz_unpackhi_epi32(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, (__v4si)_mm_unpackhi_epi32(__A, 
__B), (__v4si)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mask_unpackhi_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_unpackhi_epi32(__m256i __W, __mmask8 __U, __m256i __A, + __m256i __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, (__v8si)_mm256_unpackhi_epi32(__A, __B), (__v8si)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_maskz_unpackhi_epi32(__mmask8 __U, __m256i __A, __m256i __B) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_unpackhi_epi32(__mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, (__v8si)_mm256_unpackhi_epi32(__A, __B), (__v8si)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_mask_unpackhi_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_unpackhi_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, (__v2di)_mm_unpackhi_epi64(__A, __B), (__v2di)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_maskz_unpackhi_epi64(__mmask8 __U, __m128i __A, __m128i __B) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_maskz_unpackhi_epi64(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, (__v2di)_mm_unpackhi_epi64(__A, __B), (__v2di)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mask_unpackhi_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_unpackhi_epi64(__m256i __W, __mmask8 __U, __m256i __A, + __m256i __B) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, (__v4di)_mm256_unpackhi_epi64(__A, __B), (__v4di)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_maskz_unpackhi_epi64(__mmask8 __U, __m256i __A, __m256i __B) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_unpackhi_epi64(__mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, (__v4di)_mm256_unpackhi_epi64(__A, __B), (__v4di)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_mask_unpacklo_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_unpacklo_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, (__v4si)_mm_unpacklo_epi32(__A, __B), (__v4si)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_maskz_unpacklo_epi32(__mmask8 __U, __m128i __A, __m128i __B) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_maskz_unpacklo_epi32(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, (__v4si)_mm_unpacklo_epi32(__A, __B), (__v4si)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mask_unpacklo_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_unpacklo_epi32(__m256i __W, __mmask8 __U, __m256i __A, + __m256i __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, (__v8si)_mm256_unpacklo_epi32(__A, __B), (__v8si)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 
-_mm256_maskz_unpacklo_epi32(__mmask8 __U, __m256i __A, __m256i __B) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_unpacklo_epi32(__mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, (__v8si)_mm256_unpacklo_epi32(__A, __B), (__v8si)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_mask_unpacklo_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_unpacklo_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, (__v2di)_mm_unpacklo_epi64(__A, __B), (__v2di)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_maskz_unpacklo_epi64(__mmask8 __U, __m128i __A, __m128i __B) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_maskz_unpacklo_epi64(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, (__v2di)_mm_unpacklo_epi64(__A, __B), (__v2di)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mask_unpacklo_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_unpacklo_epi64(__m256i __W, __mmask8 __U, __m256i __A, + __m256i __B) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, (__v4di)_mm256_unpacklo_epi64(__A, __B), (__v4di)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_maskz_unpacklo_epi64(__mmask8 __U, __m256i __A, __m256i __B) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_unpacklo_epi64(__mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, (__v4di)_mm256_unpacklo_epi64(__A, __B), (__v4di)_mm256_setzero_si256()); @@ -6594,17 +6551,15 @@ _mm256_broadcast_f32x4(__m128 __A) { 0, 1, 2, 3, 0, 1, 2, 3); } -static __inline__ __m256 __DEFAULT_FN_ATTRS256 -_mm256_mask_broadcast_f32x4(__m256 __O, __mmask8 __M, __m128 __A) -{ +static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_broadcast_f32x4(__m256 __O, __mmask8 __M, __m128 __A) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__M, (__v8sf)_mm256_broadcast_f32x4(__A), (__v8sf)__O); } -static __inline__ __m256 __DEFAULT_FN_ATTRS256 -_mm256_maskz_broadcast_f32x4 (__mmask8 __M, __m128 __A) -{ +static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_broadcast_f32x4(__mmask8 __M, __m128 __A) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__M, (__v8sf)_mm256_broadcast_f32x4(__A), (__v8sf)_mm256_setzero_ps()); @@ -6616,129 +6571,113 @@ _mm256_broadcast_i32x4(__m128i __A) { 0, 1, 2, 3, 0, 1, 2, 3); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mask_broadcast_i32x4(__m256i __O, __mmask8 __M, __m128i __A) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_broadcast_i32x4(__m256i __O, __mmask8 __M, __m128i __A) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, (__v8si)_mm256_broadcast_i32x4(__A), (__v8si)__O); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_maskz_broadcast_i32x4(__mmask8 __M, __m128i __A) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_broadcast_i32x4(__mmask8 __M, __m128i __A) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, (__v8si)_mm256_broadcast_i32x4(__A), (__v8si)_mm256_setzero_si256()); } -static __inline__ __m256d __DEFAULT_FN_ATTRS256 -_mm256_mask_broadcastsd_pd (__m256d 
__O, __mmask8 __M, __m128d __A) -{ +static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_broadcastsd_pd(__m256d __O, __mmask8 __M, __m128d __A) { return (__m256d)__builtin_ia32_selectpd_256(__M, (__v4df) _mm256_broadcastsd_pd(__A), (__v4df) __O); } -static __inline__ __m256d __DEFAULT_FN_ATTRS256 -_mm256_maskz_broadcastsd_pd (__mmask8 __M, __m128d __A) -{ +static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_broadcastsd_pd(__mmask8 __M, __m128d __A) { return (__m256d)__builtin_ia32_selectpd_256(__M, (__v4df) _mm256_broadcastsd_pd(__A), (__v4df) _mm256_setzero_pd()); } -static __inline__ __m128 __DEFAULT_FN_ATTRS128 -_mm_mask_broadcastss_ps (__m128 __O, __mmask8 __M, __m128 __A) -{ +static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_broadcastss_ps(__m128 __O, __mmask8 __M, __m128 __A) { return (__m128)__builtin_ia32_selectps_128(__M, (__v4sf) _mm_broadcastss_ps(__A), (__v4sf) __O); } -static __inline__ __m128 __DEFAULT_FN_ATTRS128 -_mm_maskz_broadcastss_ps (__mmask8 __M, __m128 __A) -{ +static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_maskz_broadcastss_ps(__mmask8 __M, __m128 __A) { return (__m128)__builtin_ia32_selectps_128(__M, (__v4sf) _mm_broadcastss_ps(__A), (__v4sf) _mm_setzero_ps()); } -static __inline__ __m256 __DEFAULT_FN_ATTRS256 -_mm256_mask_broadcastss_ps (__m256 __O, __mmask8 __M, __m128 __A) -{ +static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_broadcastss_ps(__m256 __O, __mmask8 __M, __m128 __A) { return (__m256)__builtin_ia32_selectps_256(__M, (__v8sf) _mm256_broadcastss_ps(__A), (__v8sf) __O); } -static __inline__ __m256 __DEFAULT_FN_ATTRS256 -_mm256_maskz_broadcastss_ps (__mmask8 __M, __m128 __A) -{ +static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_broadcastss_ps(__mmask8 __M, __m128 __A) { return (__m256)__builtin_ia32_selectps_256(__M, (__v8sf) _mm256_broadcastss_ps(__A), (__v8sf) _mm256_setzero_ps()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_mask_broadcastd_epi32 (__m128i __O, __mmask8 __M, __m128i __A) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_broadcastd_epi32(__m128i __O, __mmask8 __M, __m128i __A) { return (__m128i)__builtin_ia32_selectd_128(__M, (__v4si) _mm_broadcastd_epi32(__A), (__v4si) __O); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_maskz_broadcastd_epi32 (__mmask8 __M, __m128i __A) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_maskz_broadcastd_epi32(__mmask8 __M, __m128i __A) { return (__m128i)__builtin_ia32_selectd_128(__M, (__v4si) _mm_broadcastd_epi32(__A), (__v4si) _mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mask_broadcastd_epi32 (__m256i __O, __mmask8 __M, __m128i __A) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_broadcastd_epi32(__m256i __O, __mmask8 __M, __m128i __A) { return (__m256i)__builtin_ia32_selectd_256(__M, (__v8si) _mm256_broadcastd_epi32(__A), (__v8si) __O); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_maskz_broadcastd_epi32 (__mmask8 __M, __m128i __A) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_broadcastd_epi32(__mmask8 __M, __m128i __A) { return (__m256i)__builtin_ia32_selectd_256(__M, (__v8si) _mm256_broadcastd_epi32(__A), (__v8si) _mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_mask_broadcastq_epi64 (__m128i __O, __mmask8 __M, __m128i __A) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR 
+_mm_mask_broadcastq_epi64(__m128i __O, __mmask8 __M, __m128i __A) { return (__m128i)__builtin_ia32_selectq_128(__M, (__v2di) _mm_broadcastq_epi64(__A), (__v2di) __O); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A) -{ +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_maskz_broadcastq_epi64(__mmask8 __M, __m128i __A) { return (__m128i)__builtin_ia32_selectq_128(__M, (__v2di) _mm_broadcastq_epi64(__A), (__v2di) _mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mask_broadcastq_epi64 (__m256i __O, __mmask8 __M, __m128i __A) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_broadcastq_epi64(__m256i __O, __mmask8 __M, __m128i __A) { return (__m256i)__builtin_ia32_selectq_256(__M, (__v4di) _mm256_broadcastq_epi64(__A), (__v4di) __O); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A) -{ +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_broadcastq_epi64(__mmask8 __M, __m128i __A) { return (__m256i)__builtin_ia32_selectq_256(__M, (__v4di) _mm256_broadcastq_epi64(__A), (__v4di) _mm256_setzero_si256()); @@ -8003,65 +7942,57 @@ _mm256_maskz_permutexvar_epi32(__mmask8 __M, __m256i __X, __m256i __Y) (__v4di)_mm256_alignr_epi64((A), (B), (imm)), \ (__v4di)_mm256_setzero_si256())) -static __inline__ __m128 __DEFAULT_FN_ATTRS128 -_mm_mask_movehdup_ps (__m128 __W, __mmask8 __U, __m128 __A) -{ +static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_movehdup_ps(__m128 __W, __mmask8 __U, __m128 __A) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_movehdup_ps(__A), (__v4sf)__W); } -static __inline__ __m128 __DEFAULT_FN_ATTRS128 -_mm_maskz_movehdup_ps (__mmask8 __U, __m128 __A) -{ +static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_maskz_movehdup_ps(__mmask8 __U, __m128 __A) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_movehdup_ps(__A), (__v4sf)_mm_setzero_ps()); } -static __inline__ __m256 __DEFAULT_FN_ATTRS256 -_mm256_mask_movehdup_ps (__m256 __W, __mmask8 __U, __m256 __A) -{ +static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_movehdup_ps(__m256 __W, __mmask8 __U, __m256 __A) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_movehdup_ps(__A), (__v8sf)__W); } -static __inline__ __m256 __DEFAULT_FN_ATTRS256 -_mm256_maskz_movehdup_ps (__mmask8 __U, __m256 __A) -{ +static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_movehdup_ps(__mmask8 __U, __m256 __A) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_movehdup_ps(__A), (__v8sf)_mm256_setzero_ps()); } -static __inline__ __m128 __DEFAULT_FN_ATTRS128 -_mm_mask_moveldup_ps (__m128 __W, __mmask8 __U, __m128 __A) -{ +static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_mask_moveldup_ps(__m128 __W, __mmask8 __U, __m128 __A) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_moveldup_ps(__A), (__v4sf)__W); } -static __inline__ __m128 __DEFAULT_FN_ATTRS128 -_mm_maskz_moveldup_ps (__mmask8 __U, __m128 __A) -{ +static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_maskz_moveldup_ps(__mmask8 __U, __m128 __A) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_moveldup_ps(__A), (__v4sf)_mm_setzero_ps()); } -static __inline__ __m256 __DEFAULT_FN_ATTRS256 -_mm256_mask_moveldup_ps (__m256 __W, __mmask8 __U, __m256 __A) -{ +static __inline__ __m256 
__DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_mask_moveldup_ps(__m256 __W, __mmask8 __U, __m256 __A) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_moveldup_ps(__A), (__v8sf)__W); } -static __inline__ __m256 __DEFAULT_FN_ATTRS256 -_mm256_maskz_moveldup_ps (__mmask8 __U, __m256 __A) -{ +static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_maskz_moveldup_ps(__mmask8 __U, __m256 __A) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_moveldup_ps(__A), (__v8sf)_mm256_setzero_ps()); diff --git a/clang/lib/Sema/HLSLExternalSemaSource.cpp b/clang/lib/Sema/HLSLExternalSemaSource.cpp index e118dda..f28a037 100644 --- a/clang/lib/Sema/HLSLExternalSemaSource.cpp +++ b/clang/lib/Sema/HLSLExternalSemaSource.cpp @@ -159,7 +159,8 @@ void HLSLExternalSemaSource::defineHLSLMatrixAlias() { SourceLocation(), ColsParam)); TemplateParams.emplace_back(ColsParam); - const unsigned MaxMatDim = 4; + const unsigned MaxMatDim = SemaPtr->getLangOpts().MaxMatrixDimension; + auto *MaxRow = IntegerLiteral::Create( AST, llvm::APInt(AST.getIntWidth(AST.IntTy), MaxMatDim), AST.IntTy, SourceLocation()); diff --git a/clang/lib/Sema/SemaCast.cpp b/clang/lib/Sema/SemaCast.cpp index ddf17d8..5360f8a 100644 --- a/clang/lib/Sema/SemaCast.cpp +++ b/clang/lib/Sema/SemaCast.cpp @@ -2927,6 +2927,8 @@ bool CastOperation::CheckHLSLCStyleCast(CheckedConversionKind CCK) { SrcExpr = Self.ImpCastExprToType( SrcExpr.get(), Self.Context.getArrayParameterType(SrcTy), CK_HLSLArrayRValue, VK_PRValue, nullptr, CCK); + else + SrcExpr = Self.DefaultLvalueConversion(SrcExpr.get()); Kind = CK_HLSLElementwiseCast; return true; } @@ -2935,6 +2937,7 @@ bool CastOperation::CheckHLSLCStyleCast(CheckedConversionKind CCK) { // If the relative order of this and the HLSLElementWise cast checks // are changed, it might change which cast handles what in a few cases if (Self.HLSL().CanPerformAggregateSplatCast(SrcExpr.get(), DestType)) { + SrcExpr = Self.DefaultLvalueConversion(SrcExpr.get()); const VectorType *VT = SrcTy->getAs<VectorType>(); // change splat from vec1 case to splat from scalar if (VT && VT->getNumElements() == 1) diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp index 4f409ca..652527a 100644 --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -16239,9 +16239,9 @@ getAndVerifyMatrixDimension(Expr *Expr, StringRef Name, Sema &S) { return {}; } uint64_t Dim = Value->getZExtValue(); - if (!ConstantMatrixType::isDimensionValid(Dim)) { + if (Dim == 0 || Dim > S.Context.getLangOpts().MaxMatrixDimension) { S.Diag(Expr->getBeginLoc(), diag::err_builtin_matrix_invalid_dimension) - << Name << ConstantMatrixType::getMaxElementsPerDimension(); + << Name << S.Context.getLangOpts().MaxMatrixDimension; return {}; } return Dim; diff --git a/clang/lib/Sema/SemaType.cpp b/clang/lib/Sema/SemaType.cpp index 638904d..7c1fb12 100644 --- a/clang/lib/Sema/SemaType.cpp +++ b/clang/lib/Sema/SemaType.cpp @@ -2517,12 +2517,18 @@ QualType Sema::BuildMatrixType(QualType ElementTy, Expr *NumRows, Expr *NumCols, Diag(AttrLoc, diag::err_attribute_zero_size) << "matrix" << ColRange; return QualType(); } - if (!ConstantMatrixType::isDimensionValid(MatrixRows)) { + if (MatrixRows > Context.getLangOpts().MaxMatrixDimension && + MatrixColumns > Context.getLangOpts().MaxMatrixDimension) { + Diag(AttrLoc, diag::err_attribute_size_too_large) + << RowRange << ColRange << "matrix row and column"; + return QualType(); + } + if (MatrixRows > 
Context.getLangOpts().MaxMatrixDimension) { Diag(AttrLoc, diag::err_attribute_size_too_large) << RowRange << "matrix row"; return QualType(); } - if (!ConstantMatrixType::isDimensionValid(MatrixColumns)) { + if (MatrixColumns > Context.getLangOpts().MaxMatrixDimension) { Diag(AttrLoc, diag::err_attribute_size_too_large) << ColRange << "matrix column"; return QualType(); diff --git a/clang/test/Analysis/zero-size-non-pod-array.cpp b/clang/test/Analysis/zero-size-non-pod-array.cpp index 628be0d7..8a32a49 100644 --- a/clang/test/Analysis/zero-size-non-pod-array.cpp +++ b/clang/test/Analysis/zero-size-non-pod-array.cpp @@ -95,7 +95,7 @@ void zeroSizeArrayBinding() { // Note: This is an error in gcc but a warning in clang. // In MSVC the declaration of 'S arr[0]' is already an error // and it doesn't recognize this syntax as a structured binding. - auto [] = arr; //expected-warning{{ISO C++17 does not allow a decomposition group to be empty}} + auto [] = arr; //expected-warning{{ISO C++17 does not allow a structured binding group to be empty}} clang_analyzer_eval(S::CtorInvocationCount == 0); //expected-warning{{TRUE}} } diff --git a/clang/test/CIR/CodeGen/throws.cpp b/clang/test/CIR/CodeGen/throws.cpp index 4255d43..89cb072 100644 --- a/clang/test/CIR/CodeGen/throws.cpp +++ b/clang/test/CIR/CodeGen/throws.cpp @@ -144,3 +144,55 @@ void throw_complex_expr() { // OGCG: store float 0x3FF19999A0000000, ptr %[[EXCEPTION_REAL]], align 16 // OGCG: store float 0x40019999A0000000, ptr %[[EXCEPTION_IMAG]], align 4 // OGCG: call void @__cxa_throw(ptr %[[EXCEPTION_ADDR]], ptr @_ZTICf, ptr null) + +void throw_vector_type() { + typedef int vi4 __attribute__((vector_size(16))); + vi4 a; + throw a; +} + +// CIR: %[[A_ADDR:.*]] = cir.alloca !cir.vector<4 x !s32i>, !cir.ptr<!cir.vector<4 x !s32i>>, ["a"] +// CIR: %[[EXCEPTION_ADDR:.*]] = cir.alloc.exception 16 -> !cir.ptr<!cir.vector<4 x !s32i>> +// CIR: %[[TMP_A:.*]] = cir.load{{.*}} %[[A_ADDR]] : !cir.ptr<!cir.vector<4 x !s32i>>, !cir.vector<4 x !s32i> +// CIR: cir.store{{.*}} %[[TMP_A]], %[[EXCEPTION_ADDR]] : !cir.vector<4 x !s32i>, !cir.ptr<!cir.vector<4 x !s32i>> +// CIR: cir.throw %[[EXCEPTION_ADDR]] : !cir.ptr<!cir.vector<4 x !s32i>>, @_ZTIDv4_i +// CIR: cir.unreachable + +// LLVM: %[[A_ADDR:.*]] = alloca <4 x i32>, i64 1, align 16 +// LLVM: %[[EXCEPTION_ADDR:.*]] = call ptr @__cxa_allocate_exception(i64 16) +// LLVM: %[[TMP_A:.*]] = load <4 x i32>, ptr %[[A_ADDR]], align 16 +// LLVM: store <4 x i32> %[[TMP_A]], ptr %[[EXCEPTION_ADDR]], align 16 +// LLVM: call void @__cxa_throw(ptr %[[EXCEPTION_ADDR]], ptr @_ZTIDv4_i, ptr null) + +// OGCG: %[[A_ADDR:.*]] = alloca <4 x i32>, align 16 +// OGCG: %[[EXCEPTION_ADDR:.*]] = call ptr @__cxa_allocate_exception(i64 16) +// OGCG: %[[TMP_A:.*]] = load <4 x i32>, ptr %[[A_ADDR]], align 16 +// OGCG: store <4 x i32> %[[TMP_A]], ptr %[[EXCEPTION_ADDR]], align 16 +// OGCG: call void @__cxa_throw(ptr %[[EXCEPTION_ADDR]], ptr @_ZTIDv4_i, ptr null) +// OGCG: unreachable + +void throw_ext_vector_type() { + typedef int vi4 __attribute__((ext_vector_type(4))); + vi4 a; + throw a; +} + +// CIR: %[[A_ADDR:.*]] = cir.alloca !cir.vector<4 x !s32i>, !cir.ptr<!cir.vector<4 x !s32i>>, ["a"] +// CIR: %[[EXCEPTION_ADDR:.*]] = cir.alloc.exception 16 -> !cir.ptr<!cir.vector<4 x !s32i>> +// CIR: %[[TMP_A:.*]] = cir.load{{.*}} %[[A_ADDR]] : !cir.ptr<!cir.vector<4 x !s32i>>, !cir.vector<4 x !s32i> +// CIR: cir.store{{.*}} %[[TMP_A]], %[[EXCEPTION_ADDR]] : !cir.vector<4 x !s32i>, !cir.ptr<!cir.vector<4 x !s32i>> +// CIR: cir.throw 
%[[EXCEPTION_ADDR]] : !cir.ptr<!cir.vector<4 x !s32i>>, @_ZTIDv4_i +// CIR: cir.unreachable + +// LLVM: %[[A_ADDR:.*]] = alloca <4 x i32>, i64 1, align 16 +// LLVM: %[[EXCEPTION_ADDR:.*]] = call ptr @__cxa_allocate_exception(i64 16) +// LLVM: %[[TMP_A:.*]] = load <4 x i32>, ptr %[[A_ADDR]], align 16 +// LLVM: store <4 x i32> %[[TMP_A]], ptr %[[EXCEPTION_ADDR]], align 16 +// LLVM: call void @__cxa_throw(ptr %[[EXCEPTION_ADDR]], ptr @_ZTIDv4_i, ptr null) + +// OGCG: %[[A_ADDR:.*]] = alloca <4 x i32>, align 16 +// OGCG: %[[EXCEPTION_ADDR:.*]] = call ptr @__cxa_allocate_exception(i64 16) +// OGCG: %[[TMP_A:.*]] = load <4 x i32>, ptr %[[A_ADDR]], align 16 +// OGCG: store <4 x i32> %[[TMP_A]], ptr %[[EXCEPTION_ADDR]], align 16 +// OGCG: call void @__cxa_throw(ptr %[[EXCEPTION_ADDR]], ptr @_ZTIDv4_i, ptr null) +// OGCG: unreachable diff --git a/clang/test/CIR/IR/invalid-try-catch.cir b/clang/test/CIR/IR/invalid-try-catch.cir new file mode 100644 index 0000000..04a4d25 --- /dev/null +++ b/clang/test/CIR/IR/invalid-try-catch.cir @@ -0,0 +1,156 @@ +// RUN: cir-opt %s -verify-diagnostics -split-input-file + +module { + +cir.func dso_local @invalid_catch_without_all_or_type() { + cir.scope { + cir.try { + cir.yield + // expected-error @below {{'cir.try' expected 'all' or 'type' keyword}} + } catch [invalid_keyword { + cir.yield + } + } + cir.return +} + +} + +// ----- + +module { + +cir.func dso_local @invalid_catch_rtti_type() { + cir.scope { + // expected-error @below {{'cir.try' op attribute 'handler_types' failed to satisfy constraint: catch all or unwind or global view array attribute}} + cir.try { + cir.yield + } catch [type #cir.undef] { + cir.yield + } + } + cir.return +} + +} + +// ----- + +module { + +cir.func dso_local @invalid_catch_empty_block() { + cir.scope { + // expected-error @below {{'cir.try' op region #1 ('handler_regions') failed to verify constraint: region with at least 1 blocks}} + cir.try { + cir.yield + } catch all { + } + } + cir.return +} + +} + +// ----- + +!s32i = !cir.int<s, 32> + +module { + +cir.func dso_local @invalid_catch_not_terminated() { + %a = cir.alloca !s32i, !cir.ptr<!s32i>, ["a", init] + cir.scope { + cir.try { + cir.yield + } + // expected-error @below {{'cir.try' blocks are expected to be explicitly terminated}} + catch all { + %tmp_a = cir.load %a : !cir.ptr<!s32i>, !s32i + } + } + cir.return +} + +} + +// ----- + +module { + +cir.func dso_local @invalid_catch_multiple_catch_all() { + cir.scope { + cir.try { + cir.yield + } catch all { + cir.yield + } + // expected-error @below {{op 'cir.try' can't have more than one catch all}} + catch all { + cir.yield + } + } + cir.return +} + +} + +// ----- + +module { + +cir.func dso_local @invalid_catch_without_type_info() { + cir.scope { + cir.try { + cir.yield + } + // expected-error @below {{expected attribute value}} + // expected-error @below {{op 'cir.try' expected valid RTTI info attribute}} + catch [type] { + cir.yield + } + } + cir.return +} + +} + +// ----- + +module { + +cir.func dso_local @invalid_catch_all_with_type_info() { + cir.scope { + cir.try { + cir.yield + } + // expected-error @below {{op 'cir.try' catch all dosen't need RTTI info attribute}} + catch [all] { + cir.yield + } + } + cir.return +} + +} + +// ----- + +module { + +cir.func dso_local @invalid_unwind_with_catch_all() { + cir.scope { + cir.try { + cir.yield + } + catch all { + cir.yield + } + // expected-error @below {{op 'cir.try' unwind can't be used with catch all}} + unwind { + + } + } + cir.return +} + +} diff --git 
a/clang/test/CIR/IR/try-catch.cir b/clang/test/CIR/IR/try-catch.cir new file mode 100644 index 0000000..7becd0b --- /dev/null +++ b/clang/test/CIR/IR/try-catch.cir @@ -0,0 +1,84 @@ +// RUN: cir-opt %s --verify-roundtrip | FileCheck %s + +!u8i = !cir.int<u, 8> + +module { + +cir.global "private" constant external @_ZTIi : !cir.ptr<!u8i> +cir.global "private" constant external @_ZTIPKc : !cir.ptr<!u8i> + +cir.func dso_local @empty_try_block_with_catch_all() { + cir.scope { + cir.try { + cir.yield + } catch all { + cir.yield + } + } + cir.return +} + +// CHECK: cir.func dso_local @empty_try_block_with_catch_all() { +// CHECK: cir.scope { +// CHECK: cir.try { +// CHECK: cir.yield +// CHECK: } catch all { +// CHECK: cir.yield +// CHECK: } +// CHECK: } +// CHECK: cir.return +// CHECK: } + +cir.func dso_local @empty_try_block_with_catch_unwind() { + cir.scope { + cir.try { + cir.yield + } unwind { + cir.yield + } + } + cir.return +} + +// CHECK: cir.func dso_local @empty_try_block_with_catch_unwind() { +// CHECK: cir.scope { +// CHECK: cir.try { +// CHECK: cir.yield +// CHECK: } unwind { +// CHECK: cir.yield +// CHECK: } +// CHECK: } +// CHECK: cir.return +// CHECK: } + +cir.func dso_local @empty_try_block_with_catch_ist() { + cir.scope { + cir.try { + cir.yield + } catch [type #cir.global_view<@_ZTIi> : !cir.ptr<!u8i>] { + cir.yield + } catch [type #cir.global_view<@_ZTIPKc> : !cir.ptr<!u8i>] { + cir.yield + } unwind { + cir.yield + } + } + cir.return +} + +// CHECK: cir.func dso_local @empty_try_block_with_catch_ist() { +// CHECK: cir.scope { +// CHECK: cir.try { +// CHECK: cir.yield +// CHECK: } catch [type #cir.global_view<@_ZTIi> : !cir.ptr<!u8i>] { +// CHECK: cir.yield +// CHECK: } catch [type #cir.global_view<@_ZTIPKc> : !cir.ptr<!u8i>] { +// CHECK: cir.yield +// CHECK: } unwind { +// CHECK: cir.yield +// CHECK: } +// CHECK: } +// CHECK: cir.return +// CHECK: } + +} diff --git a/clang/test/CXX/dcl.decl/dcl.decomp/p2.cpp b/clang/test/CXX/dcl.decl/dcl.decomp/p2.cpp index cad7d8e..717ea3aa 100644 --- a/clang/test/CXX/dcl.decl/dcl.decomp/p2.cpp +++ b/clang/test/CXX/dcl.decl/dcl.decomp/p2.cpp @@ -16,8 +16,8 @@ int array() { using X3 = X[3]; auto [a3, b3, c3] = X3{1, 2, 3}; - auto &[d, e] = arr; // expected-error {{type 'int[3]' decomposes into 3 elements, but only 2 names were provided}} - auto &[f, g, h, i] = arr; // expected-error {{type 'int[3]' decomposes into 3 elements, but 4 names were provided}} + auto &[d, e] = arr; // expected-error {{type 'int[3]' binds to 3 elements, but only 2 names were provided}} + auto &[f, g, h, i] = arr; // expected-error {{type 'int[3]' binds to 3 elements, but 4 names were provided}} auto &[r0, r1, r2] = arr; const auto &[cr0, cr1, cr2] = arr; diff --git a/clang/test/CXX/dcl.decl/dcl.decomp/p3.cpp b/clang/test/CXX/dcl.decl/dcl.decomp/p3.cpp index ce5eefc..b7fef12 100644 --- a/clang/test/CXX/dcl.decl/dcl.decomp/p3.cpp +++ b/clang/test/CXX/dcl.decl/dcl.decomp/p3.cpp @@ -5,10 +5,10 @@ using size_t = decltype(sizeof(0)); struct A { int x, y; }; struct B { int x, y; }; -void no_tuple_size_1() { auto [x, y] = A(); } // ok, decompose elementwise +void no_tuple_size_1() { auto [x, y] = A(); } // ok, bind elementwise namespace std { template<typename T> struct tuple_size; } -void no_tuple_size_2() { auto [x, y] = A(); } // ok, decompose elementwise +void no_tuple_size_2() { auto [x, y] = A(); } // ok, bind elementwise struct Bad1 { int a, b; }; template<> struct std::tuple_size<Bad1> {}; @@ -16,15 +16,15 @@ void no_tuple_size_3() { auto [x, y] = Bad1(); } // ok, 
omitting value is valid struct Bad2 {}; template<> struct std::tuple_size<Bad2> { const int value = 5; }; -void no_tuple_size_4() { auto [x, y] = Bad2(); } // expected-error {{cannot decompose this type; 'std::tuple_size<Bad2>::value' is not a valid integral constant expression}} +void no_tuple_size_4() { auto [x, y] = Bad2(); } // expected-error {{cannot bind this type; 'std::tuple_size<Bad2>::value' is not a valid integral constant expression}} template<> struct std::tuple_size<A> { static const int value = 3; }; template<> struct std::tuple_size<B> { enum { value = 3 }; }; void no_get_1() { { - auto [a0, a1] = A(); // expected-error {{decomposes into 3 elements}} - auto [b0, b1] = B(); // expected-error {{decomposes into 3 elements}} + auto [a0, a1] = A(); // expected-error {{binds to 3 elements}} + auto [b0, b1] = B(); // expected-error {{binds to 3 elements}} } auto [a0, a1, a2] = A(); // expected-error {{undeclared identifier 'get'}} expected-note {{in implicit initialization of binding declaration 'a0'}} } diff --git a/clang/test/CXX/dcl.decl/dcl.decomp/p4.cpp b/clang/test/CXX/dcl.decl/dcl.decomp/p4.cpp index 7141124..532a967 100644 --- a/clang/test/CXX/dcl.decl/dcl.decomp/p4.cpp +++ b/clang/test/CXX/dcl.decl/dcl.decomp/p4.cpp @@ -25,10 +25,10 @@ namespace NonPublicMembers { struct NonPublic4 : NonPublic2 {}; void test() { - auto [a1] = NonPublic1(); // expected-error {{cannot decompose protected member 'a' of 'NonPublicMembers::NonPublic1'}} - auto [a2] = NonPublic2(); // expected-error {{cannot decompose private member 'a' of 'NonPublicMembers::NonPublic2'}} - auto [a3] = NonPublic3(); // expected-error {{cannot decompose members of inaccessible base class 'A' of 'NonPublicMembers::NonPublic3'}} - auto [a4] = NonPublic4(); // expected-error {{cannot decompose private member 'a' of 'NonPublicMembers::NonPublic2'}} + auto [a1] = NonPublic1(); // expected-error {{cannot bind protected member 'a' of 'NonPublicMembers::NonPublic1'}} + auto [a2] = NonPublic2(); // expected-error {{cannot bind private member 'a' of 'NonPublicMembers::NonPublic2'}} + auto [a3] = NonPublic3(); // expected-error {{cannot bind members of inaccessible base class 'A' of 'NonPublicMembers::NonPublic3'}} + auto [a4] = NonPublic4(); // expected-error {{cannot bind private member 'a' of 'NonPublicMembers::NonPublic2'}} } } @@ -46,8 +46,8 @@ namespace AnonymousMember { }; void test() { - auto [a1] = Struct(); // expected-error {{cannot decompose class type 'Struct' because it has an anonymous struct member}} - auto [a2] = Union(); // expected-error {{cannot decompose class type 'Union' because it has an anonymous union member}} + auto [a1] = Struct(); // expected-error {{cannot bind class type 'Struct' because it has an anonymous struct member}} + auto [a2] = Union(); // expected-error {{cannot bind class type 'Union' because it has an anonymous union member}} } } @@ -73,12 +73,12 @@ namespace MultipleClasses { struct M : virtual J, L {}; void test() { - auto [b] = B(); // expected-error {{cannot decompose class type 'B': both it and its base class 'A' have non-static data members}} - auto [d] = D(); // expected-error {{cannot decompose class type 'D': its base classes 'A' and 'C' have non-static data members}} + auto [b] = B(); // expected-error {{cannot bind class type 'B': both it and its base class 'A' have non-static data members}} + auto [d] = D(); // expected-error {{cannot bind class type 'D': its base classes 'A' and 'C' have non-static data members}} auto [e] = E(); - auto [f] = F(); // expected-error-re 
{{cannot decompose members of ambiguous base class 'A' of 'F':{{.*}}struct MultipleClasses::F -> A{{.*}}struct MultipleClasses::F -> E -> A}} + auto [f] = F(); // expected-error-re {{cannot bind members of ambiguous base class 'A' of 'F':{{.*}}struct MultipleClasses::F -> A{{.*}}struct MultipleClasses::F -> E -> A}} auto [h] = H(); // ok, only one (virtual) base subobject even though there are two paths to it - auto [k] = K(); // expected-error {{cannot decompose members of ambiguous base class 'I'}} + auto [k] = K(); // expected-error {{cannot bind members of ambiguous base class 'I'}} auto [m] = M(); // ok, all paths to I are through the same virtual base subobject J same<decltype(m), int>(); @@ -214,7 +214,7 @@ namespace p0969r0 { auto &[x, y] = b; } void test_external(B b) { - auto &[x, y] = b; // expected-error {{cannot decompose members of inaccessible base class 'A' of 'p0969r0::B'}} + auto &[x, y] = b; // expected-error {{cannot bind members of inaccessible base class 'A' of 'p0969r0::B'}} } struct C { @@ -229,13 +229,13 @@ namespace p0969r0 { struct D : C { static void test_member(D d, C c) { auto &[x1, y1] = d; - auto &[x2, y2] = c; // expected-error {{cannot decompose protected member 'y' of 'p0969r0::C'}} + auto &[x2, y2] = c; // expected-error {{cannot bind protected member 'y' of 'p0969r0::C'}} } }; void test_friend(D d) { auto &[x, y] = d; } void test_external(D d) { - auto &[x, y] = d; // expected-error {{cannot decompose protected member 'y' of 'p0969r0::C'}} + auto &[x, y] = d; // expected-error {{cannot bind protected member 'y' of 'p0969r0::C'}} } } diff --git a/clang/test/CXX/drs/cwg22xx.cpp b/clang/test/CXX/drs/cwg22xx.cpp index 8c8ad9f..34119a1 100644 --- a/clang/test/CXX/drs/cwg22xx.cpp +++ b/clang/test/CXX/drs/cwg22xx.cpp @@ -202,7 +202,7 @@ namespace cwg2285 { // cwg2285: 4 void test() { using T = int[1]; auto [a] = T{a}; - // since-cxx17-error@-1 {{binding 'a' cannot appear in the initializer of its own decomposition declaration}} + // since-cxx17-error@-1 {{binding 'a' cannot appear in the initializer of its own structured binding declaration}} } #endif } // namespace cwg2285 diff --git a/clang/test/CXX/drs/cwg23xx.cpp b/clang/test/CXX/drs/cwg23xx.cpp index 1285665..72cf249 100644 --- a/clang/test/CXX/drs/cwg23xx.cpp +++ b/clang/test/CXX/drs/cwg23xx.cpp @@ -440,7 +440,7 @@ template <> struct tuple_size<cwg2386::Bad2> { namespace cwg2386 { void no_value() { auto [x, y] = Bad1(); } void wrong_value() { auto [x, y] = Bad2(); } -// since-cxx17-error@-1 {{type 'Bad2' decomposes into 42 elements, but only 2 names were provided}} +// since-cxx17-error@-1 {{type 'Bad2' binds to 42 elements, but only 2 names were provided}} #endif } // namespace cwg2386 diff --git a/clang/test/CXX/drs/cwg26xx.cpp b/clang/test/CXX/drs/cwg26xx.cpp index bceef64..aa87f5a 100644 --- a/clang/test/CXX/drs/cwg26xx.cpp +++ b/clang/test/CXX/drs/cwg26xx.cpp @@ -183,17 +183,17 @@ T get_T(); void use() { UnaryC auto [a, b] = get_S(); - // since-cxx20-error@-1 {{decomposition declaration cannot be declared with constrained 'auto'}} + // since-cxx20-error@-1 {{structured binding declaration cannot be declared with constrained 'auto'}} BinaryC<int> auto [c, d] = get_S(); - // since-cxx20-error@-1 {{decomposition declaration cannot be declared with constrained 'auto'}} + // since-cxx20-error@-1 {{structured binding declaration cannot be declared with constrained 'auto'}} } template<typename T> void TemplUse() { UnaryC auto [a, b] = get_T<T>(); - // since-cxx20-error@-1 {{decomposition declaration cannot 
be declared with constrained 'auto'}} + // since-cxx20-error@-1 {{structured binding declaration cannot be declared with constrained 'auto'}} BinaryC<T> auto [c, d] = get_T<T>(); - // since-cxx20-error@-1 {{decomposition declaration cannot be declared with constrained 'auto'}} + // since-cxx20-error@-1 {{structured binding declaration cannot be declared with constrained 'auto'}} } #endif } // namespace cwg2635 diff --git a/clang/test/CXX/stmt.stmt/stmt.iter/stmt.ranged/p1.cpp b/clang/test/CXX/stmt.stmt/stmt.iter/stmt.ranged/p1.cpp index 93c2beb..938a3d0 100644 --- a/clang/test/CXX/stmt.stmt/stmt.iter/stmt.ranged/p1.cpp +++ b/clang/test/CXX/stmt.stmt/stmt.iter/stmt.ranged/p1.cpp @@ -75,7 +75,7 @@ namespace X { void test_D() { #if __cplusplus >= 201703L for (extern auto [x, y] : D()) { - } // expected-error@-1 {{decomposition declaration cannot be declared 'extern'}} + } // expected-error@-1 {{structured binding declaration cannot be declared 'extern'}} // expected-error@-2 {{loop variable '[x, y]' may not be declared 'extern'}} #endif } diff --git a/clang/test/CXX/temp/temp.res/temp.local/p6.cpp b/clang/test/CXX/temp/temp.res/temp.local/p6.cpp index 205df5f..beed7bf 100644 --- a/clang/test/CXX/temp/temp.res/temp.local/p6.cpp +++ b/clang/test/CXX/temp/temp.res/temp.local/p6.cpp @@ -165,7 +165,7 @@ A<0>::B a; template <typename T> int shadow() { // expected-note{{template parameter is declared here}} using arr = int[1]; - // expected-warning@+1 {{decomposition declarations are a C++17 extension}} + // expected-warning@+1 {{structured binding declarations are a C++17 extension}} auto [ T // expected-error {{declaration of 'T' shadows template parameter}} ] = arr{}; diff --git a/clang/test/CodeGen/X86/avx512cd-builtins.c b/clang/test/CodeGen/X86/avx512cd-builtins.c index 2890889..80a20b1 100644 --- a/clang/test/CodeGen/X86/avx512cd-builtins.c +++ b/clang/test/CodeGen/X86/avx512cd-builtins.c @@ -14,37 +14,53 @@ __m512i test_mm512_conflict_epi64(__m512i __A) { // CHECK-LABEL: test_mm512_conflict_epi64 // CHECK: call {{.*}}<8 x i64> @llvm.x86.avx512.conflict.q.512(<8 x i64> %{{.*}}) - return _mm512_conflict_epi64(__A); + return _mm512_conflict_epi64(__A); } + +TEST_CONSTEXPR(match_v8di(_mm512_conflict_epi64((__m512i)(__v8di){1, 2, 1, 3, 2, 4, 1, 5}), 0, 0, 1, 0, 2, 0, 5, 0)); +TEST_CONSTEXPR(match_v8di(_mm512_conflict_epi64((__m512i)(__v8di){5, 5, 5, 5, 5, 5, 5, 5}), 0, 1, 3, 7, 15, 31, 63, 127)); +TEST_CONSTEXPR(match_v8di(_mm512_conflict_epi64((__m512i)(__v8di){1, 2, 3, 4, 5, 6, 7, 8}), 0, 0, 0, 0, 0, 0, 0, 0)); __m512i test_mm512_mask_conflict_epi64(__m512i __W, __mmask8 __U, __m512i __A) { // CHECK-LABEL: test_mm512_mask_conflict_epi64 // CHECK: call {{.*}}<8 x i64> @llvm.x86.avx512.conflict.q.512(<8 x i64> %{{.*}}) // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} - return _mm512_mask_conflict_epi64(__W,__U,__A); + return _mm512_mask_conflict_epi64(__W,__U,__A); } + +TEST_CONSTEXPR(match_v8di(_mm512_mask_conflict_epi64((__m512i)(__v8di){0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF}, 0x55, (__m512i)(__v8di){1, 2, 1, 3, 2, 4, 1, 5}), 0, 0xFF, 1, 0xFF, 2, 0xFF, 5, 0xFF)); __m512i test_mm512_maskz_conflict_epi64(__mmask8 __U, __m512i __A) { // CHECK-LABEL: test_mm512_maskz_conflict_epi64 // CHECK: call {{.*}}<8 x i64> @llvm.x86.avx512.conflict.q.512(<8 x i64> %{{.*}}) // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} - return _mm512_maskz_conflict_epi64(__U,__A); + return _mm512_maskz_conflict_epi64(__U,__A); } + 
+TEST_CONSTEXPR(match_v8di(_mm512_maskz_conflict_epi64(0x55, (__m512i)(__v8di){1, 2, 1, 3, 2, 4, 1, 5}), 0, 0, 1, 0, 2, 0, 5, 0)); __m512i test_mm512_conflict_epi32(__m512i __A) { // CHECK-LABEL: test_mm512_conflict_epi32 // CHECK: call <16 x i32> @llvm.x86.avx512.conflict.d.512(<16 x i32> %{{.*}}) - return _mm512_conflict_epi32(__A); + return _mm512_conflict_epi32(__A); } + +TEST_CONSTEXPR(match_v16si(_mm512_conflict_epi32((__m512i)(__v16si){1, 2, 1, 3, 2, 4, 1, 5, 6, 7, 6, 8, 7, 9, 6, 10}), 0, 0, 1, 0, 2, 0, 5, 0, 0, 0, 256, 0, 512, 0, 1280, 0)); +TEST_CONSTEXPR(match_v16si(_mm512_conflict_epi32((__m512i)(__v16si){9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9}), 0, 1, 3, 7, 15, 31, 63, 127, 255, 511, 1023, 2047, 4095, 8191, 16383, 32767)); +TEST_CONSTEXPR(match_v16si(_mm512_conflict_epi32((__m512i)(__v16si){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)); __m512i test_mm512_mask_conflict_epi32(__m512i __W, __mmask16 __U, __m512i __A) { // CHECK-LABEL: test_mm512_mask_conflict_epi32 // CHECK: call <16 x i32> @llvm.x86.avx512.conflict.d.512(<16 x i32> %{{.*}}) // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} - return _mm512_mask_conflict_epi32(__W,__U,__A); + return _mm512_mask_conflict_epi32(__W,__U,__A); } + +TEST_CONSTEXPR(match_v16si(_mm512_mask_conflict_epi32((__m512i)(__v16si){0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF}, 0x5555, (__m512i)(__v16si){1, 2, 1, 3, 2, 4, 1, 5, 6, 7, 6, 8, 7, 9, 6, 10}), 0, 0xFF, 1, 0xFF, 2, 0xFF, 5, 0xFF, 0, 0xFF, 256, 0xFF, 512, 0xFF, 1280, 0xFF)); __m512i test_mm512_maskz_conflict_epi32(__mmask16 __U, __m512i __A) { // CHECK-LABEL: test_mm512_maskz_conflict_epi32 // CHECK: call <16 x i32> @llvm.x86.avx512.conflict.d.512(<16 x i32> %{{.*}}) // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} - return _mm512_maskz_conflict_epi32(__U,__A); + return _mm512_maskz_conflict_epi32(__U,__A); } + +TEST_CONSTEXPR(match_v16si(_mm512_maskz_conflict_epi32(0x5555, (__m512i)(__v16si){1, 2, 1, 3, 2, 4, 1, 5, 6, 7, 6, 8, 7, 9, 6, 10}), 0, 0, 1, 0, 2, 0, 5, 0, 0, 0, 256, 0, 512, 0, 1280, 0)); __m512i test_mm512_lzcnt_epi32(__m512i __A) { // CHECK-LABEL: test_mm512_lzcnt_epi32 // CHECK: call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %{{.*}}, i1 true) diff --git a/clang/test/CodeGen/X86/avx512dq-builtins.c b/clang/test/CodeGen/X86/avx512dq-builtins.c index 4112561..1b09959 100644 --- a/clang/test/CodeGen/X86/avx512dq-builtins.c +++ b/clang/test/CodeGen/X86/avx512dq-builtins.c @@ -1305,6 +1305,7 @@ __m512 test_mm512_mask_broadcast_f32x8(__m512 __O, __mmask16 __M, float const* _ // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} return _mm512_mask_broadcast_f32x8(__O, __M, _mm256_loadu_ps(__A)); } +TEST_CONSTEXPR(match_m512(_mm512_mask_broadcast_f32x8(_mm512_setzero_ps(), 0xAAAA, (__m256)(__v8sf){5.0f,5.0f,5.0f,5.0f,5.0f,5.0f,5.0f,5.0f}), 0,5,0,5,0,5,0,5,0,5,0,5,0,5,0,5)); __m512 test_mm512_maskz_broadcast_f32x8(__mmask16 __M, float const* __A) { // CHECK-LABEL: test_mm512_maskz_broadcast_f32x8 @@ -1312,6 +1313,7 @@ __m512 test_mm512_maskz_broadcast_f32x8(__mmask16 __M, float const* __A) { // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} return _mm512_maskz_broadcast_f32x8(__M, _mm256_loadu_ps(__A)); } +TEST_CONSTEXPR(match_m512(_mm512_maskz_broadcast_f32x8(0xAAAA, _mm256_set1_ps(7.0f)), 0,7,0,7,0,7,0,7,0,7,0,7,0,7,0,7)); __m512d test_mm512_broadcast_f64x2(double 
const* __A) { // CHECK-LABEL: test_mm512_broadcast_f64x2 @@ -1327,6 +1329,8 @@ __m512d test_mm512_mask_broadcast_f64x2(__m512d __O, __mmask8 __M, double const* return _mm512_mask_broadcast_f64x2(__O, __M, _mm_loadu_pd(__A)); } +TEST_CONSTEXPR(match_m512d(_mm512_mask_broadcast_f64x2(_mm512_setzero_pd(), 0xAA, (__m128d)(__v2df){1,2}), 0,2,0,2,0,2,0,2)); + __m512d test_mm512_maskz_broadcast_f64x2(__mmask8 __M, double const* __A) { // CHECK-LABEL: test_mm512_maskz_broadcast_f64x2 // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1> @@ -1334,6 +1338,8 @@ __m512d test_mm512_maskz_broadcast_f64x2(__mmask8 __M, double const* __A) { return _mm512_maskz_broadcast_f64x2(__M, _mm_loadu_pd(__A)); } +TEST_CONSTEXPR(match_m512d(_mm512_maskz_broadcast_f64x2(0xAA, (__m128d)(__v2df){1,2}), 0,2,0,2,0,2,0,2)); + __m512i test_mm512_broadcast_i32x2(__m128i __A) { // CHECK-LABEL: test_mm512_broadcast_i32x2 // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1> @@ -1348,6 +1354,8 @@ __m512i test_mm512_mask_broadcast_i32x2(__m512i __O, __mmask16 __M, __m128i __A) return _mm512_mask_broadcast_i32x2(__O, __M, __A); } +TEST_CONSTEXPR(match_v16si(_mm512_mask_broadcast_i32x2(_mm512_setzero_si512(), 0xAAAA, (__m128i)(__v4si){0,1,2,3}), 0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1)); + __m512i test_mm512_maskz_broadcast_i32x2(__mmask16 __M, __m128i __A) { // CHECK-LABEL: test_mm512_maskz_broadcast_i32x2 // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1> @@ -1355,6 +1363,8 @@ __m512i test_mm512_maskz_broadcast_i32x2(__mmask16 __M, __m128i __A) { return _mm512_maskz_broadcast_i32x2(__M, __A); } +TEST_CONSTEXPR(match_v16si(_mm512_maskz_broadcast_i32x2(0xAAAA, (__m128i)(__v4si){0,1,2,3}), 0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1)); + __m512i test_mm512_broadcast_i32x8(__m256i const* __A) { // CHECK-LABEL: test_mm512_broadcast_i32x8 // CHECK: shufflevector <8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> @@ -1368,6 +1378,7 @@ __m512i test_mm512_mask_broadcast_i32x8(__m512i __O, __mmask16 __M, __m256i cons // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_mask_broadcast_i32x8(__O, __M, _mm256_loadu_si256(__A)); } +TEST_CONSTEXPR(match_v16si(_mm512_mask_broadcast_i32x8(_mm512_setzero_si512(), 0xAAAA, _mm256_set1_epi32(8)), 0,8,0,8,0,8,0,8,0,8,0,8,0,8,0,8)); __m512i test_mm512_maskz_broadcast_i32x8(__mmask16 __M, __m256i const* __A) { // CHECK-LABEL: test_mm512_maskz_broadcast_i32x8 @@ -1376,6 +1387,8 @@ __m512i test_mm512_maskz_broadcast_i32x8(__mmask16 __M, __m256i const* __A) { return _mm512_maskz_broadcast_i32x8(__M, _mm256_loadu_si256(__A)); } +TEST_CONSTEXPR(match_v16si(_mm512_maskz_broadcast_i32x8(0xAAAA, _mm256_set1_epi32(9)), 0,9,0,9,0,9,0,9,0,9,0,9,0,9,0,9)); + __m512i test_mm512_broadcast_i64x2(__m128i const* __A) { // CHECK-LABEL: test_mm512_broadcast_i64x2 // CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1> diff --git a/clang/test/CodeGen/X86/avx512f-builtins.c b/clang/test/CodeGen/X86/avx512f-builtins.c index 7756f0d..3deaf8e 100644 --- 
a/clang/test/CodeGen/X86/avx512f-builtins.c +++ b/clang/test/CodeGen/X86/avx512f-builtins.c @@ -6836,6 +6836,8 @@ __m512 test_mm512_mask_broadcast_f32x4(__m512 __O, __mmask16 __M, float const* _ return _mm512_mask_broadcast_f32x4(__O, __M, _mm_loadu_ps(__A)); } +TEST_CONSTEXPR(match_m512(_mm512_mask_broadcast_f32x4(_mm512_setzero_ps(), 0xAAAA, (__m128)(__v4sf){1,2,3,4}), 0,2,0,4,0,2,0,4,0,2,0,4,0,2,0,4)); + __m512 test_mm512_maskz_broadcast_f32x4(__mmask16 __M, float const* __A) { // CHECK-LABEL: test_mm512_maskz_broadcast_f32x4 // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> @@ -6843,6 +6845,8 @@ __m512 test_mm512_maskz_broadcast_f32x4(__mmask16 __M, float const* __A) { return _mm512_maskz_broadcast_f32x4(__M, _mm_loadu_ps(__A)); } +TEST_CONSTEXPR(match_m512(_mm512_maskz_broadcast_f32x4(0xAAAA, (__m128)(__v4sf){1,2,3,4}), 0,2,0,4,0,2,0,4,0,2,0,4,0,2,0,4)); + __m512d test_mm512_broadcast_f64x4(double const* __A) { // CHECK-LABEL: test_mm512_broadcast_f64x4 // CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> @@ -6885,6 +6889,8 @@ __m512i test_mm512_maskz_broadcast_i32x4(__mmask16 __M, __m128i const* __A) { return _mm512_maskz_broadcast_i32x4(__M, _mm_loadu_si128(__A)); } +TEST_CONSTEXPR(match_v16si(_mm512_maskz_broadcast_i32x4(0xAAAA, (__m128i)(__v4si){0,1,2,3}), 0,1,0,3,0,1,0,3,0,1,0,3,0,1,0,3)); + __m512i test_mm512_broadcast_i64x4(__m256i const* __A) { // CHECK-LABEL: test_mm512_broadcast_i64x4 // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> @@ -10903,6 +10909,8 @@ __m512i test_mm512_mask_set1_epi32 (__m512i __O, __mmask16 __M, int __A) return _mm512_mask_set1_epi32 ( __O, __M, __A); } +TEST_CONSTEXPR(match_v16si(_mm512_mask_set1_epi32(_mm512_setzero_si512(), 0xAAAA, 13), 0,13,0,13,0,13,0,13,0,13,0,13,0,13,0,13)); + __m512i test_mm512_maskz_set1_epi32(__mmask16 __M, int __A) { // CHECK-LABEL: test_mm512_maskz_set1_epi32 @@ -10926,6 +10934,8 @@ __m512i test_mm512_maskz_set1_epi32(__mmask16 __M, int __A) return _mm512_maskz_set1_epi32(__M, __A); } +TEST_CONSTEXPR(match_v16si(_mm512_maskz_set1_epi32(0xAAAA, 19), 0,19,0,19,0,19,0,19,0,19,0,19,0,19,0,19)); + __m512i test_mm512_set_epi8(char e63, char e62, char e61, char e60, char e59, char e58, char e57, char e56, char e55, char e54, char e53, char e52, @@ -11145,6 +11155,8 @@ __m512i test_mm512_mask_set1_epi64 (__m512i __O, __mmask8 __M, long long __A) return _mm512_mask_set1_epi64 (__O, __M, __A); } +TEST_CONSTEXPR(match_v8di(_mm512_mask_set1_epi64(_mm512_setzero_si512(), 0xAA, 21), 0,21,0,21,0,21,0,21)); + __m512i test_mm512_maskz_set1_epi64 (__mmask8 __M, long long __A) { // CHECK-LABEL: test_mm512_maskz_set1_epi64 @@ -11160,6 +11172,8 @@ __m512i test_mm512_maskz_set1_epi64 (__mmask8 __M, long long __A) return _mm512_maskz_set1_epi64 (__M, __A); } +TEST_CONSTEXPR(match_v8di(_mm512_maskz_set1_epi64(0xAA, 23), 0, 23, 0, 23, 0, 23, 0, 23)); + __m512i test_mm512_set_epi64 (long long __A, long long __B, long long __C, long long __D, long long __E, long long __F, diff --git a/clang/test/CodeGen/X86/avx512vl-builtins.c b/clang/test/CodeGen/X86/avx512vl-builtins.c index 51385d5..9b6bfea9 100644 --- a/clang/test/CodeGen/X86/avx512vl-builtins.c +++ b/clang/test/CodeGen/X86/avx512vl-builtins.c @@ -7201,6 +7201,8 @@ __m128i 
test_mm_mask_set1_epi32(__m128i __O, __mmask8 __M) { return _mm_mask_set1_epi32(__O, __M, 5); } +TEST_CONSTEXPR(match_v4si(_mm_mask_set1_epi32(_mm_setzero_si128(), 0xF, 7), 7, 7, 7, 7)); + __m128i test_mm_maskz_set1_epi32(__mmask8 __M) { // CHECK-LABEL: test_mm_maskz_set1_epi32 // CHECK: insertelement <4 x i32> poison, i32 %{{.*}}, i32 0 @@ -7212,6 +7214,8 @@ __m128i test_mm_maskz_set1_epi32(__mmask8 __M) { return _mm_maskz_set1_epi32(__M, 5); } +TEST_CONSTEXPR(match_v4si(_mm_maskz_set1_epi32(0xA, 11), 0, 11, 0, 11)); + __m256i test_mm256_mask_set1_epi32(__m256i __O, __mmask8 __M) { // CHECK-LABEL: test_mm256_mask_set1_epi32 // CHECK: insertelement <8 x i32> poison, i32 %{{.*}}, i32 0 @@ -7226,6 +7230,8 @@ __m256i test_mm256_mask_set1_epi32(__m256i __O, __mmask8 __M) { return _mm256_mask_set1_epi32(__O, __M, 5); } +TEST_CONSTEXPR(match_v8si(_mm256_mask_set1_epi32(_mm256_setzero_si256(), 0xAA, 5), 0, 5, 0, 5, 0, 5, 0, 5)); + __m256i test_mm256_maskz_set1_epi32(__mmask8 __M) { // CHECK-LABEL: test_mm256_maskz_set1_epi32 // CHECK: insertelement <8 x i32> poison, i32 %{{.*}}, i32 0 @@ -7240,6 +7246,8 @@ __m256i test_mm256_maskz_set1_epi32(__mmask8 __M) { return _mm256_maskz_set1_epi32(__M, 5); } +TEST_CONSTEXPR(match_v8si(_mm256_maskz_set1_epi32(0xAA, 13), 0, 13, 0, 13, 0, 13, 0, 13)); + __m128i test_mm_mask_set1_epi64(__m128i __O, __mmask8 __M, long long __A) { // CHECK-LABEL: test_mm_mask_set1_epi64 // CHECK: insertelement <2 x i64> poison, i64 %{{.*}}, i32 0 @@ -7249,6 +7257,8 @@ __m128i test_mm_mask_set1_epi64(__m128i __O, __mmask8 __M, long long __A) { return _mm_mask_set1_epi64(__O, __M, __A); } +TEST_CONSTEXPR(match_v2di(_mm_mask_set1_epi64(_mm_setzero_si128(), 0x3, 9), 9, 9)); + __m128i test_mm_maskz_set1_epi64(__mmask8 __M, long long __A) { // CHECK-LABEL: test_mm_maskz_set1_epi64 // CHECK: insertelement <2 x i64> poison, i64 %{{.*}}, i32 0 @@ -7258,6 +7268,8 @@ __m128i test_mm_maskz_set1_epi64(__mmask8 __M, long long __A) { return _mm_maskz_set1_epi64(__M, __A); } +TEST_CONSTEXPR(match_v2di(_mm_maskz_set1_epi64(0x2, 15), 0, 15)); + __m256i test_mm256_mask_set1_epi64(__m256i __O, __mmask8 __M, long long __A) { // CHECK-LABEL: test_mm256_mask_set1_epi64 // CHECK: insertelement <4 x i64> poison, i64 %{{.*}}, i32 0 @@ -7269,6 +7281,8 @@ __m256i test_mm256_mask_set1_epi64(__m256i __O, __mmask8 __M, long long __A) { return _mm256_mask_set1_epi64(__O, __M, __A); } +TEST_CONSTEXPR(match_v4di(_mm256_mask_set1_epi64(_mm256_setzero_si256(), 0xF, 11), 11, 11, 11, 11)); + __m256i test_mm256_maskz_set1_epi64(__mmask8 __M, long long __A) { // CHECK-LABEL: test_mm256_maskz_set1_epi64 // CHECK: insertelement <4 x i64> poison, i64 %{{.*}}, i32 0 @@ -7280,6 +7294,8 @@ __m256i test_mm256_maskz_set1_epi64(__mmask8 __M, long long __A) { return _mm256_maskz_set1_epi64(__M, __A); } +TEST_CONSTEXPR(match_v4di(_mm256_maskz_set1_epi64(0xA, 17), 0, 17, 0, 17)); + __m128d test_mm_fixupimm_pd(__m128d __A, __m128d __B, __m128i __C) { // CHECK-LABEL: test_mm_fixupimm_pd // CHECK: @llvm.x86.avx512.mask.fixupimm.pd.128 @@ -7623,6 +7639,8 @@ __m128d test_mm_mask_unpackhi_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d return _mm_mask_unpackhi_pd(__W, __U, __A, __B); } +TEST_CONSTEXPR(match_m128d(_mm_mask_unpackhi_pd(_mm_setzero_pd(), 0x3, (__m128d)(__v2df){1.0,2.0}, (__m128d)(__v2df){3.0,4.0}), 2.0,4.0)); + __m128d test_mm_maskz_unpackhi_pd(__mmask8 __U, __m128d __A, __m128d __B) { // CHECK-LABEL: test_mm_maskz_unpackhi_pd // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> <i32 1, i32 3> 
@@ -7637,6 +7655,8 @@ __m256d test_mm256_mask_unpackhi_pd(__m256d __W, __mmask8 __U, __m256d __A, __m2
  return _mm256_mask_unpackhi_pd(__W, __U, __A, __B);
}

+TEST_CONSTEXPR(match_m256d(_mm256_mask_unpackhi_pd(_mm256_setzero_pd(), 0xAA, (__m256d)(__v4df){1.0,2.0,3.0,4.0}, (__m256d)(__v4df){5.0,6.0,7.0,8.0}), 0,6.0,0,8.0));
+
__m256d test_mm256_maskz_unpackhi_pd(__mmask8 __U, __m256d __A, __m256d __B) {
  // CHECK-LABEL: test_mm256_maskz_unpackhi_pd
  // CHECK: shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
@@ -7679,6 +7699,8 @@ __m128d test_mm_mask_unpacklo_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d
  return _mm_mask_unpacklo_pd(__W, __U, __A, __B);
}

+TEST_CONSTEXPR(match_m128d(_mm_mask_unpacklo_pd(_mm_setzero_pd(), 0x3, (__m128d)(__v2df){1.0,2.0}, (__m128d)(__v2df){3.0,4.0}), 1.0,3.0));
+
__m128d test_mm_maskz_unpacklo_pd(__mmask8 __U, __m128d __A, __m128d __B) {
  // CHECK-LABEL: test_mm_maskz_unpacklo_pd
  // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> <i32 0, i32 2>
@@ -7686,6 +7708,8 @@ __m128d test_mm_maskz_unpacklo_pd(__mmask8 __U, __m128d __A, __m128d __B) {
  return _mm_maskz_unpacklo_pd(__U, __A, __B);
}

+TEST_CONSTEXPR(match_m128d(_mm_maskz_unpacklo_pd(0x2, (__m128d)(__v2df){1.0,2.0}, (__m128d)(__v2df){3.0,4.0}), 0.0,3.0));
+
__m256d test_mm256_mask_unpacklo_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
  // CHECK-LABEL: test_mm256_mask_unpacklo_pd
  // CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
@@ -7693,6 +7717,8 @@ __m256d test_mm256_mask_unpacklo_pd(__m256d __W, __mmask8 __U, __m256d __A, __m2
  return _mm256_mask_unpacklo_pd(__W, __U, __A, __B);
}

+TEST_CONSTEXPR(match_m256d(_mm256_mask_unpacklo_pd(_mm256_setzero_pd(), 0xAA, (__m256d)(__v4df){1.0,2.0,3.0,4.0}, (__m256d)(__v4df){5.0,6.0,7.0,8.0}), 0,5.0,0,7.0));
+
__m256d test_mm256_maskz_unpacklo_pd(__mmask8 __U, __m256d __A, __m256d __B) {
  // CHECK-LABEL: test_mm256_maskz_unpacklo_pd
  // CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
@@ -7700,6 +7726,8 @@ __m256d test_mm256_maskz_unpacklo_pd(__mmask8 __U, __m256d __A, __m256d __B) {
  return _mm256_maskz_unpacklo_pd(__U, __A, __B);
}

+TEST_CONSTEXPR(match_m256d(_mm256_maskz_unpacklo_pd(0x0A, (__m256d)(__v4df){1.0,2.0,3.0,4.0}, (__m256d)(__v4df){5.0,6.0,7.0,8.0}), 0.0,5.0,0.0,7.0));
+
__m128 test_mm_mask_unpacklo_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
  // CHECK-LABEL: test_mm_mask_unpacklo_ps
  // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
@@ -8039,6 +8067,8 @@ __m128i test_mm_mask_unpackhi_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m1
  return _mm_mask_unpackhi_epi32(__W, __U, __A, __B);
}

+TEST_CONSTEXPR(match_v4si(_mm_mask_unpackhi_epi32(_mm_setzero_si128(), 0xA, (__m128i)(__v4si){0,1,2,3}, (__m128i)(__v4si){4,5,6,7}), 0,6,0,7));
+
__m128i test_mm_maskz_unpackhi_epi32(__mmask8 __U, __m128i __A, __m128i __B) {
  // CHECK-LABEL: test_mm_maskz_unpackhi_epi32
  // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
@@ -8046,6 +8076,8 @@ __m128i test_mm_maskz_unpackhi_epi32(__mmask8 __U, __m128i __A, __m128i __B) {
  return _mm_maskz_unpackhi_epi32(__U, __A, __B);
}

+TEST_CONSTEXPR(match_v4si(_mm_maskz_unpackhi_epi32(0x5, (__m128i)(__v4si){0,1,2,3}, (__m128i)(__v4si){4,5,6,7}), 2,0,3,0));
+
__m256i test_mm256_mask_unpackhi_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) {
  // CHECK-LABEL: test_mm256_mask_unpackhi_epi32
  // CHECK: shufflevector <8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
@@ -8997,6 +9029,8 @@ __m256 test_mm256_maskz_broadcast_f32x4(__mmask8 __M, __m128 __A) {
  return _mm256_maskz_broadcast_f32x4(__M, __A);
}

+TEST_CONSTEXPR(match_m256(_mm256_maskz_broadcast_f32x4(0xAA, (__m128)(__v4sf){0,1,2,3}), 0,1,0,3,0,1,0,3));
+
__m256i test_mm256_broadcast_i32x4(__m128i const* __A) {
  // CHECK-LABEL: test_mm256_broadcast_i32x4
  // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
@@ -9018,6 +9052,8 @@ __m256i test_mm256_maskz_broadcast_i32x4(__mmask8 __M, __m128i const* __A) {
  return _mm256_maskz_broadcast_i32x4(__M, _mm_loadu_si128(__A));
}

+TEST_CONSTEXPR(match_v8si(_mm256_maskz_broadcast_i32x4(0xAA, (__m128i)(__v4si){0,1,2,3}), 0,1,0,3,0,1,0,3));
+
__m256d test_mm256_mask_broadcastsd_pd(__m256d __O, __mmask8 __M, __m128d __A) {
  // CHECK-LABEL: test_mm256_mask_broadcastsd_pd
  // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <4 x i32> zeroinitializer
@@ -10324,6 +10360,8 @@ __m128 test_mm_mask_movehdup_ps(__m128 __W, __mmask8 __U, __m128 __A) {
  return _mm_mask_movehdup_ps(__W, __U, __A);
}

+TEST_CONSTEXPR(match_m128(_mm_mask_movehdup_ps(_mm_setzero_ps(), 0xF, (__m128)(__v4sf){1.f,2.f,3.f,4.f}), 2.f,2.f,4.f,4.f));
+
__m128 test_mm_maskz_movehdup_ps(__mmask8 __U, __m128 __A) {
  // CHECK-LABEL: test_mm_maskz_movehdup_ps
  // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
@@ -10338,6 +10376,8 @@ __m256 test_mm256_mask_movehdup_ps(__m256 __W, __mmask8 __U, __m256 __A) {
  return _mm256_mask_movehdup_ps(__W, __U, __A);
}

+TEST_CONSTEXPR(match_m256(_mm256_mask_movehdup_ps(_mm256_setzero_ps(), 0xAA, (__m256)(__v8sf){1,2,3,4,5,6,7,8}), 0,2,0,4,0,6,0,8));
+
__m256 test_mm256_maskz_movehdup_ps(__mmask8 __U, __m256 __A) {
  // CHECK-LABEL: test_mm256_maskz_movehdup_ps
  // CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7>
@@ -10352,6 +10392,8 @@ __m128 test_mm_mask_moveldup_ps(__m128 __W, __mmask8 __U, __m128 __A) {
  return _mm_mask_moveldup_ps(__W, __U, __A);
}

+TEST_CONSTEXPR(match_m128(_mm_mask_moveldup_ps(_mm_setzero_ps(), 0xF, (__m128)(__v4sf){1.f,2.f,3.f,4.f}), 1.f,1.f,3.f,3.f));
+
__m128 test_mm_maskz_moveldup_ps(__mmask8 __U, __m128 __A) {
  // CHECK-LABEL: test_mm_maskz_moveldup_ps
  // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
@@ -10366,6 +10408,8 @@ __m256 test_mm256_mask_moveldup_ps(__m256 __W, __mmask8 __U, __m256 __A) {
  return _mm256_mask_moveldup_ps(__W, __U, __A);
}

+TEST_CONSTEXPR(match_m256(_mm256_mask_moveldup_ps(_mm256_setzero_ps(), 0xAA, (__m256)(__v8sf){1,2,3,4,5,6,7,8}), 0,1,0,3,0,5,0,7));
+
__m256 test_mm256_maskz_moveldup_ps(__mmask8 __U, __m256 __A) {
  // CHECK-LABEL: test_mm256_maskz_moveldup_ps
  // CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
diff --git a/clang/test/CodeGen/X86/avx512vlcd-builtins.c b/clang/test/CodeGen/X86/avx512vlcd-builtins.c
index 56c04a0..29fc6fd 100644
--- a/clang/test/CodeGen/X86/avx512vlcd-builtins.c
+++ b/clang/test/CodeGen/X86/avx512vlcd-builtins.c
@@ -66,83 +66,114 @@ TEST_CONSTEXPR(match_v8si(_mm256_broadcastmw_epi32((__mmask16)(0xcafe)), 0xcafe,
__m128i test_mm_conflict_epi64(__m128i __A)
{ // CHECK-LABEL: test_mm_conflict_epi64 // CHECK: call {{.*}}<2 x i64> @llvm.x86.avx512.conflict.q.128(<2 x i64> %{{.*}}) - return _mm_conflict_epi64(__A); + return _mm_conflict_epi64(__A); } +TEST_CONSTEXPR(match_v2di(_mm_conflict_epi64((__m128i)(__v2di){1, 2}), 0, 0)); +TEST_CONSTEXPR(match_v2di(_mm_conflict_epi64((__m128i)(__v2di){5, 5}), 0, 1)); + __m128i test_mm_mask_conflict_epi64(__m128i __W, __mmask8 __U, __m128i __A) { // CHECK-LABEL: test_mm_mask_conflict_epi64 // CHECK: call {{.*}}<2 x i64> @llvm.x86.avx512.conflict.q.128(<2 x i64> %{{.*}}) // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} - return _mm_mask_conflict_epi64(__W, __U, __A); + return _mm_mask_conflict_epi64(__W, __U, __A); } +TEST_CONSTEXPR(match_v2di(_mm_mask_conflict_epi64((__m128i)(__v2di){0xFF, 0xFF}, 0x2, (__m128i)(__v2di){5, 5}), 0xFF, 1)); + __m128i test_mm_maskz_conflict_epi64(__mmask8 __U, __m128i __A) { // CHECK-LABEL: test_mm_maskz_conflict_epi64 // CHECK: call {{.*}}<2 x i64> @llvm.x86.avx512.conflict.q.128(<2 x i64> %{{.*}}) // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} - return _mm_maskz_conflict_epi64(__U, __A); + return _mm_maskz_conflict_epi64(__U, __A); } +TEST_CONSTEXPR(match_v2di(_mm_maskz_conflict_epi64(0x2, (__m128i)(__v2di){5, 5}), 0, 1)); + __m256i test_mm256_conflict_epi64(__m256i __A) { // CHECK-LABEL: test_mm256_conflict_epi64 // CHECK: call {{.*}}<4 x i64> @llvm.x86.avx512.conflict.q.256(<4 x i64> %{{.*}}) - return _mm256_conflict_epi64(__A); + return _mm256_conflict_epi64(__A); } +TEST_CONSTEXPR(match_v4di(_mm256_conflict_epi64((__m256i)(__v4di){1, 2, 1, 3}), 0, 0, 1, 0)); +TEST_CONSTEXPR(match_v4di(_mm256_conflict_epi64((__m256i)(__v4di){7, 7, 7, 7}), 0, 1, 3, 7)); +TEST_CONSTEXPR(match_v4di(_mm256_conflict_epi64((__m256i)(__v4di){1, 2, 3, 4}), 0, 0, 0, 0)); + __m256i test_mm256_mask_conflict_epi64(__m256i __W, __mmask8 __U, __m256i __A) { // CHECK-LABEL: test_mm256_mask_conflict_epi64 // CHECK: call {{.*}}<4 x i64> @llvm.x86.avx512.conflict.q.256(<4 x i64> %{{.*}}) // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} - return _mm256_mask_conflict_epi64(__W, __U, __A); + return _mm256_mask_conflict_epi64(__W, __U, __A); } +TEST_CONSTEXPR(match_v4di(_mm256_mask_conflict_epi64((__m256i)(__v4di){0xFF, 0xFF, 0xFF, 0xFF}, 0x5, (__m256i)(__v4di){1, 2, 1, 3}), 0, 0xFF, 1, 0xFF)); + __m256i test_mm256_maskz_conflict_epi64(__mmask8 __U, __m256i __A) { // CHECK-LABEL: test_mm256_maskz_conflict_epi64 // CHECK: call {{.*}}<4 x i64> @llvm.x86.avx512.conflict.q.256(<4 x i64> %{{.*}}) // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} - return _mm256_maskz_conflict_epi64(__U, __A); + return _mm256_maskz_conflict_epi64(__U, __A); } +TEST_CONSTEXPR(match_v4di(_mm256_maskz_conflict_epi64(0x5, (__m256i)(__v4di){1, 2, 1, 3}), 0, 0, 1, 0)); + __m128i test_mm_conflict_epi32(__m128i __A) { // CHECK-LABEL: test_mm_conflict_epi32 // CHECK: call <4 x i32> @llvm.x86.avx512.conflict.d.128(<4 x i32> %{{.*}}) - return _mm_conflict_epi32(__A); + return _mm_conflict_epi32(__A); } +TEST_CONSTEXPR(match_v4si(_mm_conflict_epi32((__m128i)(__v4si){1, 2, 1, 3}), 0, 0, 1, 0)); +TEST_CONSTEXPR(match_v4si(_mm_conflict_epi32((__m128i)(__v4si){3, 3, 3, 3}), 0, 1, 3, 7)); +TEST_CONSTEXPR(match_v4si(_mm_conflict_epi32((__m128i)(__v4si){1, 2, 3, 4}), 0, 0, 0, 0)); + __m128i test_mm_mask_conflict_epi32(__m128i __W, __mmask8 __U, __m128i __A) { // CHECK-LABEL: test_mm_mask_conflict_epi32 // CHECK: call <4 x i32> @llvm.x86.avx512.conflict.d.128(<4 
x i32> %{{.*}}) // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} - return _mm_mask_conflict_epi32(__W, __U, __A); + return _mm_mask_conflict_epi32(__W, __U, __A); } +TEST_CONSTEXPR(match_v4si(_mm_mask_conflict_epi32((__m128i)(__v4si){0xFF, 0xFF, 0xFF, 0xFF}, 0x5, (__m128i)(__v4si){1, 2, 1, 3}), 0, 0xFF, 1, 0xFF)); + __m128i test_mm_maskz_conflict_epi32(__mmask8 __U, __m128i __A) { // CHECK-LABEL: test_mm_maskz_conflict_epi32 // CHECK: call <4 x i32> @llvm.x86.avx512.conflict.d.128(<4 x i32> %{{.*}}) // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} - return _mm_maskz_conflict_epi32(__U, __A); + return _mm_maskz_conflict_epi32(__U, __A); } +TEST_CONSTEXPR(match_v4si(_mm_maskz_conflict_epi32(0x5, (__m128i)(__v4si){1, 2, 1, 3}), 0, 0, 1, 0)); + __m256i test_mm256_conflict_epi32(__m256i __A) { // CHECK-LABEL: test_mm256_conflict_epi32 // CHECK: call <8 x i32> @llvm.x86.avx512.conflict.d.256(<8 x i32> %{{.*}}) - return _mm256_conflict_epi32(__A); + return _mm256_conflict_epi32(__A); } +TEST_CONSTEXPR(match_v8si(_mm256_conflict_epi32((__m256i)(__v8si){1, 2, 1, 3, 2, 4, 1, 5}), 0, 0, 1, 0, 2, 0, 5, 0)); +TEST_CONSTEXPR(match_v8si(_mm256_conflict_epi32((__m256i)(__v8si){4, 4, 4, 4, 4, 4, 4, 4}), 0, 1, 3, 7, 15, 31, 63, 127)); +TEST_CONSTEXPR(match_v8si(_mm256_conflict_epi32((__m256i)(__v8si){1, 2, 3, 4, 5, 6, 7, 8}), 0, 0, 0, 0, 0, 0, 0, 0)); + __m256i test_mm256_mask_conflict_epi32(__m256i __W, __mmask8 __U, __m256i __A) { // CHECK-LABEL: test_mm256_mask_conflict_epi32 // CHECK: call <8 x i32> @llvm.x86.avx512.conflict.d.256(<8 x i32> %{{.*}}) // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} - return _mm256_mask_conflict_epi32(__W, __U, __A); + return _mm256_mask_conflict_epi32(__W, __U, __A); } +TEST_CONSTEXPR(match_v8si(_mm256_mask_conflict_epi32((__m256i)(__v8si){0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF}, /*0101 0101=*/0x55, (__m256i)(__v8si){1, 2, 1, 3, 2, 4, 1, 5}), 0, 0xFF, 1, 0xFF, 2, 0xFF, 5, 0xFF)); + __m256i test_mm256_maskz_conflict_epi32(__mmask8 __U, __m256i __A) { // CHECK-LABEL: test_mm256_maskz_conflict_epi32 // CHECK: call <8 x i32> @llvm.x86.avx512.conflict.d.256(<8 x i32> %{{.*}}) // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} - return _mm256_maskz_conflict_epi32(__U, __A); + return _mm256_maskz_conflict_epi32(__U, __A); } +TEST_CONSTEXPR(match_v8si(_mm256_maskz_conflict_epi32(0x55, (__m256i)(__v8si){1, 2, 1, 3, 2, 4, 1, 5}), 0, 0, 1, 0, 2, 0, 5, 0)); + __m128i test_mm_lzcnt_epi32(__m128i __A) { // CHECK-LABEL: test_mm_lzcnt_epi32 // CHECK: call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %{{.*}}, i1 true) diff --git a/clang/test/CodeGen/X86/avx512vldq-builtins.c b/clang/test/CodeGen/X86/avx512vldq-builtins.c index 9388457..4773b60 100644 --- a/clang/test/CodeGen/X86/avx512vldq-builtins.c +++ b/clang/test/CodeGen/X86/avx512vldq-builtins.c @@ -987,6 +987,8 @@ __m256 test_mm256_mask_broadcast_f32x2(__m256 __O, __mmask8 __M, __m128 __A) { return _mm256_mask_broadcast_f32x2(__O, __M, __A); } +TEST_CONSTEXPR(match_m256(_mm256_mask_broadcast_f32x2(_mm256_setzero_ps(), 0xAA, (__m128)(__v4sf){1.f,2.f,3.f,4.f}), 0,2.f,0,2.f,0,2.f,0,2.f)); + __m256 test_mm256_maskz_broadcast_f32x2(__mmask8 __M, __m128 __A) { // CHECK-LABEL: test_mm256_maskz_broadcast_f32x2 // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1> @@ -994,6 +996,8 @@ __m256 test_mm256_maskz_broadcast_f32x2(__mmask8 __M, __m128 __A) { return 
_mm256_maskz_broadcast_f32x2(__M, __A); } +TEST_CONSTEXPR(match_m256(_mm256_maskz_broadcast_f32x2(0xAA, (__m128)(__v4sf){1.f,2.f,3.f,4.f}), 0,2.f,0,2.f,0,2.f,0,2.f)); + __m256d test_mm256_broadcast_f64x2(double const* __A) { // CHECK-LABEL: test_mm256_broadcast_f64x2 // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 0, i32 1> @@ -1008,6 +1012,8 @@ __m256d test_mm256_mask_broadcast_f64x2(__m256d __O, __mmask8 __M, double const* return _mm256_mask_broadcast_f64x2(__O, __M, _mm_loadu_pd(__A)); } +TEST_CONSTEXPR(match_m256d(_mm256_mask_broadcast_f64x2(_mm256_setzero_pd(), 0xA, (__m128d)(__v2df){1.0,2.0}), 0,2.0,0,2.0)); + __m256d test_mm256_maskz_broadcast_f64x2(__mmask8 __M, double const* __A) { // CHECK-LABEL: test_mm256_maskz_broadcast_f64x2 // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 0, i32 1> @@ -1015,6 +1021,8 @@ __m256d test_mm256_maskz_broadcast_f64x2(__mmask8 __M, double const* __A) { return _mm256_maskz_broadcast_f64x2(__M, _mm_loadu_pd(__A)); } +TEST_CONSTEXPR(match_m256d(_mm256_maskz_broadcast_f64x2(0xA, (__m128d)(__v2df){1.0,2.0}), 0,2.0,0,2.0)); + __m128i test_mm_broadcast_i32x2(__m128i __A) { // CHECK-LABEL: test_mm_broadcast_i32x2 // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 0, i32 1> @@ -1029,6 +1037,8 @@ __m128i test_mm_mask_broadcast_i32x2(__m128i __O, __mmask8 __M, __m128i __A) { return _mm_mask_broadcast_i32x2(__O, __M, __A); } +TEST_CONSTEXPR(match_v4si(_mm_mask_broadcast_i32x2(_mm_setzero_si128(), 0xF, (__m128i)(__v4si){0,1,2,3}), 0,1,0,1)); + __m128i test_mm_maskz_broadcast_i32x2(__mmask8 __M, __m128i __A) { // CHECK-LABEL: test_mm_maskz_broadcast_i32x2 // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 0, i32 1> @@ -1036,6 +1046,8 @@ __m128i test_mm_maskz_broadcast_i32x2(__mmask8 __M, __m128i __A) { return _mm_maskz_broadcast_i32x2(__M, __A); } +TEST_CONSTEXPR(match_v4si(_mm_maskz_broadcast_i32x2(0xF, (__m128i)(__v4si){0,1,2,3}), 0,1,0,1)); + __m256i test_mm256_broadcast_i32x2(__m128i __A) { // CHECK-LABEL: test_mm256_broadcast_i32x2 // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1> @@ -1050,6 +1062,8 @@ __m256i test_mm256_mask_broadcast_i32x2(__m256i __O, __mmask8 __M, __m128i __A) return _mm256_mask_broadcast_i32x2(__O, __M, __A); } +TEST_CONSTEXPR(match_v8si(_mm256_mask_broadcast_i32x2(_mm256_setzero_si256(), 0xAA, (__m128i)(__v4si){0,1,2,3}), 0,1,0,1,0,1,0,1)); + __m256i test_mm256_maskz_broadcast_i32x2(__mmask8 __M, __m128i __A) { // CHECK-LABEL: test_mm256_maskz_broadcast_i32x2 // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1> @@ -1057,6 +1071,8 @@ __m256i test_mm256_maskz_broadcast_i32x2(__mmask8 __M, __m128i __A) { return _mm256_maskz_broadcast_i32x2(__M, __A); } +TEST_CONSTEXPR(match_v8si(_mm256_maskz_broadcast_i32x2(0xAA, (__m128i)(__v4si){0,1,2,3}), 0,1,0,1,0,1,0,1)); + __m256i test_mm256_broadcast_i64x2(__m128i const* __A) { // CHECK-LABEL: test_mm256_broadcast_i64x2 // CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 0, i32 1> @@ -1078,6 +1094,8 @@ __m256i test_mm256_maskz_broadcast_i64x2(__mmask8 __M, __m128i const* __A) { return _mm256_maskz_broadcast_i64x2(__M, _mm_loadu_si128(__A)); } +TEST_CONSTEXPR(match_v4di(_mm256_maskz_broadcast_i64x2(0xF, (__m128i)(__v2di){1,2}), 
1,2,1,2)); + __m128d test_mm256_extractf64x2_pd(__m256d __A) { // CHECK-LABEL: test_mm256_extractf64x2_pd // CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> poison, <2 x i32> <i32 2, i32 3> diff --git a/clang/test/CodeGen/arm-mve-intrinsics/load-store.c b/clang/test/CodeGen/arm-mve-intrinsics/load-store.c index 2dde75f..50f7011 100644 --- a/clang/test/CodeGen/arm-mve-intrinsics/load-store.c +++ b/clang/test/CodeGen/arm-mve-intrinsics/load-store.c @@ -8,8 +8,8 @@ // CHECK-LABEL: @test_vld1q_f16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP1:%.*]] = load <8 x half>, ptr [[BASE:%.*]], align 2 -// CHECK-NEXT: ret <8 x half> [[TMP1]] +// CHECK-NEXT: [[TMP0:%.*]] = load <8 x half>, ptr [[BASE:%.*]], align 2 +// CHECK-NEXT: ret <8 x half> [[TMP0]] // float16x8_t test_vld1q_f16(const float16_t *base) { @@ -22,8 +22,8 @@ float16x8_t test_vld1q_f16(const float16_t *base) // CHECK-LABEL: @test_vld1q_f32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[BASE:%.*]], align 4 -// CHECK-NEXT: ret <4 x float> [[TMP1]] +// CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[BASE:%.*]], align 4 +// CHECK-NEXT: ret <4 x float> [[TMP0]] // float32x4_t test_vld1q_f32(const float32_t *base) { @@ -36,8 +36,8 @@ float32x4_t test_vld1q_f32(const float32_t *base) // CHECK-LABEL: @test_vld1q_s8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[BASE:%.*]], align 1 -// CHECK-NEXT: ret <16 x i8> [[TMP1]] +// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr [[BASE:%.*]], align 1 +// CHECK-NEXT: ret <16 x i8> [[TMP0]] // int8x16_t test_vld1q_s8(const int8_t *base) { @@ -50,8 +50,8 @@ int8x16_t test_vld1q_s8(const int8_t *base) // CHECK-LABEL: @test_vld1q_s16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr [[BASE:%.*]], align 2 -// CHECK-NEXT: ret <8 x i16> [[TMP1]] +// CHECK-NEXT: [[TMP0:%.*]] = load <8 x i16>, ptr [[BASE:%.*]], align 2 +// CHECK-NEXT: ret <8 x i16> [[TMP0]] // int16x8_t test_vld1q_s16(const int16_t *base) { @@ -64,8 +64,8 @@ int16x8_t test_vld1q_s16(const int16_t *base) // CHECK-LABEL: @test_vld1q_s32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[BASE:%.*]], align 4 -// CHECK-NEXT: ret <4 x i32> [[TMP1]] +// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr [[BASE:%.*]], align 4 +// CHECK-NEXT: ret <4 x i32> [[TMP0]] // int32x4_t test_vld1q_s32(const int32_t *base) { @@ -78,8 +78,8 @@ int32x4_t test_vld1q_s32(const int32_t *base) // CHECK-LABEL: @test_vld1q_u8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[BASE:%.*]], align 1 -// CHECK-NEXT: ret <16 x i8> [[TMP1]] +// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr [[BASE:%.*]], align 1 +// CHECK-NEXT: ret <16 x i8> [[TMP0]] // uint8x16_t test_vld1q_u8(const uint8_t *base) { @@ -92,8 +92,8 @@ uint8x16_t test_vld1q_u8(const uint8_t *base) // CHECK-LABEL: @test_vld1q_u16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr [[BASE:%.*]], align 2 -// CHECK-NEXT: ret <8 x i16> [[TMP1]] +// CHECK-NEXT: [[TMP0:%.*]] = load <8 x i16>, ptr [[BASE:%.*]], align 2 +// CHECK-NEXT: ret <8 x i16> [[TMP0]] // uint16x8_t test_vld1q_u16(const uint16_t *base) { @@ -106,8 +106,8 @@ uint16x8_t test_vld1q_u16(const uint16_t *base) // CHECK-LABEL: @test_vld1q_u32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[BASE:%.*]], align 4 -// CHECK-NEXT: ret <4 x i32> [[TMP1]] +// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr [[BASE:%.*]], align 4 +// CHECK-NEXT: ret <4 x i32> [[TMP0]] // uint32x4_t 
test_vld1q_u32(const uint32_t *base) { @@ -120,10 +120,10 @@ uint32x4_t test_vld1q_u32(const uint32_t *base) // CHECK-LABEL: @test_vld1q_z_f16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = call <8 x half> @llvm.masked.load.v8f16.p0(ptr [[BASE:%.*]], i32 2, <8 x i1> [[TMP2]], <8 x half> zeroinitializer) -// CHECK-NEXT: ret <8 x half> [[TMP3]] +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.masked.load.v8f16.p0(ptr [[BASE:%.*]], i32 2, <8 x i1> [[TMP1]], <8 x half> zeroinitializer) +// CHECK-NEXT: ret <8 x half> [[TMP2]] // float16x8_t test_vld1q_z_f16(const float16_t *base, mve_pred16_t p) { @@ -136,10 +136,10 @@ float16x8_t test_vld1q_z_f16(const float16_t *base, mve_pred16_t p) // CHECK-LABEL: @test_vld1q_z_f32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0(ptr [[BASE:%.*]], i32 4, <4 x i1> [[TMP2]], <4 x float> zeroinitializer) -// CHECK-NEXT: ret <4 x float> [[TMP3]] +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0(ptr [[BASE:%.*]], i32 4, <4 x i1> [[TMP1]], <4 x float> zeroinitializer) +// CHECK-NEXT: ret <4 x float> [[TMP2]] // float32x4_t test_vld1q_z_f32(const float32_t *base, mve_pred16_t p) { @@ -152,10 +152,10 @@ float32x4_t test_vld1q_z_f32(const float32_t *base, mve_pred16_t p) // CHECK-LABEL: @test_vld1q_z_s8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr [[BASE:%.*]], i32 1, <16 x i1> [[TMP2]], <16 x i8> zeroinitializer) -// CHECK-NEXT: ret <16 x i8> [[TMP3]] +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr [[BASE:%.*]], i32 1, <16 x i1> [[TMP1]], <16 x i8> zeroinitializer) +// CHECK-NEXT: ret <16 x i8> [[TMP2]] // int8x16_t test_vld1q_z_s8(const int8_t *base, mve_pred16_t p) { @@ -168,10 +168,10 @@ int8x16_t test_vld1q_z_s8(const int8_t *base, mve_pred16_t p) // CHECK-LABEL: @test_vld1q_z_s16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr [[BASE:%.*]], i32 2, <8 x i1> [[TMP2]], <8 x i16> zeroinitializer) -// CHECK-NEXT: ret <8 x i16> [[TMP3]] +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr [[BASE:%.*]], i32 2, <8 x i1> [[TMP1]], <8 x i16> zeroinitializer) +// CHECK-NEXT: ret <8 x i16> [[TMP2]] // int16x8_t test_vld1q_z_s16(const int16_t *base, mve_pred16_t p) { @@ -184,10 +184,10 @@ int16x8_t 
test_vld1q_z_s16(const int16_t *base, mve_pred16_t p) // CHECK-LABEL: @test_vld1q_z_s32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[BASE:%.*]], i32 4, <4 x i1> [[TMP2]], <4 x i32> zeroinitializer) -// CHECK-NEXT: ret <4 x i32> [[TMP3]] +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[BASE:%.*]], i32 4, <4 x i1> [[TMP1]], <4 x i32> zeroinitializer) +// CHECK-NEXT: ret <4 x i32> [[TMP2]] // int32x4_t test_vld1q_z_s32(const int32_t *base, mve_pred16_t p) { @@ -200,10 +200,10 @@ int32x4_t test_vld1q_z_s32(const int32_t *base, mve_pred16_t p) // CHECK-LABEL: @test_vld1q_z_u8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr [[BASE:%.*]], i32 1, <16 x i1> [[TMP2]], <16 x i8> zeroinitializer) -// CHECK-NEXT: ret <16 x i8> [[TMP3]] +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr [[BASE:%.*]], i32 1, <16 x i1> [[TMP1]], <16 x i8> zeroinitializer) +// CHECK-NEXT: ret <16 x i8> [[TMP2]] // uint8x16_t test_vld1q_z_u8(const uint8_t *base, mve_pred16_t p) { @@ -216,10 +216,10 @@ uint8x16_t test_vld1q_z_u8(const uint8_t *base, mve_pred16_t p) // CHECK-LABEL: @test_vld1q_z_u16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr [[BASE:%.*]], i32 2, <8 x i1> [[TMP2]], <8 x i16> zeroinitializer) -// CHECK-NEXT: ret <8 x i16> [[TMP3]] +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr [[BASE:%.*]], i32 2, <8 x i1> [[TMP1]], <8 x i16> zeroinitializer) +// CHECK-NEXT: ret <8 x i16> [[TMP2]] // uint16x8_t test_vld1q_z_u16(const uint16_t *base, mve_pred16_t p) { @@ -232,10 +232,10 @@ uint16x8_t test_vld1q_z_u16(const uint16_t *base, mve_pred16_t p) // CHECK-LABEL: @test_vld1q_z_u32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[BASE:%.*]], i32 4, <4 x i1> [[TMP2]], <4 x i32> zeroinitializer) -// CHECK-NEXT: ret <4 x i32> [[TMP3]] +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[BASE:%.*]], i32 4, <4 x i1> [[TMP1]], <4 x i32> zeroinitializer) +// CHECK-NEXT: ret <4 x i32> [[TMP2]] // uint32x4_t test_vld1q_z_u32(const uint32_t *base, mve_pred16_t p) { @@ -248,8 +248,8 @@ uint32x4_t test_vld1q_z_u32(const uint32_t *base, mve_pred16_t p) // CHECK-LABEL: 
@test_vldrbq_s8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[BASE:%.*]], align 1 -// CHECK-NEXT: ret <16 x i8> [[TMP1]] +// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr [[BASE:%.*]], align 1 +// CHECK-NEXT: ret <16 x i8> [[TMP0]] // int8x16_t test_vldrbq_s8(const int8_t *base) { @@ -258,9 +258,9 @@ int8x16_t test_vldrbq_s8(const int8_t *base) // CHECK-LABEL: @test_vldrbq_s16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, ptr [[BASE:%.*]], align 1 -// CHECK-NEXT: [[TMP2:%.*]] = sext <8 x i8> [[TMP1]] to <8 x i16> -// CHECK-NEXT: ret <8 x i16> [[TMP2]] +// CHECK-NEXT: [[TMP0:%.*]] = load <8 x i8>, ptr [[BASE:%.*]], align 1 +// CHECK-NEXT: [[TMP1:%.*]] = sext <8 x i8> [[TMP0]] to <8 x i16> +// CHECK-NEXT: ret <8 x i16> [[TMP1]] // int16x8_t test_vldrbq_s16(const int8_t *base) { @@ -269,9 +269,9 @@ int16x8_t test_vldrbq_s16(const int8_t *base) // CHECK-LABEL: @test_vldrbq_s32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP1:%.*]] = load <4 x i8>, ptr [[BASE:%.*]], align 1 -// CHECK-NEXT: [[TMP2:%.*]] = sext <4 x i8> [[TMP1]] to <4 x i32> -// CHECK-NEXT: ret <4 x i32> [[TMP2]] +// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i8>, ptr [[BASE:%.*]], align 1 +// CHECK-NEXT: [[TMP1:%.*]] = sext <4 x i8> [[TMP0]] to <4 x i32> +// CHECK-NEXT: ret <4 x i32> [[TMP1]] // int32x4_t test_vldrbq_s32(const int8_t *base) { @@ -280,8 +280,8 @@ int32x4_t test_vldrbq_s32(const int8_t *base) // CHECK-LABEL: @test_vldrbq_u8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[BASE:%.*]], align 1 -// CHECK-NEXT: ret <16 x i8> [[TMP1]] +// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr [[BASE:%.*]], align 1 +// CHECK-NEXT: ret <16 x i8> [[TMP0]] // uint8x16_t test_vldrbq_u8(const uint8_t *base) { @@ -290,9 +290,9 @@ uint8x16_t test_vldrbq_u8(const uint8_t *base) // CHECK-LABEL: @test_vldrbq_u16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, ptr [[BASE:%.*]], align 1 -// CHECK-NEXT: [[TMP2:%.*]] = zext <8 x i8> [[TMP1]] to <8 x i16> -// CHECK-NEXT: ret <8 x i16> [[TMP2]] +// CHECK-NEXT: [[TMP0:%.*]] = load <8 x i8>, ptr [[BASE:%.*]], align 1 +// CHECK-NEXT: [[TMP1:%.*]] = zext <8 x i8> [[TMP0]] to <8 x i16> +// CHECK-NEXT: ret <8 x i16> [[TMP1]] // uint16x8_t test_vldrbq_u16(const uint8_t *base) { @@ -301,9 +301,9 @@ uint16x8_t test_vldrbq_u16(const uint8_t *base) // CHECK-LABEL: @test_vldrbq_u32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP1:%.*]] = load <4 x i8>, ptr [[BASE:%.*]], align 1 -// CHECK-NEXT: [[TMP2:%.*]] = zext <4 x i8> [[TMP1]] to <4 x i32> -// CHECK-NEXT: ret <4 x i32> [[TMP2]] +// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i8>, ptr [[BASE:%.*]], align 1 +// CHECK-NEXT: [[TMP1:%.*]] = zext <4 x i8> [[TMP0]] to <4 x i32> +// CHECK-NEXT: ret <4 x i32> [[TMP1]] // uint32x4_t test_vldrbq_u32(const uint8_t *base) { @@ -312,10 +312,10 @@ uint32x4_t test_vldrbq_u32(const uint8_t *base) // CHECK-LABEL: @test_vldrbq_z_s8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr [[BASE:%.*]], i32 1, <16 x i1> [[TMP2]], <16 x i8> zeroinitializer) -// CHECK-NEXT: ret <16 x i8> [[TMP3]] +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr [[BASE:%.*]], i32 1, <16 x i1> [[TMP1]], 
<16 x i8> zeroinitializer) +// CHECK-NEXT: ret <16 x i8> [[TMP2]] // int8x16_t test_vldrbq_z_s8(const int8_t *base, mve_pred16_t p) { @@ -324,11 +324,11 @@ int8x16_t test_vldrbq_z_s8(const int8_t *base, mve_pred16_t p) // CHECK-LABEL: @test_vldrbq_z_s16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr [[BASE:%.*]], i32 1, <8 x i1> [[TMP2]], <8 x i8> zeroinitializer) -// CHECK-NEXT: [[TMP4:%.*]] = sext <8 x i8> [[TMP3]] to <8 x i16> -// CHECK-NEXT: ret <8 x i16> [[TMP4]] +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr [[BASE:%.*]], i32 1, <8 x i1> [[TMP1]], <8 x i8> zeroinitializer) +// CHECK-NEXT: [[TMP3:%.*]] = sext <8 x i8> [[TMP2]] to <8 x i16> +// CHECK-NEXT: ret <8 x i16> [[TMP3]] // int16x8_t test_vldrbq_z_s16(const int8_t *base, mve_pred16_t p) { @@ -337,11 +337,11 @@ int16x8_t test_vldrbq_z_s16(const int8_t *base, mve_pred16_t p) // CHECK-LABEL: @test_vldrbq_z_s32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = call <4 x i8> @llvm.masked.load.v4i8.p0(ptr [[BASE:%.*]], i32 1, <4 x i1> [[TMP2]], <4 x i8> zeroinitializer) -// CHECK-NEXT: [[TMP4:%.*]] = sext <4 x i8> [[TMP3]] to <4 x i32> -// CHECK-NEXT: ret <4 x i32> [[TMP4]] +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i8> @llvm.masked.load.v4i8.p0(ptr [[BASE:%.*]], i32 1, <4 x i1> [[TMP1]], <4 x i8> zeroinitializer) +// CHECK-NEXT: [[TMP3:%.*]] = sext <4 x i8> [[TMP2]] to <4 x i32> +// CHECK-NEXT: ret <4 x i32> [[TMP3]] // int32x4_t test_vldrbq_z_s32(const int8_t *base, mve_pred16_t p) { @@ -350,10 +350,10 @@ int32x4_t test_vldrbq_z_s32(const int8_t *base, mve_pred16_t p) // CHECK-LABEL: @test_vldrbq_z_u8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr [[BASE:%.*]], i32 1, <16 x i1> [[TMP2]], <16 x i8> zeroinitializer) -// CHECK-NEXT: ret <16 x i8> [[TMP3]] +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr [[BASE:%.*]], i32 1, <16 x i1> [[TMP1]], <16 x i8> zeroinitializer) +// CHECK-NEXT: ret <16 x i8> [[TMP2]] // uint8x16_t test_vldrbq_z_u8(const uint8_t *base, mve_pred16_t p) { @@ -362,11 +362,11 @@ uint8x16_t test_vldrbq_z_u8(const uint8_t *base, mve_pred16_t p) // CHECK-LABEL: @test_vldrbq_z_u16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr [[BASE:%.*]], i32 1, <8 x i1> [[TMP2]], <8 x i8> zeroinitializer) -// CHECK-NEXT: [[TMP4:%.*]] = zext <8 x i8> [[TMP3]] to <8 x i16> -// CHECK-NEXT: ret <8 x i16> [[TMP4]] +// CHECK-NEXT: 
[[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr [[BASE:%.*]], i32 1, <8 x i1> [[TMP1]], <8 x i8> zeroinitializer) +// CHECK-NEXT: [[TMP3:%.*]] = zext <8 x i8> [[TMP2]] to <8 x i16> +// CHECK-NEXT: ret <8 x i16> [[TMP3]] // uint16x8_t test_vldrbq_z_u16(const uint8_t *base, mve_pred16_t p) { @@ -375,11 +375,11 @@ uint16x8_t test_vldrbq_z_u16(const uint8_t *base, mve_pred16_t p) // CHECK-LABEL: @test_vldrbq_z_u32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = call <4 x i8> @llvm.masked.load.v4i8.p0(ptr [[BASE:%.*]], i32 1, <4 x i1> [[TMP2]], <4 x i8> zeroinitializer) -// CHECK-NEXT: [[TMP4:%.*]] = zext <4 x i8> [[TMP3]] to <4 x i32> -// CHECK-NEXT: ret <4 x i32> [[TMP4]] +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i8> @llvm.masked.load.v4i8.p0(ptr [[BASE:%.*]], i32 1, <4 x i1> [[TMP1]], <4 x i8> zeroinitializer) +// CHECK-NEXT: [[TMP3:%.*]] = zext <4 x i8> [[TMP2]] to <4 x i32> +// CHECK-NEXT: ret <4 x i32> [[TMP3]] // uint32x4_t test_vldrbq_z_u32(const uint8_t *base, mve_pred16_t p) { @@ -388,8 +388,8 @@ uint32x4_t test_vldrbq_z_u32(const uint8_t *base, mve_pred16_t p) // CHECK-LABEL: @test_vldrhq_f16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP1:%.*]] = load <8 x half>, ptr [[BASE:%.*]], align 2 -// CHECK-NEXT: ret <8 x half> [[TMP1]] +// CHECK-NEXT: [[TMP0:%.*]] = load <8 x half>, ptr [[BASE:%.*]], align 2 +// CHECK-NEXT: ret <8 x half> [[TMP0]] // float16x8_t test_vldrhq_f16(const float16_t *base) { @@ -398,8 +398,8 @@ float16x8_t test_vldrhq_f16(const float16_t *base) // CHECK-LABEL: @test_vldrhq_s16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr [[BASE:%.*]], align 2 -// CHECK-NEXT: ret <8 x i16> [[TMP1]] +// CHECK-NEXT: [[TMP0:%.*]] = load <8 x i16>, ptr [[BASE:%.*]], align 2 +// CHECK-NEXT: ret <8 x i16> [[TMP0]] // int16x8_t test_vldrhq_s16(const int16_t *base) { @@ -408,9 +408,9 @@ int16x8_t test_vldrhq_s16(const int16_t *base) // CHECK-LABEL: @test_vldrhq_s32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP1:%.*]] = load <4 x i16>, ptr [[BASE:%.*]], align 2 -// CHECK-NEXT: [[TMP2:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32> -// CHECK-NEXT: ret <4 x i32> [[TMP2]] +// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i16>, ptr [[BASE:%.*]], align 2 +// CHECK-NEXT: [[TMP1:%.*]] = sext <4 x i16> [[TMP0]] to <4 x i32> +// CHECK-NEXT: ret <4 x i32> [[TMP1]] // int32x4_t test_vldrhq_s32(const int16_t *base) { @@ -419,8 +419,8 @@ int32x4_t test_vldrhq_s32(const int16_t *base) // CHECK-LABEL: @test_vldrhq_u16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr [[BASE:%.*]], align 2 -// CHECK-NEXT: ret <8 x i16> [[TMP1]] +// CHECK-NEXT: [[TMP0:%.*]] = load <8 x i16>, ptr [[BASE:%.*]], align 2 +// CHECK-NEXT: ret <8 x i16> [[TMP0]] // uint16x8_t test_vldrhq_u16(const uint16_t *base) { @@ -429,9 +429,9 @@ uint16x8_t test_vldrhq_u16(const uint16_t *base) // CHECK-LABEL: @test_vldrhq_u32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP1:%.*]] = load <4 x i16>, ptr [[BASE:%.*]], align 2 -// CHECK-NEXT: [[TMP2:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32> -// CHECK-NEXT: ret <4 x i32> [[TMP2]] +// CHECK-NEXT: [[TMP0:%.*]] = load <4 
x i16>, ptr [[BASE:%.*]], align 2 +// CHECK-NEXT: [[TMP1:%.*]] = zext <4 x i16> [[TMP0]] to <4 x i32> +// CHECK-NEXT: ret <4 x i32> [[TMP1]] // uint32x4_t test_vldrhq_u32(const uint16_t *base) { @@ -440,10 +440,10 @@ uint32x4_t test_vldrhq_u32(const uint16_t *base) // CHECK-LABEL: @test_vldrhq_z_f16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = call <8 x half> @llvm.masked.load.v8f16.p0(ptr [[BASE:%.*]], i32 2, <8 x i1> [[TMP2]], <8 x half> zeroinitializer) -// CHECK-NEXT: ret <8 x half> [[TMP3]] +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.masked.load.v8f16.p0(ptr [[BASE:%.*]], i32 2, <8 x i1> [[TMP1]], <8 x half> zeroinitializer) +// CHECK-NEXT: ret <8 x half> [[TMP2]] // float16x8_t test_vldrhq_z_f16(const float16_t *base, mve_pred16_t p) { @@ -452,10 +452,10 @@ float16x8_t test_vldrhq_z_f16(const float16_t *base, mve_pred16_t p) // CHECK-LABEL: @test_vldrhq_z_s16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr [[BASE:%.*]], i32 2, <8 x i1> [[TMP2]], <8 x i16> zeroinitializer) -// CHECK-NEXT: ret <8 x i16> [[TMP3]] +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr [[BASE:%.*]], i32 2, <8 x i1> [[TMP1]], <8 x i16> zeroinitializer) +// CHECK-NEXT: ret <8 x i16> [[TMP2]] // int16x8_t test_vldrhq_z_s16(const int16_t *base, mve_pred16_t p) { @@ -464,11 +464,11 @@ int16x8_t test_vldrhq_z_s16(const int16_t *base, mve_pred16_t p) // CHECK-LABEL: @test_vldrhq_z_s32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr [[BASE:%.*]], i32 2, <4 x i1> [[TMP2]], <4 x i16> zeroinitializer) -// CHECK-NEXT: [[TMP4:%.*]] = sext <4 x i16> [[TMP3]] to <4 x i32> -// CHECK-NEXT: ret <4 x i32> [[TMP4]] +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr [[BASE:%.*]], i32 2, <4 x i1> [[TMP1]], <4 x i16> zeroinitializer) +// CHECK-NEXT: [[TMP3:%.*]] = sext <4 x i16> [[TMP2]] to <4 x i32> +// CHECK-NEXT: ret <4 x i32> [[TMP3]] // int32x4_t test_vldrhq_z_s32(const int16_t *base, mve_pred16_t p) { @@ -477,10 +477,10 @@ int32x4_t test_vldrhq_z_s32(const int16_t *base, mve_pred16_t p) // CHECK-LABEL: @test_vldrhq_z_u16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr [[BASE:%.*]], i32 2, <8 x i1> [[TMP2]], <8 x i16> zeroinitializer) -// CHECK-NEXT: ret <8 x i16> [[TMP3]] +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) 
+// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr [[BASE:%.*]], i32 2, <8 x i1> [[TMP1]], <8 x i16> zeroinitializer) +// CHECK-NEXT: ret <8 x i16> [[TMP2]] // uint16x8_t test_vldrhq_z_u16(const uint16_t *base, mve_pred16_t p) { @@ -489,11 +489,11 @@ uint16x8_t test_vldrhq_z_u16(const uint16_t *base, mve_pred16_t p) // CHECK-LABEL: @test_vldrhq_z_u32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr [[BASE:%.*]], i32 2, <4 x i1> [[TMP2]], <4 x i16> zeroinitializer) -// CHECK-NEXT: [[TMP4:%.*]] = zext <4 x i16> [[TMP3]] to <4 x i32> -// CHECK-NEXT: ret <4 x i32> [[TMP4]] +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr [[BASE:%.*]], i32 2, <4 x i1> [[TMP1]], <4 x i16> zeroinitializer) +// CHECK-NEXT: [[TMP3:%.*]] = zext <4 x i16> [[TMP2]] to <4 x i32> +// CHECK-NEXT: ret <4 x i32> [[TMP3]] // uint32x4_t test_vldrhq_z_u32(const uint16_t *base, mve_pred16_t p) { @@ -502,8 +502,8 @@ uint32x4_t test_vldrhq_z_u32(const uint16_t *base, mve_pred16_t p) // CHECK-LABEL: @test_vldrwq_f32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[BASE:%.*]], align 4 -// CHECK-NEXT: ret <4 x float> [[TMP1]] +// CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[BASE:%.*]], align 4 +// CHECK-NEXT: ret <4 x float> [[TMP0]] // float32x4_t test_vldrwq_f32(const float32_t *base) { @@ -512,8 +512,8 @@ float32x4_t test_vldrwq_f32(const float32_t *base) // CHECK-LABEL: @test_vldrwq_s32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[BASE:%.*]], align 4 -// CHECK-NEXT: ret <4 x i32> [[TMP1]] +// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr [[BASE:%.*]], align 4 +// CHECK-NEXT: ret <4 x i32> [[TMP0]] // int32x4_t test_vldrwq_s32(const int32_t *base) { @@ -522,8 +522,8 @@ int32x4_t test_vldrwq_s32(const int32_t *base) // CHECK-LABEL: @test_vldrwq_u32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[BASE:%.*]], align 4 -// CHECK-NEXT: ret <4 x i32> [[TMP1]] +// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr [[BASE:%.*]], align 4 +// CHECK-NEXT: ret <4 x i32> [[TMP0]] // uint32x4_t test_vldrwq_u32(const uint32_t *base) { @@ -532,10 +532,10 @@ uint32x4_t test_vldrwq_u32(const uint32_t *base) // CHECK-LABEL: @test_vldrwq_z_f32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0(ptr [[BASE:%.*]], i32 4, <4 x i1> [[TMP2]], <4 x float> zeroinitializer) -// CHECK-NEXT: ret <4 x float> [[TMP3]] +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0(ptr [[BASE:%.*]], i32 4, <4 x i1> [[TMP1]], <4 x float> zeroinitializer) +// CHECK-NEXT: ret <4 x float> [[TMP2]] // float32x4_t test_vldrwq_z_f32(const float32_t *base, mve_pred16_t p) { @@ -544,10 +544,10 @@ float32x4_t test_vldrwq_z_f32(const float32_t *base, mve_pred16_t p) // CHECK-LABEL: @test_vldrwq_z_s32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP1:%.*]] = zext 
i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[BASE:%.*]], i32 4, <4 x i1> [[TMP2]], <4 x i32> zeroinitializer) -// CHECK-NEXT: ret <4 x i32> [[TMP3]] +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[BASE:%.*]], i32 4, <4 x i1> [[TMP1]], <4 x i32> zeroinitializer) +// CHECK-NEXT: ret <4 x i32> [[TMP2]] // int32x4_t test_vldrwq_z_s32(const int32_t *base, mve_pred16_t p) { @@ -556,10 +556,10 @@ int32x4_t test_vldrwq_z_s32(const int32_t *base, mve_pred16_t p) // CHECK-LABEL: @test_vldrwq_z_u32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP1]]) -// CHECK-NEXT: [[TMP3:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[BASE:%.*]], i32 4, <4 x i1> [[TMP2]], <4 x i32> zeroinitializer) -// CHECK-NEXT: ret <4 x i32> [[TMP3]] +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[BASE:%.*]], i32 4, <4 x i1> [[TMP1]], <4 x i32> zeroinitializer) +// CHECK-NEXT: ret <4 x i32> [[TMP2]] // uint32x4_t test_vldrwq_z_u32(const uint32_t *base, mve_pred16_t p) { @@ -680,9 +680,9 @@ void test_vst1q_u32(uint32_t *base, uint32x4_t value) // CHECK-LABEL: @test_vst1q_p_f16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP1]]) -// CHECK-NEXT: call void @llvm.masked.store.v8f16.p0(<8 x half> [[VALUE:%.*]], ptr [[BASE:%.*]], i32 2, <8 x i1> [[TMP2]]) +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: call void @llvm.masked.store.v8f16.p0(<8 x half> [[VALUE:%.*]], ptr [[BASE:%.*]], i32 2, <8 x i1> [[TMP1]]) // CHECK-NEXT: ret void // void test_vst1q_p_f16(float16_t *base, float16x8_t value, mve_pred16_t p) @@ -696,9 +696,9 @@ void test_vst1q_p_f16(float16_t *base, float16x8_t value, mve_pred16_t p) // CHECK-LABEL: @test_vst1q_p_f32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP1]]) -// CHECK-NEXT: call void @llvm.masked.store.v4f32.p0(<4 x float> [[VALUE:%.*]], ptr [[BASE:%.*]], i32 4, <4 x i1> [[TMP2]]) +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: call void @llvm.masked.store.v4f32.p0(<4 x float> [[VALUE:%.*]], ptr [[BASE:%.*]], i32 4, <4 x i1> [[TMP1]]) // CHECK-NEXT: ret void // void test_vst1q_p_f32(float32_t *base, float32x4_t value, mve_pred16_t p) @@ -712,9 +712,9 @@ void test_vst1q_p_f32(float32_t *base, float32x4_t value, mve_pred16_t p) // CHECK-LABEL: @test_vst1q_p_s8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP1]]) -// CHECK-NEXT: call void @llvm.masked.store.v16i8.p0(<16 x i8> [[VALUE:%.*]], ptr [[BASE:%.*]], i32 1, <16 x i1> [[TMP2]]) +// CHECK-NEXT: 
[[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) +// CHECK-NEXT: call void @llvm.masked.store.v16i8.p0(<16 x i8> [[VALUE:%.*]], ptr [[BASE:%.*]], i32 1, <16 x i1> [[TMP1]]) // CHECK-NEXT: ret void // void test_vst1q_p_s8(int8_t *base, int8x16_t value, mve_pred16_t p) @@ -728,9 +728,9 @@ void test_vst1q_p_s8(int8_t *base, int8x16_t value, mve_pred16_t p) // CHECK-LABEL: @test_vst1q_p_s16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP1]]) -// CHECK-NEXT: call void @llvm.masked.store.v8i16.p0(<8 x i16> [[VALUE:%.*]], ptr [[BASE:%.*]], i32 2, <8 x i1> [[TMP2]]) +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: call void @llvm.masked.store.v8i16.p0(<8 x i16> [[VALUE:%.*]], ptr [[BASE:%.*]], i32 2, <8 x i1> [[TMP1]]) // CHECK-NEXT: ret void // void test_vst1q_p_s16(int16_t *base, int16x8_t value, mve_pred16_t p) @@ -744,9 +744,9 @@ void test_vst1q_p_s16(int16_t *base, int16x8_t value, mve_pred16_t p) // CHECK-LABEL: @test_vst1q_p_s32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP1]]) -// CHECK-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> [[VALUE:%.*]], ptr [[BASE:%.*]], i32 4, <4 x i1> [[TMP2]]) +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> [[VALUE:%.*]], ptr [[BASE:%.*]], i32 4, <4 x i1> [[TMP1]]) // CHECK-NEXT: ret void // void test_vst1q_p_s32(int32_t *base, int32x4_t value, mve_pred16_t p) @@ -760,9 +760,9 @@ void test_vst1q_p_s32(int32_t *base, int32x4_t value, mve_pred16_t p) // CHECK-LABEL: @test_vst1q_p_u8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP1]]) -// CHECK-NEXT: call void @llvm.masked.store.v16i8.p0(<16 x i8> [[VALUE:%.*]], ptr [[BASE:%.*]], i32 1, <16 x i1> [[TMP2]]) +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) +// CHECK-NEXT: call void @llvm.masked.store.v16i8.p0(<16 x i8> [[VALUE:%.*]], ptr [[BASE:%.*]], i32 1, <16 x i1> [[TMP1]]) // CHECK-NEXT: ret void // void test_vst1q_p_u8(uint8_t *base, uint8x16_t value, mve_pred16_t p) @@ -776,9 +776,9 @@ void test_vst1q_p_u8(uint8_t *base, uint8x16_t value, mve_pred16_t p) // CHECK-LABEL: @test_vst1q_p_u16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP1]]) -// CHECK-NEXT: call void @llvm.masked.store.v8i16.p0(<8 x i16> [[VALUE:%.*]], ptr [[BASE:%.*]], i32 2, <8 x i1> [[TMP2]]) +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: call void @llvm.masked.store.v8i16.p0(<8 x i16> [[VALUE:%.*]], ptr [[BASE:%.*]], i32 2, <8 x i1> [[TMP1]]) // CHECK-NEXT: ret void // void test_vst1q_p_u16(uint16_t *base, uint16x8_t value, mve_pred16_t p) @@ -792,9 +792,9 @@ void test_vst1q_p_u16(uint16_t *base, uint16x8_t value, mve_pred16_t 
p) // CHECK-LABEL: @test_vst1q_p_u32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP1]]) -// CHECK-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> [[VALUE:%.*]], ptr [[BASE:%.*]], i32 4, <4 x i1> [[TMP2]]) +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> [[VALUE:%.*]], ptr [[BASE:%.*]], i32 4, <4 x i1> [[TMP1]]) // CHECK-NEXT: ret void // void test_vst1q_p_u32(uint32_t *base, uint32x4_t value, mve_pred16_t p) @@ -896,9 +896,9 @@ void test_vstrbq_u32(uint8_t *base, uint32x4_t value) // CHECK-LABEL: @test_vstrbq_p_s8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP1]]) -// CHECK-NEXT: call void @llvm.masked.store.v16i8.p0(<16 x i8> [[VALUE:%.*]], ptr [[BASE:%.*]], i32 1, <16 x i1> [[TMP2]]) +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) +// CHECK-NEXT: call void @llvm.masked.store.v16i8.p0(<16 x i8> [[VALUE:%.*]], ptr [[BASE:%.*]], i32 1, <16 x i1> [[TMP1]]) // CHECK-NEXT: ret void // void test_vstrbq_p_s8(int8_t *base, int8x16_t value, mve_pred16_t p) @@ -913,9 +913,9 @@ void test_vstrbq_p_s8(int8_t *base, int8x16_t value, mve_pred16_t p) // CHECK-LABEL: @test_vstrbq_p_s16( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = trunc <8 x i16> [[VALUE:%.*]] to <8 x i8> -// CHECK-NEXT: [[TMP2:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP3:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP2]]) -// CHECK-NEXT: call void @llvm.masked.store.v8i8.p0(<8 x i8> [[TMP0]], ptr [[BASE:%.*]], i32 1, <8 x i1> [[TMP3]]) +// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP1]]) +// CHECK-NEXT: call void @llvm.masked.store.v8i8.p0(<8 x i8> [[TMP0]], ptr [[BASE:%.*]], i32 1, <8 x i1> [[TMP2]]) // CHECK-NEXT: ret void // void test_vstrbq_p_s16(int8_t *base, int16x8_t value, mve_pred16_t p) @@ -930,9 +930,9 @@ void test_vstrbq_p_s16(int8_t *base, int16x8_t value, mve_pred16_t p) // CHECK-LABEL: @test_vstrbq_p_s32( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = trunc <4 x i32> [[VALUE:%.*]] to <4 x i8> -// CHECK-NEXT: [[TMP2:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP3:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP2]]) -// CHECK-NEXT: call void @llvm.masked.store.v4i8.p0(<4 x i8> [[TMP0]], ptr [[BASE:%.*]], i32 1, <4 x i1> [[TMP3]]) +// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP1]]) +// CHECK-NEXT: call void @llvm.masked.store.v4i8.p0(<4 x i8> [[TMP0]], ptr [[BASE:%.*]], i32 1, <4 x i1> [[TMP2]]) // CHECK-NEXT: ret void // void test_vstrbq_p_s32(int8_t *base, int32x4_t value, mve_pred16_t p) @@ -946,9 +946,9 @@ void test_vstrbq_p_s32(int8_t *base, int32x4_t value, mve_pred16_t p) // CHECK-LABEL: @test_vstrbq_p_u8( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP1]]) -// CHECK-NEXT: call void @llvm.masked.store.v16i8.p0(<16 x i8> [[VALUE:%.*]], ptr [[BASE:%.*]], i32 1, <16 x i1> [[TMP2]]) +// 
CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) +// CHECK-NEXT: call void @llvm.masked.store.v16i8.p0(<16 x i8> [[VALUE:%.*]], ptr [[BASE:%.*]], i32 1, <16 x i1> [[TMP1]]) // CHECK-NEXT: ret void // void test_vstrbq_p_u8(uint8_t *base, uint8x16_t value, mve_pred16_t p) @@ -963,9 +963,9 @@ void test_vstrbq_p_u8(uint8_t *base, uint8x16_t value, mve_pred16_t p) // CHECK-LABEL: @test_vstrbq_p_u16( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = trunc <8 x i16> [[VALUE:%.*]] to <8 x i8> -// CHECK-NEXT: [[TMP2:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP3:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP2]]) -// CHECK-NEXT: call void @llvm.masked.store.v8i8.p0(<8 x i8> [[TMP0]], ptr [[BASE:%.*]], i32 1, <8 x i1> [[TMP3]]) +// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP1]]) +// CHECK-NEXT: call void @llvm.masked.store.v8i8.p0(<8 x i8> [[TMP0]], ptr [[BASE:%.*]], i32 1, <8 x i1> [[TMP2]]) // CHECK-NEXT: ret void // void test_vstrbq_p_u16(uint8_t *base, uint16x8_t value, mve_pred16_t p) @@ -980,9 +980,9 @@ void test_vstrbq_p_u16(uint8_t *base, uint16x8_t value, mve_pred16_t p) // CHECK-LABEL: @test_vstrbq_p_u32( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = trunc <4 x i32> [[VALUE:%.*]] to <4 x i8> -// CHECK-NEXT: [[TMP2:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP3:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP2]]) -// CHECK-NEXT: call void @llvm.masked.store.v4i8.p0(<4 x i8> [[TMP0]], ptr [[BASE:%.*]], i32 1, <4 x i1> [[TMP3]]) +// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP1]]) +// CHECK-NEXT: call void @llvm.masked.store.v4i8.p0(<4 x i8> [[TMP0]], ptr [[BASE:%.*]], i32 1, <4 x i1> [[TMP2]]) // CHECK-NEXT: ret void // void test_vstrbq_p_u32(uint8_t *base, uint32x4_t value, mve_pred16_t p) @@ -1068,9 +1068,9 @@ void test_vstrhq_u32(uint16_t *base, uint32x4_t value) // CHECK-LABEL: @test_vstrhq_p_f16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP1]]) -// CHECK-NEXT: call void @llvm.masked.store.v8f16.p0(<8 x half> [[VALUE:%.*]], ptr [[BASE:%.*]], i32 2, <8 x i1> [[TMP2]]) +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: call void @llvm.masked.store.v8f16.p0(<8 x half> [[VALUE:%.*]], ptr [[BASE:%.*]], i32 2, <8 x i1> [[TMP1]]) // CHECK-NEXT: ret void // void test_vstrhq_p_f16(float16_t *base, float16x8_t value, mve_pred16_t p) @@ -1084,9 +1084,9 @@ void test_vstrhq_p_f16(float16_t *base, float16x8_t value, mve_pred16_t p) // CHECK-LABEL: @test_vstrhq_p_s16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP1]]) -// CHECK-NEXT: call void @llvm.masked.store.v8i16.p0(<8 x i16> [[VALUE:%.*]], ptr [[BASE:%.*]], i32 2, <8 x i1> [[TMP2]]) +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: call void @llvm.masked.store.v8i16.p0(<8 x i16> [[VALUE:%.*]], ptr [[BASE:%.*]], i32 2, <8 x i1> [[TMP1]]) // CHECK-NEXT: ret void // void 
test_vstrhq_p_s16(int16_t *base, int16x8_t value, mve_pred16_t p) @@ -1101,9 +1101,9 @@ void test_vstrhq_p_s16(int16_t *base, int16x8_t value, mve_pred16_t p) // CHECK-LABEL: @test_vstrhq_p_s32( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = trunc <4 x i32> [[VALUE:%.*]] to <4 x i16> -// CHECK-NEXT: [[TMP2:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP3:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP2]]) -// CHECK-NEXT: call void @llvm.masked.store.v4i16.p0(<4 x i16> [[TMP0]], ptr [[BASE:%.*]], i32 2, <4 x i1> [[TMP3]]) +// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP1]]) +// CHECK-NEXT: call void @llvm.masked.store.v4i16.p0(<4 x i16> [[TMP0]], ptr [[BASE:%.*]], i32 2, <4 x i1> [[TMP2]]) // CHECK-NEXT: ret void // void test_vstrhq_p_s32(int16_t *base, int32x4_t value, mve_pred16_t p) @@ -1117,9 +1117,9 @@ void test_vstrhq_p_s32(int16_t *base, int32x4_t value, mve_pred16_t p) // CHECK-LABEL: @test_vstrhq_p_u16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP1]]) -// CHECK-NEXT: call void @llvm.masked.store.v8i16.p0(<8 x i16> [[VALUE:%.*]], ptr [[BASE:%.*]], i32 2, <8 x i1> [[TMP2]]) +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: call void @llvm.masked.store.v8i16.p0(<8 x i16> [[VALUE:%.*]], ptr [[BASE:%.*]], i32 2, <8 x i1> [[TMP1]]) // CHECK-NEXT: ret void // void test_vstrhq_p_u16(uint16_t *base, uint16x8_t value, mve_pred16_t p) @@ -1134,9 +1134,9 @@ void test_vstrhq_p_u16(uint16_t *base, uint16x8_t value, mve_pred16_t p) // CHECK-LABEL: @test_vstrhq_p_u32( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = trunc <4 x i32> [[VALUE:%.*]] to <4 x i16> -// CHECK-NEXT: [[TMP2:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP3:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP2]]) -// CHECK-NEXT: call void @llvm.masked.store.v4i16.p0(<4 x i16> [[TMP0]], ptr [[BASE:%.*]], i32 2, <4 x i1> [[TMP3]]) +// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP1]]) +// CHECK-NEXT: call void @llvm.masked.store.v4i16.p0(<4 x i16> [[TMP0]], ptr [[BASE:%.*]], i32 2, <4 x i1> [[TMP2]]) // CHECK-NEXT: ret void // void test_vstrhq_p_u32(uint16_t *base, uint32x4_t value, mve_pred16_t p) @@ -1192,9 +1192,9 @@ void test_vstrwq_u32(uint32_t *base, uint32x4_t value) // CHECK-LABEL: @test_vstrwq_p_f32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP1]]) -// CHECK-NEXT: call void @llvm.masked.store.v4f32.p0(<4 x float> [[VALUE:%.*]], ptr [[BASE:%.*]], i32 4, <4 x i1> [[TMP2]]) +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: call void @llvm.masked.store.v4f32.p0(<4 x float> [[VALUE:%.*]], ptr [[BASE:%.*]], i32 4, <4 x i1> [[TMP1]]) // CHECK-NEXT: ret void // void test_vstrwq_p_f32(float32_t *base, float32x4_t value, mve_pred16_t p) @@ -1208,9 +1208,9 @@ void test_vstrwq_p_f32(float32_t *base, float32x4_t value, mve_pred16_t p) // CHECK-LABEL: @test_vstrwq_p_s32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: 
[[TMP2:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP1]]) -// CHECK-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> [[VALUE:%.*]], ptr [[BASE:%.*]], i32 4, <4 x i1> [[TMP2]]) +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> [[VALUE:%.*]], ptr [[BASE:%.*]], i32 4, <4 x i1> [[TMP1]]) // CHECK-NEXT: ret void // void test_vstrwq_p_s32(int32_t *base, int32x4_t value, mve_pred16_t p) @@ -1224,9 +1224,9 @@ void test_vstrwq_p_s32(int32_t *base, int32x4_t value, mve_pred16_t p) // CHECK-LABEL: @test_vstrwq_p_u32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP1]]) -// CHECK-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> [[VALUE:%.*]], ptr [[BASE:%.*]], i32 4, <4 x i1> [[TMP2]]) +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> [[VALUE:%.*]], ptr [[BASE:%.*]], i32 4, <4 x i1> [[TMP1]]) // CHECK-NEXT: ret void // void test_vstrwq_p_u32(uint32_t *base, uint32x4_t value, mve_pred16_t p) diff --git a/clang/test/CodeGen/attr-target-mv.c b/clang/test/CodeGen/attr-target-mv.c index 607e3e4..b8807dd 100644 --- a/clang/test/CodeGen/attr-target-mv.c +++ b/clang/test/CodeGen/attr-target-mv.c @@ -31,6 +31,7 @@ int __attribute__((target("arch=pantherlake"))) foo(void) {return 25;} int __attribute__((target("arch=clearwaterforest"))) foo(void) {return 26;} int __attribute__((target("arch=diamondrapids"))) foo(void) {return 27;} int __attribute__((target("arch=wildcatlake"))) foo(void) {return 28;} +int __attribute__((target("arch=novalake"))) foo(void) {return 29;} int __attribute__((target("default"))) foo(void) { return 2; } int bar(void) { @@ -206,6 +207,8 @@ void calls_pr50025c(void) { pr50025c(); } // ITANIUM: ret i32 27 // ITANIUM: define{{.*}} i32 @foo.arch_wildcatlake() // ITANIUM: ret i32 28 +// ITANIUM: define{{.*}} i32 @foo.arch_novalake() +// ITANIUM: ret i32 29 // ITANIUM: define{{.*}} i32 @foo() // ITANIUM: ret i32 2 // ITANIUM: define{{.*}} i32 @bar() @@ -267,6 +270,8 @@ void calls_pr50025c(void) { pr50025c(); } // WINDOWS: ret i32 27 // WINDOWS: define dso_local i32 @foo.arch_wildcatlake() // WINDOWS: ret i32 28 +// WINDOWS: define dso_local i32 @foo.arch_novalake() +// WINDOWS: ret i32 29 // WINDOWS: define dso_local i32 @foo() // WINDOWS: ret i32 2 // WINDOWS: define dso_local i32 @bar() diff --git a/clang/test/CodeGen/target-builtin-noerror.c b/clang/test/CodeGen/target-builtin-noerror.c index 2c0d83c..47d5ae5 100644 --- a/clang/test/CodeGen/target-builtin-noerror.c +++ b/clang/test/CodeGen/target-builtin-noerror.c @@ -179,6 +179,7 @@ void verifycpustrings(void) { (void)__builtin_cpu_is("clearwaterforest"); (void)__builtin_cpu_is("pantherlake"); (void)__builtin_cpu_is("wildcatlake"); + (void)__builtin_cpu_is("novalake"); (void)__builtin_cpu_is("haswell"); (void)__builtin_cpu_is("icelake-client"); (void)__builtin_cpu_is("icelake-server"); diff --git a/clang/test/Driver/x86-march.c b/clang/test/Driver/x86-march.c index 24404ff..15f8254 100644 --- a/clang/test/Driver/x86-march.c +++ b/clang/test/Driver/x86-march.c @@ -120,6 +120,10 @@ // RUN: | FileCheck %s -check-prefix=wildcatlake // wildcatlake: "-target-cpu" "wildcatlake" // +// RUN: %clang 
--target=x86_64 -c -### %s -march=novalake 2>&1 \ +// RUN: | FileCheck %s -check-prefix=novalake +// novalake: "-target-cpu" "novalake" +// // RUN: %clang --target=x86_64 -c -### %s -march=clearwaterforest 2>&1 \ // RUN: | FileCheck %s -check-prefix=clearwaterforest // clearwaterforest: "-target-cpu" "clearwaterforest" diff --git a/clang/test/FixIt/fixit-constrained-structured-binding.cpp b/clang/test/FixIt/fixit-constrained-structured-binding.cpp index 3f21c1d..bfb004f 100644 --- a/clang/test/FixIt/fixit-constrained-structured-binding.cpp +++ b/clang/test/FixIt/fixit-constrained-structured-binding.cpp @@ -14,20 +14,20 @@ T get_T(); void use() { UnaryC auto [a, b] = get_S(); - // CHECK: error: decomposition declaration cannot be declared with constrained 'auto' + // CHECK: error: structured binding declaration cannot be declared with constrained 'auto' // CHECK: fix-it:{{.*}}:{16:3-16:10}:"" BinaryC<int> auto [c, d] = get_S(); - // CHECK: error: decomposition declaration cannot be declared with constrained 'auto' + // CHECK: error: structured binding declaration cannot be declared with constrained 'auto' // CHECK: fix-it:{{.*}}:{19:3-19:16}:"" } template<typename T> void TemplUse() { UnaryC auto [a, b] = get_T<T>(); - // CHECK: error: decomposition declaration cannot be declared with constrained 'auto' + // CHECK: error: structured binding declaration cannot be declared with constrained 'auto' // XCHECK: fix-it:{{.*}}:{26:3-26:10}:"" BinaryC<T> auto [c, d] = get_T<T>(); - // CHECK: error: decomposition declaration cannot be declared with constrained 'auto' + // CHECK: error: structured binding declaration cannot be declared with constrained 'auto' // XCHECK: fix-it:{{.*}}:{29:3-29:14}:"" } diff --git a/clang/test/Misc/target-invalid-cpu-note/x86.c b/clang/test/Misc/target-invalid-cpu-note/x86.c index 3906318..4a70e9b 100644 --- a/clang/test/Misc/target-invalid-cpu-note/x86.c +++ b/clang/test/Misc/target-invalid-cpu-note/x86.c @@ -64,6 +64,7 @@ // X86-SAME: {{^}}, gracemont // X86-SAME: {{^}}, pantherlake // X86-SAME: {{^}}, wildcatlake +// X86-SAME: {{^}}, novalake // X86-SAME: {{^}}, sierraforest // X86-SAME: {{^}}, grandridge // X86-SAME: {{^}}, graniterapids @@ -152,6 +153,7 @@ // X86_64-SAME: {{^}}, gracemont // X86_64-SAME: {{^}}, pantherlake // X86_64-SAME: {{^}}, wildcatlake +// X86_64-SAME: {{^}}, novalake // X86_64-SAME: {{^}}, sierraforest // X86_64-SAME: {{^}}, grandridge // X86_64-SAME: {{^}}, graniterapids @@ -249,6 +251,7 @@ // TUNE_X86-SAME: {{^}}, gracemont // TUNE_X86-SAME: {{^}}, pantherlake // TUNE_X86-SAME: {{^}}, wildcatlake +// TUNE_X86-SAME: {{^}}, novalake // TUNE_X86-SAME: {{^}}, sierraforest // TUNE_X86-SAME: {{^}}, grandridge // TUNE_X86-SAME: {{^}}, graniterapids @@ -353,6 +356,7 @@ // TUNE_X86_64-SAME: {{^}}, gracemont // TUNE_X86_64-SAME: {{^}}, pantherlake // TUNE_X86_64-SAME: {{^}}, wildcatlake +// TUNE_X86_64-SAME: {{^}}, novalake // TUNE_X86_64-SAME: {{^}}, sierraforest // TUNE_X86_64-SAME: {{^}}, grandridge // TUNE_X86_64-SAME: {{^}}, graniterapids diff --git a/clang/test/PCH/cxx1z-decomposition.cpp b/clang/test/PCH/cxx1z-decomposition.cpp index 914ce80..340d5ea 100644 --- a/clang/test/PCH/cxx1z-decomposition.cpp +++ b/clang/test/PCH/cxx1z-decomposition.cpp @@ -22,7 +22,7 @@ constexpr int foo(Q &&q) { return a * 10 + b; } -auto [noinit]; // expected-error{{decomposition declaration '[noinit]' requires an initializer}} +auto [noinit]; // expected-error{{structured binding declaration '[noinit]' requires an initializer}} #else @@ -31,7 +31,7 @@ int k = 
decomp(arr); static_assert(foo({1, 2}) == 12); -// expected-error@15 {{cannot decompose non-class, non-array type 'const int'}} +// expected-error@15 {{cannot bind non-class, non-array type 'const int'}} int z = decomp(10); // expected-note {{instantiation of}} #endif diff --git a/clang/test/Parser/cxx1z-class-template-argument-deduction.cpp b/clang/test/Parser/cxx1z-class-template-argument-deduction.cpp index 9d27f83..ece00a0 100644 --- a/clang/test/Parser/cxx1z-class-template-argument-deduction.cpp +++ b/clang/test/Parser/cxx1z-class-template-argument-deduction.cpp @@ -158,7 +158,7 @@ namespace decl { A arr[3] = 0; // expected-error {{cannot form array of deduced class template specialization type}} A F::*pm = 0; // expected-error {{cannot form pointer to deduced class template specialization type}} A (*fp)() = 0; // expected-error {{cannot form function returning deduced class template specialization type}} - A [x, y] = 0; // expected-error {{cannot be declared with type 'A'}} expected-error {{type 'A<int>' decomposes into 0 elements, but 2 names were provided}} + A [x, y] = 0; // expected-error {{cannot be declared with type 'A'}} expected-error {{type 'A<int>' binds to 0 elements, but 2 names were provided}} } namespace typename_specifier { @@ -185,7 +185,7 @@ namespace typename_specifier { typename ::A arr[3] = 0; // expected-error {{cannot form array of deduced class template specialization type}} typename ::A F::*pm = 0; // expected-error {{cannot form pointer to deduced class template specialization type}} typename ::A (*fp)() = 0; // expected-error {{cannot form function returning deduced class template specialization type}} - typename ::A [x, y] = 0; // expected-error {{cannot be declared with type 'typename ::A'}} expected-error {{type 'typename ::A<int>' (aka 'A<int>') decomposes into 0}} + typename ::A [x, y] = 0; // expected-error {{cannot be declared with type 'typename ::A'}} expected-error {{type 'typename ::A<int>' (aka 'A<int>') binds to 0}} struct X { template<typename T> struct A { A(T); }; }; // expected-note 8{{declared here}} @@ -208,7 +208,7 @@ namespace typename_specifier { {typename T::A arr[3] = 0;} // expected-error {{refers to class template member}} {typename T::A F::*pm = 0;} // expected-error {{refers to class template member}} {typename T::A (*fp)() = 0;} // expected-error {{refers to class template member}} - {typename T::A [x, y] = 0;} // expected-error {{cannot be declared with type 'typename T::A'}} expected-error {{type 'typename typename_specifier::X::A<int>' (aka 'typename_specifier::X::A<int>') decomposes into 0}} + {typename T::A [x, y] = 0;} // expected-error {{cannot be declared with type 'typename T::A'}} expected-error {{type 'typename typename_specifier::X::A<int>' (aka 'typename_specifier::X::A<int>') binds to 0}} } template void f<X>(); // expected-note {{instantiation of}} diff --git a/clang/test/Parser/cxx1z-decomposition.cpp b/clang/test/Parser/cxx1z-decomposition.cpp index 274e24e..21c9419 100644 --- a/clang/test/Parser/cxx1z-decomposition.cpp +++ b/clang/test/Parser/cxx1z-decomposition.cpp @@ -5,7 +5,7 @@ struct S { int a, b, c; }; // expected-note 2 {{'S::a' declared here}} -// A simple-declaration can be a decompsition declaration. +// A simple-declaration can be a structured binding declaration. namespace SimpleDecl { auto [a_x, b_x, c_x] = S(); @@ -19,7 +19,7 @@ namespace SimpleDecl { } } -// A for-range-declaration can be a decomposition declaration. +// A for-range-declaration can be a structured binding declaration. 
namespace ForRangeDecl { extern S arr[10]; void h() { @@ -100,12 +100,12 @@ namespace BadSpecifiers { inline auto &[k] = n; // expected-error {{cannot be declared 'inline'}} const int K = 5; - auto ([c]) = s; // expected-error {{decomposition declaration cannot be declared with parentheses}} + auto ([c]) = s; // expected-error {{structured binding declaration cannot be declared with parentheses}} void g() { // defining-type-specifiers other than cv-qualifiers and 'auto' S [a] = s; // expected-error {{cannot be declared with type 'S'}} decltype(auto) [b] = s; // expected-error {{cannot be declared with type 'decltype(auto)'}} - auto ([c2]) = s; // cxx17-error {{decomposition declaration cannot be declared with parenthese}} \ + auto ([c2]) = s; // cxx17-error {{structured binding declaration cannot be declared with parenthese}} \ // post2b-error {{use of undeclared identifier 'c2'}} \ // post2b-error {{expected body of lambda expression}} \ @@ -114,7 +114,7 @@ namespace BadSpecifiers { auto [e][1] = s; // expected-error {{expected ';'}} expected-error {{requires an initializer}} // FIXME: This should fire the 'misplaced array declarator' diagnostic. - int [K] arr = {0}; // expected-error {{expected ';'}} expected-error {{cannot be declared with type 'int'}} expected-error {{decomposition declaration '[K]' requires an initializer}} + int [K] arr = {0}; // expected-error {{expected ';'}} expected-error {{cannot be declared with type 'int'}} expected-error {{structured binding declaration '[K]' requires an initializer}} int [5] arr = {0}; // expected-error {{place the brackets after the name}} auto *[f] = s; // expected-error {{cannot be declared with type 'auto *'}} expected-error {{incompatible initializer}} @@ -145,14 +145,14 @@ namespace MultiDeclarator { namespace Template { int n[3]; // Structured binding template is not allowed. - template<typename T> auto [a, b, c] = n; // expected-error {{decomposition declaration cannot be a template}} + template<typename T> auto [a, b, c] = n; // expected-error {{structured binding declaration cannot be a template}} } namespace Init { void f() { int arr[1]; struct S { int n; }; - auto &[bad1]; // expected-error {{decomposition declaration '[bad1]' requires an initializer}} + auto &[bad1]; // expected-error {{structured binding declaration '[bad1]' requires an initializer}} const auto &[bad2](S{}, S{}); // expected-error {{initializer for variable '[bad2]' with type 'const auto &' contains multiple expressions}} const auto &[bad3](); // expected-error {{expected expression}} auto &[good1] = arr; diff --git a/clang/test/Parser/cxx2c-binding-pack.cpp b/clang/test/Parser/cxx2c-binding-pack.cpp index 0daaad3..40d843e 100644 --- a/clang/test/Parser/cxx2c-binding-pack.cpp +++ b/clang/test/Parser/cxx2c-binding-pack.cpp @@ -12,7 +12,7 @@ void decompose_array() { auto [...] 
= arr; // #2 // expected-error@#2{{expected identifier}} // expected-error@#2{{{no names were provided}}} - // expected-warning@#2{{{does not allow a decomposition group to be empty}}} + // expected-warning@#2{{{does not allow a structured binding group to be empty}}} auto [a, ..., b] = arr; // #3 // expected-error@#3{{expected identifier}} // expected-error@#3{{{only 1 name was provided}}} diff --git a/clang/test/Preprocessor/predefined-arch-macros.c b/clang/test/Preprocessor/predefined-arch-macros.c index e2f4bcb..dac3649 100644 --- a/clang/test/Preprocessor/predefined-arch-macros.c +++ b/clang/test/Preprocessor/predefined-arch-macros.c @@ -2529,9 +2529,12 @@ // RUN: %clang -march=wildcatlake -m32 -E -dM %s -o - 2>&1 \ // RUN: -target i386-unknown-linux \ // RUN: | FileCheck -match-full-lines %s -check-prefixes=CHECK_ARL_M32,CHECK_ARLS_M32,CHECK_NKL_M32 +// RUN: %clang -march=novalake -m32 -E -dM %s -o - 2>&1 \ +// RUN: -target i386-unknown-linux \ +// RUN: | FileCheck -match-full-lines %s -check-prefixes=CHECK_ARL_M32,CHECK_ARLS_M32,CHECK_NVL_M32,CHECK_NKL_M32 // RUN: %clang -march=clearwaterforest -m32 -E -dM %s -o - 2>&1 \ // RUN: -target i386-unknown-linux \ -// RUN: | FileCheck -match-full-lines %s -check-prefixes=CHECK_SRF_M32,CHECK_ARLS_M32,CHECK_CWF_M32,CHECK_NKL_M32 +// RUN: | FileCheck -match-full-lines %s -check-prefixes=CHECK_SRF_M32,CHECK_ARLS_M32,CHECK_NVL_M32,CHECK_UMSR_M32,CHECK_NKL_M32 // CHECK_ARL_M32: #define __ADX__ 1 // CHECK_ARL_M32: #define __AES__ 1 // CHECK_ARL_M32: #define __AVX2__ 1 @@ -2571,7 +2574,7 @@ // CHECK_ARL_M32: #define __POPCNT__ 1 // CHECK_ARL_M32-NOT: #define __PREFETCHI__ 1 // CHECK_ARLS_M32-NOT: #define __PREFETCHI__ 1 -// CHECK_CWF_M32: #define __PREFETCHI__ 1 +// CHECK_NVL_M32: #define __PREFETCHI__ 1 // CHECK_ARL_M32: #define __PRFCHW__ 1 // CHECK_ARL_M32: #define __PTWRITE__ 1 // CHECK_ARL_M32-NOT: #define __RAOINT__ 1 @@ -2598,7 +2601,7 @@ // CHECK_ARL_M32: #define __UINTR__ 1 // CHECK_ARL_M32-NOT: #define __USERMSR__ 1 // CHECK_ARLS_M32-NOT: #define __USERMSR__ 1 -// CHECK_CWF_M32: #define __USERMSR__ 1 +// CHECK_UMSR_M32: #define __USERMSR__ 1 // CHECK_ARL_M32: #define __VAES__ 1 // CHECK_ARL_M32: #define __VPCLMULQDQ__ 1 // CHECK_ARL_M32: #define __WAITPKG__ 1 @@ -2636,9 +2639,12 @@ // RUN: %clang -march=wildcatlake -m64 -E -dM %s -o - 2>&1 \ // RUN: -target i386-unknown-linux \ // RUN: | FileCheck -match-full-lines %s -check-prefixes=CHECK_ARL_M64,CHECK_ARLS_M64,CHECK_NKL_M64 +// RUN: %clang -march=novalake -m64 -E -dM %s -o - 2>&1 \ +// RUN: -target i386-unknown-linux \ +// RUN: | FileCheck -match-full-lines %s -check-prefixes=CHECK_ARL_M64,CHECK_ARLS_M64,CHECK_NVL_M64,CHECK_NKL_M64 // RUN: %clang -march=clearwaterforest -m64 -E -dM %s -o - 2>&1 \ // RUN: -target i386-unknown-linux \ -// RUN: | FileCheck -match-full-lines %s -check-prefixes=CHECK_ARL_M64,CHECK_SRF_M64,CHECK_ARLS_M64,CHECK_CWF_M64,CHECK_NKL_M64 +// RUN: | FileCheck -match-full-lines %s -check-prefixes=CHECK_ARL_M64,CHECK_SRF_M64,CHECK_ARLS_M64,CHECK_NVL_M64,CHECK_UMSR_M64,CHECK_NKL_M64 // CHECK_ARL_M64: #define __ADX__ 1 // CHECK_ARL_M64: #define __AES__ 1 // CHECK_ARL_M64: #define __AVX2__ 1 @@ -2678,7 +2684,7 @@ // CHECK_ARL_M64: #define __POPCNT__ 1 // CHECK_ARL_M64-NOT: #define __PREFETCHI__ 1 // CHECK_ARLS_M64-NOT: #define __PREFETCHI__ 1 -// CHECK_CWF_M64: #define __PREFETCHI__ 1 +// CHECK_NVL_M64: #define __PREFETCHI__ 1 // CHECK_ARL_M64: #define __PRFCHW__ 1 // CHECK_ARL_M64: #define __PTWRITE__ 1 // CHECK_ARL_M64-NOT: #define __RAOINT__ 1 @@ -2706,7 +2712,7 @@ 
// CHECK_ARL_M64: #define __UINTR__ 1 // CHECK_ARL_M64-NOT: #define __USERMSR__ 1 // CHECK_ARLS_M64-NOT: #define __USERMSR__ 1 -// CHECK_CWF_M64: #define __USERMSR__ 1 +// CHECK_UMSR_M64: #define __USERMSR__ 1 // CHECK_ARL_M64: #define __VAES__ 1 // CHECK_ARL_M64: #define __VPCLMULQDQ__ 1 // CHECK_ARL_M64: #define __WAITPKG__ 1 diff --git a/clang/test/Sema/attr-cpuspecific-cpus.c b/clang/test/Sema/attr-cpuspecific-cpus.c index 0874d0c..2360d86 100644 --- a/clang/test/Sema/attr-cpuspecific-cpus.c +++ b/clang/test/Sema/attr-cpuspecific-cpus.c @@ -88,3 +88,4 @@ ATTR(cpu_specific(gracemont)) void CPU38(void){} ATTR(cpu_specific(pantherlake)) void CPU39(void){} ATTR(cpu_specific(clearwaterforest)) void CPU40(void){} ATTR(cpu_specific(wildcatlake)) void CPU41(void){} +ATTR(cpu_specific(novalake)) void CPU42(void){} diff --git a/clang/test/SemaCXX/builtin-structured-binding-size.cpp b/clang/test/SemaCXX/builtin-structured-binding-size.cpp index bcd13a6..a8f6705 100644 --- a/clang/test/SemaCXX/builtin-structured-binding-size.cpp +++ b/clang/test/SemaCXX/builtin-structured-binding-size.cpp @@ -40,8 +40,8 @@ static_assert(__builtin_structured_binding_size(S5) == 2); // expected-error@-1 {{static assertion failed due to requirement '__builtin_structured_binding_size(S5) == 2'}} \ // expected-note@-1 {{expression evaluates to '1 == 2'}} static_assert(__builtin_structured_binding_size(S6) == 2); -// expected-error@-1 {{cannot decompose class type 'S6' because it has an anonymous union member}} \ -// expected-error@-1 {{type 'S6' cannot be decomposed}} \ +// expected-error@-1 {{cannot bind class type 'S6' because it has an anonymous union member}} \ +// expected-error@-1 {{type 'S6' cannot be bound}} \ // expected-error@-1 {{static assertion expression is not an integral constant expression}} \ // expected-note@#note-anon-union {{declared here}} static_assert(__builtin_structured_binding_size(S7) == 1); @@ -49,15 +49,15 @@ static_assert(__builtin_structured_binding_size(S7) == 1); static_assert(__builtin_structured_binding_size(SD) == 1); static_assert(__builtin_structured_binding_size(SE1) == 1); -// expected-error@-1 {{cannot decompose class type 'SE1': both it and its base class 'S1' have non-static data members}} \ -// expected-error@-1 {{type 'SE1' cannot be decomposed}} \ +// expected-error@-1 {{cannot bind class type 'SE1': both it and its base class 'S1' have non-static data members}} \ +// expected-error@-1 {{type 'SE1' cannot be bound}} \ // expected-error@-1 {{static assertion expression is not an integral constant expression}} static_assert(__builtin_structured_binding_size(U1) == 0); -// expected-error@-1 {{type 'U1' cannot be decomposed}} \ +// expected-error@-1 {{type 'U1' cannot be bound}} \ // expected-error@-1 {{static assertion expression is not an integral constant expression}} static_assert(__builtin_structured_binding_size(U2) == 0); -// expected-error@-1 {{type 'U2' cannot be decomposed}} \ +// expected-error@-1 {{type 'U2' cannot be bound}} \ // expected-error@-1 {{static assertion expression is not an integral constant expression}} @@ -75,7 +75,7 @@ static_assert(__builtin_structured_binding_size(decltype(__builtin_complex(0., 0 int VLASize; // expected-note {{declared here}} static_assert(__builtin_structured_binding_size(int[VLASize]) == 42); -// expected-error@-1 {{type 'int[VLASize]' cannot be decomposed}} \ +// expected-error@-1 {{type 'int[VLASize]' cannot be bound}} \ // expected-warning@-1 {{variable length arrays in C++ are a Clang extension}} \ // expected-note@-1 
{{read of non-const variable 'VLASize' is not allowed in a constant expression}} \ // expected-error@-1 {{static assertion expression is not an integral constant expression}} @@ -84,10 +84,10 @@ static_assert(__builtin_structured_binding_size(int[VLASize]) == 42); struct Incomplete; // expected-note {{forward declaration of 'Incomplete'}} static_assert(__builtin_structured_binding_size(Incomplete) == 1); // expected-error@-1 {{incomplete type 'Incomplete' where a complete type is required}} \ -// expected-error@-1 {{type 'Incomplete' cannot be decomposed}} \ +// expected-error@-1 {{type 'Incomplete' cannot be bound}} \ // expected-error@-1 {{static assertion expression is not an integral constant expression}} static_assert(__builtin_structured_binding_size(Incomplete[]) == 1); -// expected-error@-1 {{type 'Incomplete[]' cannot be decomposed}} \ +// expected-error@-1 {{type 'Incomplete[]' cannot be bound}} \ // expected-error@-1 {{static assertion expression is not an integral constant expression}} static_assert(__builtin_structured_binding_size(Incomplete[0]) == 0); static_assert(__builtin_structured_binding_size(Incomplete[1]) == 1); @@ -97,12 +97,12 @@ static_assert(__builtin_structured_binding_size(Incomplete[42]) == 42); static_assert(__builtin_structured_binding_size(P1) == 0); // expected-error@-1 {{static assertion failed due to requirement '__builtin_structured_binding_size(P1) == 0'}} \ // expected-note@-1 {{expression evaluates to '1 == 0'}} \ -// expected-error@-1 {{cannot decompose private member 'a' of 'P1}} \ +// expected-error@-1 {{cannot bind private member 'a' of 'P1}} \ // expected-note@#note-private {{implicitly declared private here}} void func(int array[14], int x = __builtin_structured_binding_size(decltype(array))); -//expected-error@-1 {{type 'decltype(array)' (aka 'int *') cannot be decomposed}} +//expected-error@-1 {{type 'decltype(array)' (aka 'int *') cannot be bound}} struct SM { static int array[14]; @@ -115,7 +115,7 @@ struct T { }; T<int> t1; -// expected-error@#tpl-1 {{type 'int' cannot be decomposed}} \ +// expected-error@#tpl-1 {{type 'int' cannot be bound}} \ // expected-error@#tpl-1 {{non-type template argument is not a constant expression}} \ // expected-note@-1 {{in instantiation of default argument for 'T<int>' required here}} \ // expected-note@-1 {{while checking a default template argument used here}} \ @@ -183,8 +183,8 @@ static_assert(!is_destructurable<T0&>); static_assert(__builtin_structured_binding_size(T1) == 1); static_assert(__builtin_structured_binding_size(T42) == 42); static_assert(__builtin_structured_binding_size(TSizeError) == 42); -// expected-error@-1 {{cannot decompose this type; 'std::tuple_size<TSizeError>::value' is not a valid integral constant expression}} \ -// expected-error@-1 {{type 'TSizeError' cannot be decomposed}} \ +// expected-error@-1 {{cannot bind this type; 'std::tuple_size<TSizeError>::value' is not a valid integral constant expression}} \ +// expected-error@-1 {{type 'TSizeError' cannot be bound}} \ // expected-error@-1 {{static assertion expression is not an integral constant expression}} static_assert(!is_destructurable<TSizeError>); } @@ -195,7 +195,7 @@ struct S { int y; static_assert(__builtin_structured_binding_size(S) == 2); //expected-error@-1 {{incomplete type 'S' where a complete type is required}} \ - // expected-error@-1 {{type 'S' cannot be decomposed}} \ + // expected-error@-1 {{type 'S' cannot be bound}} \ // expected-error@-1 {{static assertion expression is not an integral constant 
expression}} \ // expected-note@-4 {{definition of 'S' is not complete until the closing '}'}} }; @@ -228,20 +228,20 @@ static_assert(__is_same_as(tag_of_t<S1>, int)); static_assert(__is_same_as(tag_of_t<int>, int)); // error // expected-error@-1 {{constraints not satisfied for alias template 'tag_of_t' [with T = int]}} -// expected-note@#tag-of-constr {{because substituted constraint expression is ill-formed: type 'int' cannot be decomposed}} +// expected-note@#tag-of-constr {{because substituted constraint expression is ill-formed: type 'int' cannot be bound}} struct MinusOne; template <> struct ::std::tuple_size<MinusOne> { static constexpr int value = -1; }; int minus_one = __builtin_structured_binding_size(MinusOne); -// expected-error@-1 {{cannot decompose this type; 'std::tuple_size<MinusOne>::value' is not a valid size: -1}} -// expected-error@-2 {{type 'MinusOne' cannot be decomposed}} +// expected-error@-1 {{cannot bind this type; 'std::tuple_size<MinusOne>::value' is not a valid size: -1}} +// expected-error@-2 {{type 'MinusOne' cannot be bound}} struct UintMax; template <> struct ::std::tuple_size<UintMax> { static constexpr unsigned value = -1; }; int uint_max = __builtin_structured_binding_size(UintMax); -// expected-error@-1 {{cannot decompose this type; 'std::tuple_size<UintMax>::value' is not a valid size: 4294967295}} -// expected-error@-2 {{type 'UintMax' cannot be decomposed}} +// expected-error@-1 {{cannot bind this type; 'std::tuple_size<UintMax>::value' is not a valid size: 4294967295}} +// expected-error@-2 {{type 'UintMax' cannot be bound}} diff --git a/clang/test/SemaCXX/cxx17-compat.cpp b/clang/test/SemaCXX/cxx17-compat.cpp index 99e41d8..1c9060d 100644 --- a/clang/test/SemaCXX/cxx17-compat.cpp +++ b/clang/test/SemaCXX/cxx17-compat.cpp @@ -76,18 +76,18 @@ struct ConstexprVirtual { struct C { int x, y, z; }; static auto [cx, cy, cz] = C(); #if __cplusplus <= 201703L - // expected-warning@-2 {{decomposition declaration declared 'static' is a C++20 extension}} + // expected-warning@-2 {{structured binding declaration declared 'static' is a C++20 extension}} #else - // expected-warning@-4 {{decomposition declaration declared 'static' is incompatible with C++ standards before C++20}} + // expected-warning@-4 {{structured binding declaration declared 'static' is incompatible with C++ standards before C++20}} #endif void f() { static thread_local auto [cx, cy, cz] = C(); #if __cplusplus <= 201703L - // expected-warning@-2 {{decomposition declaration declared 'static' is a C++20 extension}} - // expected-warning@-3 {{decomposition declaration declared 'thread_local' is a C++20 extension}} + // expected-warning@-2 {{structured binding declaration declared 'static' is a C++20 extension}} + // expected-warning@-3 {{structured binding declaration declared 'thread_local' is a C++20 extension}} #else - // expected-warning@-5 {{decomposition declaration declared 'static' is incompatible with C++ standards before C++20}} - // expected-warning@-6 {{decomposition declaration declared 'thread_local' is incompatible with C++ standards before C++20}} + // expected-warning@-5 {{structured binding declaration declared 'static' is incompatible with C++ standards before C++20}} + // expected-warning@-6 {{structured binding declaration declared 'thread_local' is incompatible with C++ standards before C++20}} #endif } diff --git a/clang/test/SemaCXX/cxx1z-decomposition.cpp b/clang/test/SemaCXX/cxx1z-decomposition.cpp index 6ee1249..158a3a6 100644 --- 
a/clang/test/SemaCXX/cxx1z-decomposition.cpp +++ b/clang/test/SemaCXX/cxx1z-decomposition.cpp @@ -3,22 +3,22 @@ // RUN: %clang_cc1 -std=c++20 -Wpre-c++20-compat -fexperimental-new-constant-interpreter -verify=expected %s void use_from_own_init() { - auto [a] = a; // expected-error {{binding 'a' cannot appear in the initializer of its own decomposition declaration}} + auto [a] = a; // expected-error {{binding 'a' cannot appear in the initializer of its own structured binding declaration}} } void num_elems() { struct A0 {} a0; int a1[1], a2[2]; - auto [] = a0; // expected-warning {{does not allow a decomposition group to be empty}} - auto [v1] = a0; // expected-error {{type 'struct A0' decomposes into 0 elements, but 1 name was provided}} - auto [] = a1; // expected-error {{type 'int[1]' decomposes into 1 element, but no names were provided}} expected-warning {{empty}} + auto [] = a0; // expected-warning {{does not allow a structured binding group to be empty}} + auto [v1] = a0; // expected-error {{type 'struct A0' binds to 0 elements, but 1 name was provided}} + auto [] = a1; // expected-error {{type 'int[1]' binds to 1 element, but no names were provided}} expected-warning {{empty}} auto [v2] = a1; - auto [v3, v4] = a1; // expected-error {{type 'int[1]' decomposes into 1 element, but 2 names were provided}} - auto [] = a2; // expected-error {{type 'int[2]' decomposes into 2 elements, but no names were provided}} expected-warning {{empty}} - auto [v5] = a2; // expected-error {{type 'int[2]' decomposes into 2 elements, but only 1 name was provided}} + auto [v3, v4] = a1; // expected-error {{type 'int[1]' binds to 1 element, but 2 names were provided}} + auto [] = a2; // expected-error {{type 'int[2]' binds to 2 elements, but no names were provided}} expected-warning {{empty}} + auto [v5] = a2; // expected-error {{type 'int[2]' binds to 2 elements, but only 1 name was provided}} auto [v6, v7] = a2; - auto [v8, v9, v10] = a2; // expected-error {{type 'int[2]' decomposes into 2 elements, but 3 names were provided}} + auto [v8, v9, v10] = a2; // expected-error {{type 'int[2]' binds to 2 elements, but 3 names were provided}} } // As a Clang extension, _Complex can be decomposed. 
@@ -105,7 +105,7 @@ void enclosing() { void bitfield() { struct { int a : 3, : 4, b : 5; } a; auto &[x, y] = a; - auto &[p, q, r] = a; // expected-error-re {{type 'struct (unnamed struct at {{.*}})' decomposes into 2 elements, but 3 names were provided}} + auto &[p, q, r] = a; // expected-error-re {{type 'struct (unnamed struct at {{.*}})' binds to 2 elements, but 3 names were provided}} } void for_range() { @@ -115,7 +115,7 @@ void for_range() { } int y[5]; - for (auto[c] : y) { // expected-error {{cannot decompose non-class, non-array type 'int'}} + for (auto[c] : y) { // expected-error {{cannot bind non-class, non-array type 'int'}} c++; } } @@ -157,16 +157,16 @@ int f2() { namespace lambdas { void f() { int n; - auto [a] = // expected-error {{cannot decompose lambda closure type}} + auto [a] = // expected-error {{cannot bind lambda closure type}} [n] {}; // expected-note {{lambda expression}} } - auto [] = []{}; // expected-warning {{ISO C++17 does not allow a decomposition group to be empty}} + auto [] = []{}; // expected-warning {{ISO C++17 does not allow a structured binding group to be empty}} int g() { int n = 0; auto a = [=](auto &self) { // expected-note {{lambda expression}} - auto &[capture] = self; // expected-error {{cannot decompose lambda closure type}} + auto &[capture] = self; // expected-error {{cannot bind lambda closure type}} ++capture; return n; }; @@ -188,14 +188,14 @@ namespace lambdas { struct A : decltype(x) { int n; }; - auto &&[r] = A{x, 0}; // expected-error-re {{cannot decompose class type 'A': both it and its base class 'decltype(x)' (aka '(lambda {{.*}})') have non-static data members}} + auto &&[r] = A{x, 0}; // expected-error-re {{cannot bind class type 'A': both it and its base class 'decltype(x)' (aka '(lambda {{.*}})') have non-static data members}} return r; } void j() { auto x = [] {}; struct A : decltype(x) {}; - auto &&[] = A{x}; // expected-warning {{ISO C++17 does not allow a decomposition group to be empty}} + auto &&[] = A{x}; // expected-warning {{ISO C++17 does not allow a structured binding group to be empty}} } } diff --git a/clang/test/SemaCXX/cxx2c-binding-pack-nontemplate.cpp b/clang/test/SemaCXX/cxx2c-binding-pack-nontemplate.cpp index 638a2d8..0dfb52b 100644 --- a/clang/test/SemaCXX/cxx2c-binding-pack-nontemplate.cpp +++ b/clang/test/SemaCXX/cxx2c-binding-pack-nontemplate.cpp @@ -10,7 +10,7 @@ void decompose_array() { auto [x, ...rest, y] = arr; // cxx26-warning@+4 {{structured binding packs are incompatible with C++ standards before C++2c}} - // cxx23-error@+3 {{decomposition declaration cannot be declared 'constexpr'}} + // cxx23-error@+3 {{structured binding declaration cannot be declared 'constexpr'}} // cxx23-warning@+2 {{structured binding packs are a C++2c extension}} // nontemplate-error@+1 {{pack declaration outside of template}} constexpr auto [x_c, ...rest_c, y_c] = arr; diff --git a/clang/test/SemaCXX/cxx2c-binding-pack.cpp b/clang/test/SemaCXX/cxx2c-binding-pack.cpp index a8c1386..0b0eb88 100644 --- a/clang/test/SemaCXX/cxx2c-binding-pack.cpp +++ b/clang/test/SemaCXX/cxx2c-binding-pack.cpp @@ -82,7 +82,7 @@ void decompose_array() { static_assert(sizeof...(b) == 0); auto [...c] = arr1; static_assert(sizeof...(c) == 1); - auto [a1, ...b1, c1] = arr1; // expected-error{{decomposes into 1 element, but 3 names were provided}} + auto [a1, ...b1, c1] = arr1; // expected-error{{binds to 1 element, but 3 names were provided}} } // Test case by Younan Zhang. 
@@ -160,7 +160,7 @@ void now_i_know_my() { static_assert(sizeof...(e) == 2); auto [h, i, j, ...k] = C(); // OK, the pack k is empty static_assert(sizeof...(e) == 0); - auto [l, m, n, o, ...p] = C(); // expected-error{{{decomposes into 3 elements, but 5 names were provided}}} + auto [l, m, n, o, ...p] = C(); // expected-error{{{binds to 3 elements, but 5 names were provided}}} } } // namespace @@ -225,7 +225,7 @@ namespace GH125165 { template <typename = void> auto f(auto t) { const auto& [...pack] = t; - // expected-error@-1 {{cannot decompose non-class, non-array type 'char const'}} + // expected-error@-1 {{cannot bind non-class, non-array type 'char const'}} (pack, ...); }; diff --git a/clang/test/SemaCXX/matrix-type.cpp b/clang/test/SemaCXX/matrix-type.cpp index 186d3b6..0f9bff8 100644 --- a/clang/test/SemaCXX/matrix-type.cpp +++ b/clang/test/SemaCXX/matrix-type.cpp @@ -14,6 +14,7 @@ void matrix_var_dimensions(int Rows, unsigned Columns, char C) { using matrix7_t = int __attribute__((matrix_type(1, 0))); // expected-error{{zero matrix size}} using matrix7_t = int __attribute__((matrix_type(char, 0))); // expected-error{{expected '(' for function-style cast or type construction}} using matrix8_t = int __attribute__((matrix_type(1048576, 1))); // expected-error{{matrix row size too large}} + using matrix8_t = int __attribute__((matrix_type(1048576, 1048576))); // expected-error{{matrix row and column size too large}} } struct S1 {}; diff --git a/clang/test/SemaCXX/sizeless-1.cpp b/clang/test/SemaCXX/sizeless-1.cpp index 688bbf0..ef3eec9 100644 --- a/clang/test/SemaCXX/sizeless-1.cpp +++ b/clang/test/SemaCXX/sizeless-1.cpp @@ -532,7 +532,7 @@ void cxx_only(int sel) { auto auto_int8 = local_int8; auto auto_int16 = local_int16; #if __cplusplus >= 201703L - auto [auto_int8_a] = local_int8; // expected-error {{cannot decompose non-class, non-array type 'svint8_t' (aka '__SVInt8_t')}} + auto [auto_int8_a] = local_int8; // expected-error {{cannot bind non-class, non-array type 'svint8_t' (aka '__SVInt8_t')}} #endif #endif diff --git a/clang/test/SemaCXX/warn-shadow-in-lambdas.cpp b/clang/test/SemaCXX/warn-shadow-in-lambdas.cpp index 2388c5f..0042ef0 100644 --- a/clang/test/SemaCXX/warn-shadow-in-lambdas.cpp +++ b/clang/test/SemaCXX/warn-shadow-in-lambdas.cpp @@ -259,11 +259,11 @@ struct S { int foo() { #ifdef AVOID - auto [a] = S{0}; // cxx14-warning {{decomposition declarations are a C++17 extension}} + auto [a] = S{0}; // cxx14-warning {{structured binding declarations are a C++17 extension}} [a = a] () { // No warning with basic -Wshadow due to uncaptured-local classification }(); #else - auto [a] = S{0}; // cxx14-warning {{decomposition declarations are a C++17 extension}} expected-note {{previous declaration is here}} + auto [a] = S{0}; // cxx14-warning {{structured binding declarations are a C++17 extension}} expected-note {{previous declaration is here}} [a = a] () { // expected-warning {{declaration shadows a structured binding}} }(); #endif diff --git a/clang/test/SemaHLSL/BuiltIns/matrix-basic_types-errors.hlsl b/clang/test/SemaHLSL/BuiltIns/matrix-basic_types-errors.hlsl index 6f6b01b..5ad1d6a 100644 --- a/clang/test/SemaHLSL/BuiltIns/matrix-basic_types-errors.hlsl +++ b/clang/test/SemaHLSL/BuiltIns/matrix-basic_types-errors.hlsl @@ -10,3 +10,26 @@ uint16_t4x4 mat2; matrix<int, 5, 5> mat3; // expected-error@-1 {{constraints not satisfied for alias template 'matrix' [with element = int, rows_count = 5, cols_count = 5]}} // expected-note@* {{because '5 <= 4' (5 <= 4) evaluated to 
false}} + +using float8x4 = __attribute__((matrix_type(8,4))) float; +// expected-error@-1 {{matrix row size too large}} + +using float4x8 = __attribute__((matrix_type(4,8))) float; +// expected-error@-1 {{matrix column size too large}} + +using float8x8 = __attribute__((matrix_type(8,8))) float; +// expected-error@-1 {{matrix row and column size too large}} + +using floatNeg1x4 = __attribute__((matrix_type(-1,4))) float; +// expected-error@-1 {{matrix row size too large}} +using float4xNeg1 = __attribute__((matrix_type(4,-1))) float; +// expected-error@-1 {{matrix column size too large}} +using floatNeg1xNeg1 = __attribute__((matrix_type(-1,-1))) float; +// expected-error@-1 {{matrix row and column size too large}} + +using float0x4 = __attribute__((matrix_type(0,4))) float; +// expected-error@-1 {{zero matrix size}} +using float4x0 = __attribute__((matrix_type(4,0))) float; +// expected-error@-1 {{zero matrix size}} +using float0x0 = __attribute__((matrix_type(0,0))) float; +// expected-error@-1 {{zero matrix size}} diff --git a/clang/test/SemaHLSL/Language/AggregateSplatCasts.hlsl b/clang/test/SemaHLSL/Language/AggregateSplatCasts.hlsl index e5a851c..623fca0 100644 --- a/clang/test/SemaHLSL/Language/AggregateSplatCasts.hlsl +++ b/clang/test/SemaHLSL/Language/AggregateSplatCasts.hlsl @@ -3,8 +3,9 @@ // splat from vec1 to vec // CHECK-LABEL: call1 // CHECK: CStyleCastExpr {{.*}} 'int3':'vector<int, 3>' <HLSLAggregateSplatCast> -// CHECK-NEXT: ImplicitCastExpr {{.*}} 'int' lvalue <FloatingToIntegral> part_of_explicit_cast -// CHECK-NEXT: ImplicitCastExpr {{.*}} 'float' lvalue <HLSLVectorTruncation> part_of_explicit_cast +// CHECK-NEXT: ImplicitCastExpr {{.*}} 'int' <FloatingToIntegral> part_of_explicit_cast +// CHECK-NEXT: ImplicitCastExpr {{.*}} 'float' <HLSLVectorTruncation> part_of_explicit_cast +// CHECK-NEXT: ImplicitCastExpr {{.*}} 'float1':'vector<float, 1>' <LValueToRValue> part_of_explicit_cast // CHECK-NEXT: DeclRefExpr {{.*}} 'float1':'vector<float, 1>' lvalue Var {{.*}} 'A' 'float1':'vector<float, 1>' export void call1() { float1 A = {1.0}; @@ -21,7 +22,7 @@ struct S { // splat from scalar to aggregate // CHECK-LABEL: call2 // CHECK: CStyleCastExpr {{.*}} 'S' <HLSLAggregateSplatCast> -// CHECK-NEXt: IntegerLiteral {{.*}} 'int' 5 +// CHECK-NEXT: IntegerLiteral {{.*}} 'int' 5 export void call2() { S s = (S)5; } diff --git a/clang/test/SemaHLSL/Language/ElementwiseCasts.hlsl b/clang/test/SemaHLSL/Language/ElementwiseCasts.hlsl index 563d3f0..8481cfc 100644 --- a/clang/test/SemaHLSL/Language/ElementwiseCasts.hlsl +++ b/clang/test/SemaHLSL/Language/ElementwiseCasts.hlsl @@ -21,3 +21,27 @@ export void call2() { float B[1] = {1.0}; B = (float[1])A; } + +struct S { + int A; + float F; +}; + +// cast from a struct +// CHECK-LABEL: call3 +// CHECK: CStyleCastExpr {{.*}} 'int[2]' <HLSLElementwiseCast> +// CHECK-NEXT: DeclRefExpr {{.*}} 'S' lvalue Var {{.*}} 'SS' 'S' +export void call3() { + S SS = {1,1.0}; + int A[2] = (int[2])SS; +} + +// cast from a vector +// CHECK-LABEL: call4 +// CHECK: CStyleCastExpr {{.*}} 'float[3]' <HLSLElementwiseCast> +// CHECK-NEXT: ImplicitCastExpr {{.*}} 'int3':'vector<int, 3>' <LValueToRValue> part_of_explicit_cast +// CHECK-NEXT: DeclRefExpr {{.*}} 'int3':'vector<int, 3>' lvalue Var {{.*}} 'A' 'int3' +export void call4() { + int3 A = {1,2,3}; + float B[3] = (float[3])A; +} diff --git a/clang/test/SemaTemplate/cxx1z-decomposition.cpp b/clang/test/SemaTemplate/cxx1z-decomposition.cpp index 779c4cf..b0cc4e6 100644 --- 
a/clang/test/SemaTemplate/cxx1z-decomposition.cpp +++ b/clang/test/SemaTemplate/cxx1z-decomposition.cpp @@ -15,7 +15,7 @@ namespace std { template<> struct std::tuple_size<C> { enum { value = 2 }; }; template<typename T> int decomp(T &t) { - auto &[a, b] = t; // expected-error {{type 'D' decomposes into 3 elements, but only 2 names were provided}} + auto &[a, b] = t; // expected-error {{type 'D' binds to 3 elements, but only 2 names were provided}} return a + b; // expected-error {{cannot initialize return object of type 'int' with an rvalue of type 'int *'}} } diff --git a/clang/unittests/Frontend/CompilerInvocationTest.cpp b/clang/unittests/Frontend/CompilerInvocationTest.cpp index 75390aa..1332422 100644 --- a/clang/unittests/Frontend/CompilerInvocationTest.cpp +++ b/clang/unittests/Frontend/CompilerInvocationTest.cpp @@ -732,6 +732,26 @@ TEST_F(CommandLineTest, ConditionalParsingIfTrueFlagPresent) { ASSERT_THAT(GeneratedArgs, Contains(StrEq("-sycl-std=2017"))); } +TEST_F(CommandLineTest, ConditionalParsingIfHLSLFlagPresent) { + const char *Args[] = {"-xhlsl"}; + + CompilerInvocation::CreateFromArgs(Invocation, Args, *Diags); + + ASSERT_EQ(Invocation.getLangOpts().MaxMatrixDimension, 4u); + + Invocation.generateCC1CommandLine(GeneratedArgs, *this); +} + +TEST_F(CommandLineTest, ConditionalParsingIfHLSLFlagNotPresent) { + const char *Args[] = {""}; + + CompilerInvocation::CreateFromArgs(Invocation, Args, *Diags); + + ASSERT_EQ(Invocation.getLangOpts().MaxMatrixDimension, 1048575u); + + Invocation.generateCC1CommandLine(GeneratedArgs, *this); +} + // Wide integer option. TEST_F(CommandLineTest, WideIntegerHighValue) { |