Diffstat (limited to 'gcc/config')
-rw-r--r-- gcc/config/aarch64/aarch64-builtin-pairs.def | 73
-rw-r--r-- gcc/config/aarch64/aarch64-builtins.cc | 176
-rw-r--r-- gcc/config/aarch64/aarch64-c.cc | 1
-rw-r--r-- gcc/config/aarch64/aarch64-cc-fusion.cc | 297
-rw-r--r-- gcc/config/aarch64/aarch64-cores.def | 7
-rw-r--r-- gcc/config/aarch64/aarch64-cost-tables.h | 54
-rw-r--r-- gcc/config/aarch64/aarch64-elf-metadata.cc | 145
-rw-r--r-- gcc/config/aarch64/aarch64-elf-metadata.h | 253
-rw-r--r-- gcc/config/aarch64/aarch64-option-extensions.def | 14
-rw-r--r-- gcc/config/aarch64/aarch64-passes.def | 1
-rw-r--r-- gcc/config/aarch64/aarch64-protos.h | 26
-rw-r--r-- gcc/config/aarch64/aarch64-simd-builtins.def | 5
-rw-r--r-- gcc/config/aarch64/aarch64-simd.md | 146
-rw-r--r-- gcc/config/aarch64/aarch64-sme.md | 35
-rw-r--r-- gcc/config/aarch64/aarch64-speculation.cc | 11
-rw-r--r-- gcc/config/aarch64/aarch64-sve-builtins-base.cc | 45
-rw-r--r-- gcc/config/aarch64/aarch64-sve-builtins-functions.h | 8
-rw-r--r-- gcc/config/aarch64/aarch64-sve-builtins-sme.def | 6
-rw-r--r-- gcc/config/aarch64/aarch64-sve-builtins-sve2.cc | 53
-rw-r--r-- gcc/config/aarch64/aarch64-sve-builtins.cc | 37
-rw-r--r-- gcc/config/aarch64/aarch64-sve.md | 1976
-rw-r--r-- gcc/config/aarch64/aarch64-sve2.md | 274
-rw-r--r-- gcc/config/aarch64/aarch64-sys-regs.def | 16
-rw-r--r-- gcc/config/aarch64/aarch64-tune.md | 2
-rw-r--r-- gcc/config/aarch64/aarch64-tuning-flags.def | 2
-rw-r--r-- gcc/config/aarch64/aarch64-vxworks.h | 7
-rw-r--r-- gcc/config/aarch64/aarch64.cc | 1270
-rw-r--r-- gcc/config/aarch64/aarch64.h | 9
-rw-r--r-- gcc/config/aarch64/aarch64.md | 879
-rw-r--r-- gcc/config/aarch64/aarch64.opt | 15
-rw-r--r-- gcc/config/aarch64/arm_acle.h | 14
-rw-r--r-- gcc/config/aarch64/atomics.md | 20
-rw-r--r-- gcc/config/aarch64/constraints.md | 19
-rw-r--r-- gcc/config/aarch64/cortex-a57-fma-steering.cc | 5
-rw-r--r-- gcc/config/aarch64/driver-aarch64.cc | 2
-rwxr-xr-x gcc/config/aarch64/gcc-auto-profile | 53
-rw-r--r-- gcc/config/aarch64/iterators.md | 196
-rw-r--r-- gcc/config/aarch64/predicates.md | 39
-rw-r--r-- gcc/config/aarch64/t-aarch64 | 17
-rw-r--r-- gcc/config/aarch64/tuning_models/cortexx925.h | 3
-rw-r--r-- gcc/config/aarch64/tuning_models/generic_armv9_a.h | 2
-rw-r--r-- gcc/config/aarch64/tuning_models/neoversev2.h | 3
-rw-r--r-- gcc/config/aarch64/tuning_models/neoversev3.h | 3
-rw-r--r-- gcc/config/aarch64/tuning_models/neoversev3ae.h | 3
-rw-r--r-- gcc/config/aarch64/tuning_models/olympus.h | 210
-rw-r--r-- gcc/config/alpha/alpha.cc | 35
-rw-r--r-- gcc/config/arc/arc-modes.def | 1
-rw-r--r-- gcc/config/arc/arc-protos.h | 1
-rw-r--r-- gcc/config/arc/arc.cc | 34
-rw-r--r-- gcc/config/arc/arc.md | 169
-rw-r--r-- gcc/config/arc/predicates.md | 9
-rw-r--r-- gcc/config/arm/aarch-common.cc | 3
-rw-r--r-- gcc/config/arm/aarch-cost-tables.h | 36
-rw-r--r-- gcc/config/arm/aout.h | 5
-rw-r--r-- gcc/config/arm/arm-builtins.cc | 1279
-rw-r--r-- gcc/config/arm/arm-c.cc | 7
-rw-r--r-- gcc/config/arm/arm-cpus.in | 28
-rw-r--r-- gcc/config/arm/arm-generic.md | 4
-rw-r--r-- gcc/config/arm/arm-opts.h | 1
-rw-r--r-- gcc/config/arm/arm-protos.h | 8
-rw-r--r-- gcc/config/arm/arm-tables.opt | 6
-rw-r--r-- gcc/config/arm/arm-tune.md | 53
-rw-r--r-- gcc/config/arm/arm.cc | 458
-rw-r--r-- gcc/config/arm/arm.h | 175
-rw-r--r-- gcc/config/arm/arm.md | 60
-rw-r--r-- gcc/config/arm/arm.opt | 3
-rw-r--r-- gcc/config/arm/arm_neon.h | 5
-rw-r--r-- gcc/config/arm/constraints.md | 18
-rw-r--r-- gcc/config/arm/iterators.md | 20
-rw-r--r-- gcc/config/arm/iwmmxt.md | 1766
-rw-r--r-- gcc/config/arm/iwmmxt2.md | 903
-rw-r--r-- gcc/config/arm/marvell-f-iwmmxt.md | 189
-rw-r--r-- gcc/config/arm/predicates.md | 11
-rw-r--r-- gcc/config/arm/t-arm | 3
-rw-r--r-- gcc/config/arm/thumb1.md | 9
-rw-r--r-- gcc/config/arm/thumb2.md | 23
-rw-r--r-- gcc/config/arm/types.md | 123
-rw-r--r-- gcc/config/arm/unspecs.md | 29
-rw-r--r-- gcc/config/arm/vec-common.md | 31
-rw-r--r-- gcc/config/avr/avr-dimode.md | 87
-rw-r--r-- gcc/config/avr/avr-fixed.md | 129
-rw-r--r-- gcc/config/avr/avr-log.cc | 1
-rw-r--r-- gcc/config/avr/avr-mcus.def | 11
-rw-r--r-- gcc/config/avr/avr-passes.cc | 153
-rw-r--r-- gcc/config/avr/avr-passes.def | 8
-rw-r--r-- gcc/config/avr/avr-protos.h | 5
-rw-r--r-- gcc/config/avr/avr.cc | 195
-rw-r--r-- gcc/config/avr/avr.h | 18
-rw-r--r-- gcc/config/avr/avr.md | 1218
-rw-r--r-- gcc/config/avr/avr.opt | 8
-rw-r--r-- gcc/config/avr/avr.opt.urls | 5
-rw-r--r-- gcc/config/avr/specs.h | 2
-rw-r--r-- gcc/config/bfin/bfin.cc | 3
-rw-r--r-- gcc/config/c6x/c6x.cc | 3
-rw-r--r-- gcc/config/c6x/c6x.h | 6
-rw-r--r-- gcc/config/cris/cris.cc | 9
-rw-r--r-- gcc/config/cris/cris.h | 2
-rw-r--r-- gcc/config/cris/cris.md | 3
-rw-r--r-- gcc/config/csky/csky.cc | 3
-rw-r--r-- gcc/config/darwin-driver.cc | 22
-rw-r--r-- gcc/config/darwin-sections.def | 7
-rw-r--r-- gcc/config/darwin.cc | 67
-rw-r--r-- gcc/config/darwin.h | 30
-rw-r--r-- gcc/config/epiphany/epiphany.cc | 8
-rw-r--r-- gcc/config/epiphany/resolve-sw-modes.cc | 3
-rw-r--r-- gcc/config/fr30/fr30.cc | 3
-rw-r--r-- gcc/config/frv/frv.cc | 12
-rw-r--r-- gcc/config/frv/frv.md | 15
-rw-r--r-- gcc/config/gcn/gcn-devices.def | 33
-rw-r--r-- gcc/config/gcn/gcn-opts.h | 23
-rw-r--r-- gcc/config/gcn/gcn-tables.opt | 9
-rw-r--r-- gcc/config/gcn/gcn-valu.md | 352
-rw-r--r-- gcc/config/gcn/gcn.cc | 350
-rw-r--r-- gcc/config/gcn/gcn.h | 2
-rw-r--r-- gcc/config/gcn/gcn.md | 437
-rw-r--r-- gcc/config/h8300/addsub.md | 2
-rw-r--r-- gcc/config/h8300/h8300.h | 21
-rw-r--r-- gcc/config/h8300/jumpcall.md | 12
-rw-r--r-- gcc/config/h8300/testcompare.md | 26
-rw-r--r-- gcc/config/i386/avx10_2-512bf16intrin.h | 681
-rw-r--r-- gcc/config/i386/avx10_2-512convertintrin.h | 572
-rw-r--r-- gcc/config/i386/avx10_2-512mediaintrin.h | 514
-rw-r--r-- gcc/config/i386/avx10_2-512minmaxintrin.h | 489
-rw-r--r-- gcc/config/i386/avx10_2-512satcvtintrin.h | 1575
-rw-r--r-- gcc/config/i386/avx10_2bf16intrin.h | 614
-rw-r--r-- gcc/config/i386/avx10_2convertintrin.h | 530
-rw-r--r-- gcc/config/i386/avx10_2mediaintrin.h | 469
-rw-r--r-- gcc/config/i386/avx10_2minmaxintrin.h | 448
-rw-r--r-- gcc/config/i386/avx10_2satcvtintrin.h | 1779
-rw-r--r-- gcc/config/i386/avx512bf16intrin.h | 19
-rw-r--r-- gcc/config/i386/avx512bf16vlintrin.h | 4
-rw-r--r-- gcc/config/i386/avx512bitalgintrin.h | 4
-rw-r--r-- gcc/config/i386/avx512bitalgvlintrin.h | 4
-rw-r--r-- gcc/config/i386/avx512bwintrin.h | 19
-rw-r--r-- gcc/config/i386/avx512cdintrin.h | 2
-rw-r--r-- gcc/config/i386/avx512dqintrin.h | 21
-rw-r--r-- gcc/config/i386/avx512fintrin.h | 32
-rw-r--r-- gcc/config/i386/avx512fp16intrin.h | 21
-rw-r--r-- gcc/config/i386/avx512fp16vlintrin.h | 4
-rw-r--r-- gcc/config/i386/avx512ifmaintrin.h | 4
-rw-r--r-- gcc/config/i386/avx512ifmavlintrin.h | 4
-rw-r--r-- gcc/config/i386/avx512vbmi2intrin.h | 4
-rw-r--r-- gcc/config/i386/avx512vbmi2vlintrin.h | 4
-rw-r--r-- gcc/config/i386/avx512vbmiintrin.h | 4
-rw-r--r-- gcc/config/i386/avx512vbmivlintrin.h | 4
-rw-r--r-- gcc/config/i386/avx512vlbwintrin.h | 4
-rw-r--r-- gcc/config/i386/avx512vldqintrin.h | 4
-rw-r--r-- gcc/config/i386/avx512vlintrin.h | 6
-rw-r--r-- gcc/config/i386/avx512vnniintrin.h | 4
-rw-r--r-- gcc/config/i386/avx512vnnivlintrin.h | 4
-rw-r--r-- gcc/config/i386/avx512vp2intersectintrin.h | 4
-rw-r--r-- gcc/config/i386/avx512vp2intersectvlintrin.h | 5
-rw-r--r-- gcc/config/i386/avx512vpopcntdqintrin.h | 4
-rw-r--r-- gcc/config/i386/avx512vpopcntdqvlintrin.h | 5
-rw-r--r-- gcc/config/i386/cygming.h | 14
-rw-r--r-- gcc/config/i386/driver-i386.cc | 36
-rwxr-xr-x gcc/config/i386/gcc-auto-profile | 29
-rw-r--r-- gcc/config/i386/gfniintrin.h | 8
-rw-r--r-- gcc/config/i386/host-mingw32.cc | 32
-rw-r--r-- gcc/config/i386/i386-builtin.def | 1290
-rw-r--r-- gcc/config/i386/i386-builtins.cc | 50
-rw-r--r-- gcc/config/i386/i386-c.cc | 4
-rw-r--r-- gcc/config/i386/i386-expand.cc | 818
-rw-r--r-- gcc/config/i386/i386-features.cc | 1713
-rw-r--r-- gcc/config/i386/i386-features.h | 13
-rw-r--r-- gcc/config/i386/i386-isa.def | 2
-rw-r--r-- gcc/config/i386/i386-modes.def | 2
-rw-r--r-- gcc/config/i386/i386-options.cc | 455
-rw-r--r-- gcc/config/i386/i386-passes.def | 1
-rw-r--r-- gcc/config/i386/i386-protos.h | 13
-rw-r--r-- gcc/config/i386/i386.cc | 1882
-rw-r--r-- gcc/config/i386/i386.h | 153
-rw-r--r-- gcc/config/i386/i386.md | 1068
-rw-r--r-- gcc/config/i386/i386.opt | 49
-rw-r--r-- gcc/config/i386/i386.opt.urls | 9
-rw-r--r-- gcc/config/i386/immintrin.h | 10
-rw-r--r-- gcc/config/i386/mmx.md | 71
-rw-r--r-- gcc/config/i386/predicates.md | 81
-rw-r--r-- gcc/config/i386/sse.md | 1520
-rw-r--r-- gcc/config/i386/vaesintrin.h | 4
-rw-r--r-- gcc/config/i386/vpclmulqdqintrin.h | 4
-rw-r--r-- gcc/config/i386/x86-tune-costs.h | 635
-rw-r--r-- gcc/config/i386/x86-tune-sched.cc | 13
-rw-r--r-- gcc/config/i386/x86-tune.def | 16
-rw-r--r-- gcc/config/ia64/ia64.cc | 15
-rw-r--r-- gcc/config/loongarch/genopts/isa-evolution.in | 1
-rw-r--r-- gcc/config/loongarch/lasx.md | 4
-rw-r--r-- gcc/config/loongarch/loongarch-def.cc | 4
-rw-r--r-- gcc/config/loongarch/loongarch-def.h | 10
-rw-r--r-- gcc/config/loongarch/loongarch-evolution.cc | 4
-rw-r--r-- gcc/config/loongarch/loongarch-evolution.h | 8
-rw-r--r-- gcc/config/loongarch/loongarch-str.h | 1
-rw-r--r-- gcc/config/loongarch/loongarch.cc | 219
-rw-r--r-- gcc/config/loongarch/loongarch.h | 6
-rw-r--r-- gcc/config/loongarch/loongarch.md | 3
-rw-r--r-- gcc/config/loongarch/loongarch.opt | 4
-rw-r--r-- gcc/config/loongarch/loongarch.opt.urls | 3
-rw-r--r-- gcc/config/loongarch/lsx.md | 4
-rw-r--r-- gcc/config/loongarch/simd.md | 2
-rw-r--r-- gcc/config/loongarch/sync.md | 641
-rw-r--r-- gcc/config/m32r/m32r.cc | 3
-rw-r--r-- gcc/config/m32r/m32r.md | 6
-rw-r--r-- gcc/config/m68k/m68k.cc | 9
-rw-r--r-- gcc/config/m68k/m68k.md | 12
-rw-r--r-- gcc/config/mcore/mcore.cc | 4
-rw-r--r-- gcc/config/microblaze/microblaze.cc | 7
-rw-r--r-- gcc/config/mingw/mingw32.h | 9
-rw-r--r-- gcc/config/mingw/winnt.cc | 14
-rw-r--r-- gcc/config/mingw/winnt.h | 1
-rw-r--r-- gcc/config/mips/mips.cc | 19
-rw-r--r-- gcc/config/mips/mips.h | 10
-rw-r--r-- gcc/config/mips/mips.opt | 4
-rw-r--r-- gcc/config/nds32/nds32-intrinsic.md | 11
-rw-r--r-- gcc/config/nvptx/gen-multilib-matches-tests | 67
-rw-r--r-- gcc/config/nvptx/mkoffload.cc | 12
-rw-r--r-- gcc/config/nvptx/nvptx-gen.h | 1
-rw-r--r-- gcc/config/nvptx/nvptx-gen.opt | 3
-rw-r--r-- gcc/config/nvptx/nvptx-opts.h | 1
-rw-r--r-- gcc/config/nvptx/nvptx-sm.def | 1
-rw-r--r-- gcc/config/nvptx/nvptx.cc | 47
-rw-r--r-- gcc/config/nvptx/nvptx.h | 1
-rw-r--r-- gcc/config/nvptx/nvptx.opt | 52
-rw-r--r-- gcc/config/or1k/or1k.cc | 63
-rw-r--r-- gcc/config/or1k/or1k.md | 57
-rw-r--r-- gcc/config/or1k/or1k.opt | 4
-rw-r--r-- gcc/config/or1k/predicates.md | 7
-rw-r--r-- gcc/config/pa/pa-hpux.h | 14
-rw-r--r-- gcc/config/pa/pa.cc | 3
-rw-r--r-- gcc/config/pru/pru-pragma.cc | 13
-rw-r--r-- gcc/config/pru/pru-protos.h | 8
-rw-r--r-- gcc/config/pru/pru.cc | 25
-rw-r--r-- gcc/config/pru/pru.h | 3
-rw-r--r-- gcc/config/pru/pru.md | 105
-rw-r--r-- gcc/config/pru/pru.opt | 8
-rw-r--r-- gcc/config/pru/pru.opt.urls | 6
-rw-r--r-- gcc/config/pru/t-multilib | 29
-rwxr-xr-x gcc/config/riscv/arch-canonicalize | 583
-rw-r--r-- gcc/config/riscv/autovec-opt.md | 471
-rw-r--r-- gcc/config/riscv/autovec.md | 134
-rw-r--r-- gcc/config/riscv/bitmanip.md | 290
-rw-r--r-- gcc/config/riscv/constraints.md | 29
-rw-r--r-- gcc/config/riscv/gen-riscv-ext-opt.cc | 105
-rw-r--r-- gcc/config/riscv/gen-riscv-ext-texi.cc | 88
-rw-r--r-- gcc/config/riscv/gen-riscv-mcpu-texi.cc | 43
-rw-r--r-- gcc/config/riscv/gen-riscv-mtune-texi.cc | 41
-rw-r--r-- gcc/config/riscv/generic-vector-ooo.md | 87
-rw-r--r-- gcc/config/riscv/genrvv-type-indexer.cc | 26
-rw-r--r-- gcc/config/riscv/gnu.h | 59
-rw-r--r-- gcc/config/riscv/iterators.md | 3
-rw-r--r-- gcc/config/riscv/mips-insn.md | 35
-rw-r--r-- gcc/config/riscv/mips-p8700.md | 167
-rwxr-xr-x gcc/config/riscv/pipeline-checker | 191
-rw-r--r-- gcc/config/riscv/predicates.md | 61
-rw-r--r-- gcc/config/riscv/riscv-avlprop.cc | 6
-rw-r--r-- gcc/config/riscv/riscv-c.cc | 37
-rw-r--r-- gcc/config/riscv/riscv-cores.def | 67
-rw-r--r-- gcc/config/riscv/riscv-ext-corev.def | 87
-rw-r--r-- gcc/config/riscv/riscv-ext-mips.def | 48
-rw-r--r-- gcc/config/riscv/riscv-ext-sifive.def | 87
-rw-r--r-- gcc/config/riscv/riscv-ext-thead.def | 191
-rw-r--r-- gcc/config/riscv/riscv-ext-ventana.def | 35
-rw-r--r-- gcc/config/riscv/riscv-ext.def | 2085
-rw-r--r-- gcc/config/riscv/riscv-ext.opt | 453
-rw-r--r-- gcc/config/riscv/riscv-ext.opt.urls | 2
-rw-r--r-- gcc/config/riscv/riscv-opts.h | 26
-rw-r--r-- gcc/config/riscv/riscv-passes.def | 2
-rw-r--r-- gcc/config/riscv/riscv-protos.h | 37
-rw-r--r-- gcc/config/riscv/riscv-selftests.cc | 10
-rw-r--r-- gcc/config/riscv/riscv-shorten-memrefs.cc | 3
-rw-r--r-- gcc/config/riscv/riscv-string.cc | 6
-rw-r--r-- gcc/config/riscv/riscv-subset.h | 78
-rw-r--r-- gcc/config/riscv/riscv-target-attr.cc | 108
-rw-r--r-- gcc/config/riscv/riscv-v.cc | 1370
-rw-r--r-- gcc/config/riscv/riscv-vect-permconst.cc | 318
-rw-r--r-- gcc/config/riscv/riscv-vector-builtins-bases.cc | 3
-rw-r--r-- gcc/config/riscv/riscv-vector-builtins-shapes.cc | 52
-rw-r--r-- gcc/config/riscv/riscv-vector-builtins-shapes.h | 2
-rw-r--r-- gcc/config/riscv/riscv-vector-builtins-types.def | 40
-rw-r--r-- gcc/config/riscv/riscv-vector-builtins.cc | 429
-rw-r--r-- gcc/config/riscv/riscv-vector-builtins.def | 30
-rw-r--r-- gcc/config/riscv/riscv-vector-builtins.h | 45
-rw-r--r-- gcc/config/riscv/riscv-vector-costs.cc | 116
-rw-r--r-- gcc/config/riscv/riscv-vector-costs.h | 16
-rw-r--r-- gcc/config/riscv/riscv-vsetvl.cc | 47
-rw-r--r-- gcc/config/riscv/riscv-vsetvl.def | 6
-rw-r--r-- gcc/config/riscv/riscv.cc | 2309
-rw-r--r-- gcc/config/riscv/riscv.h | 29
-rw-r--r-- gcc/config/riscv/riscv.md | 301
-rw-r--r-- gcc/config/riscv/riscv.opt | 319
-rw-r--r-- gcc/config/riscv/sifive-7.md | 169
-rw-r--r-- gcc/config/riscv/sifive-p400.md | 20
-rw-r--r-- gcc/config/riscv/sifive-p600.md | 17
-rw-r--r-- gcc/config/riscv/sifive-vector-builtins-bases.cc | 78
-rw-r--r-- gcc/config/riscv/sifive-vector-builtins-bases.h | 3
-rw-r--r-- gcc/config/riscv/sifive-vector-builtins-functions.def | 45
-rw-r--r-- gcc/config/riscv/sifive-vector.md | 871
-rw-r--r-- gcc/config/riscv/sync.md | 21
-rw-r--r-- gcc/config/riscv/t-riscv | 87
-rw-r--r-- gcc/config/riscv/t-rtems | 9
-rw-r--r-- gcc/config/riscv/vector-iterators.md | 116
-rw-r--r-- gcc/config/riscv/vector.md | 746
-rw-r--r-- gcc/config/riscv/xiangshan.md | 7
-rw-r--r-- gcc/config/riscv/zicond.md | 36
-rw-r--r-- gcc/config/rl78/rl78.cc | 3
-rw-r--r-- gcc/config/rl78/rl78.opt.urls | 2
-rw-r--r-- gcc/config/rs6000/mma.md | 51
-rw-r--r-- gcc/config/rs6000/rs6000-builtin.cc | 24
-rw-r--r-- gcc/config/rs6000/rs6000.cc | 102
-rw-r--r-- gcc/config/rs6000/rs6000.md | 42
-rw-r--r-- gcc/config/rs6000/vxworks.h | 15
-rw-r--r-- gcc/config/s390/s390-protos.h | 3
-rw-r--r-- gcc/config/s390/s390.cc | 549
-rw-r--r-- gcc/config/s390/s390.md | 232
-rw-r--r-- gcc/config/s390/vector.md | 561
-rw-r--r-- gcc/config/s390/vx-builtins.md | 35
-rw-r--r-- gcc/config/sh/predicates.md | 4
-rw-r--r-- gcc/config/sh/sh-modes.def | 6
-rw-r--r-- gcc/config/sh/sh-protos.h | 1
-rw-r--r-- gcc/config/sh/sh.cc | 17
-rw-r--r-- gcc/config/sh/sh_treg_combine.cc | 5
-rw-r--r-- gcc/config/sparc/sparc.cc | 15
-rw-r--r-- gcc/config/sparc/sparc.md | 87
-rw-r--r-- gcc/config/stormy16/stormy16.cc | 3
-rw-r--r-- gcc/config/stormy16/stormy16.md | 3
-rw-r--r-- gcc/config/vxworks-dummy.h | 12
-rw-r--r-- gcc/config/vxworks.h | 12
-rw-r--r-- gcc/config/xtensa/constraints.md | 2
-rw-r--r-- gcc/config/xtensa/predicates.md | 16
-rw-r--r-- gcc/config/xtensa/xtensa-protos.h | 1
-rw-r--r-- gcc/config/xtensa/xtensa.cc | 327
-rw-r--r-- gcc/config/xtensa/xtensa.h | 1
-rw-r--r-- gcc/config/xtensa/xtensa.md | 582
332 files changed, 32247 insertions, 18572 deletions
diff --git a/gcc/config/aarch64/aarch64-builtin-pairs.def b/gcc/config/aarch64/aarch64-builtin-pairs.def
new file mode 100644
index 0000000..1757e19
--- /dev/null
+++ b/gcc/config/aarch64/aarch64-builtin-pairs.def
@@ -0,0 +1,73 @@
+/* Pairs of AArch64 builtins that can be folded into each other.
+ Copyright (C) 2025 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+/* Pairs of single and half integer modes. */
+#define LO_HI_PAIR_V_HSI(T, LO, HI) \
+ LO_HI_PAIR (T##_##LO##v2si, T##_##HI##v4si) \
+ LO_HI_PAIR (T##_##LO##v4hi, T##_##HI##v8hi)
+
+#define LO_HI_PAIR_V_US_HSI(T, LO, HI) \
+ LO_HI_PAIR_V_HSI (T, s##LO, s##HI) \
+ LO_HI_PAIR_V_HSI (T##U, u##LO, u##HI)
+
+/* Pairs of widenable integer modes. */
+#define LO_HI_PAIR_V_WI(T, LO, HI) \
+ LO_HI_PAIR_V_HSI (T, LO, HI) \
+ LO_HI_PAIR (T##_##LO##v8qi, T##_##HI##v16qi)
+
+#define LO_HI_PAIR_V_US_WI(T, LO, HI) \
+ LO_HI_PAIR_V_WI (T, s##LO, s##HI) \
+ LO_HI_PAIR_V_WI (T##U, u##LO, u##HI)
+
+#define UNOP_LONG_LH_PAIRS \
+ LO_HI_PAIR (UNOP_sxtlv8hi, UNOP_vec_unpacks_hi_v16qi) \
+ LO_HI_PAIR (UNOP_sxtlv4si, UNOP_vec_unpacks_hi_v8hi) \
+ LO_HI_PAIR (UNOP_sxtlv2di, UNOP_vec_unpacks_hi_v4si) \
+ LO_HI_PAIR (UNOPU_uxtlv8hi, UNOPU_vec_unpacku_hi_v16qi) \
+ LO_HI_PAIR (UNOPU_uxtlv4si, UNOPU_vec_unpacku_hi_v8hi) \
+ LO_HI_PAIR (UNOPU_uxtlv2di, UNOPU_vec_unpacku_hi_v4si)
+
+#define BINOP_LONG_LH_PAIRS \
+ LO_HI_PAIR_V_US_WI (BINOP, addl, addl2) \
+ LO_HI_PAIR_V_US_WI (BINOP, subl, subl2) \
+ LO_HI_PAIR_V_US_WI (BINOP, abdl, abdl2) \
+ LO_HI_PAIR_V_WI (BINOP, intrinsic_vec_smult_lo_, vec_widen_smult_hi_) \
+ LO_HI_PAIR_V_WI (BINOPU, intrinsic_vec_umult_lo_, vec_widen_umult_hi_) \
+ LO_HI_PAIR_V_HSI (BINOP, sqdmull, sqdmull2) \
+ LO_HI_PAIR (BINOPP_pmullv8qi, BINOPP_pmull_hiv16qi)
+
+#define BINOP_LONG_N_LH_PAIRS \
+ LO_HI_PAIR_V_US_HSI (BINOP, mull_n, mull_hi_n) \
+ LO_HI_PAIR_V_HSI (BINOP, sqdmull_n, sqdmull2_n)
+
+#define BINOP_WIDE_LH_PAIRS \
+ LO_HI_PAIR_V_US_WI (BINOP, subw, subw2) \
+ LO_HI_PAIR_V_US_WI (BINOP, addw, addw2)
+
+#define TERNOP_LONG_LH_PAIRS \
+ LO_HI_PAIR_V_US_WI (TERNOP, mlal, mlal_hi) \
+ LO_HI_PAIR_V_US_WI (TERNOP, mlsl, mlsl_hi) \
+ LO_HI_PAIR_V_US_WI (TERNOP, abal, abal2) \
+ LO_HI_PAIR_V_HSI (TERNOP, sqdmlal, sqdmlal2) \
+ LO_HI_PAIR_V_HSI (TERNOP, sqdmlsl, sqdmlsl2)
+
+#define TERNOP_LONG_N_LH_PAIRS \
+ LO_HI_PAIR_V_US_HSI (TERNOP, mlal_n, mlal_hi_n) \
+ LO_HI_PAIR_V_US_HSI (TERNOP, mlsl_n, mlsl_hi_n) \
+ LO_HI_PAIR_V_HSI (TERNOP, sqdmlal_n, sqdmlal2_n)
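To see how these macros compose: a single entry such as LO_HI_PAIR_V_HSI (BINOP, sqdmull, sqdmull2) from BINOP_LONG_LH_PAIRS expands, per the LO_HI_PAIR_V_HSI definition above, into one LO_HI_PAIR per narrow mode, pairing each 64-bit lowpart builtin with its 128-bit highpart counterpart:

    LO_HI_PAIR (BINOP_sqdmullv2si, BINOP_sqdmull2v4si)
    LO_HI_PAIR (BINOP_sqdmullv4hi, BINOP_sqdmull2v8hi)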
diff --git a/gcc/config/aarch64/aarch64-builtins.cc b/gcc/config/aarch64/aarch64-builtins.cc
index 93f939a..408099a 100644
--- a/gcc/config/aarch64/aarch64-builtins.cc
+++ b/gcc/config/aarch64/aarch64-builtins.cc
@@ -49,6 +49,8 @@
#include "attribs.h"
#include "gimple-fold.h"
#include "builtins.h"
+#include "tree-pass.h"
+#include "tree-vector-builder.h"
#include "aarch64-builtins.h"
using namespace aarch64;
@@ -738,6 +740,16 @@ static aarch64_simd_builtin_datum aarch64_simd_builtin_data[] = {
VGET_HIGH_BUILTIN(u64) \
VGET_HIGH_BUILTIN(bf16)
+#include "aarch64-builtin-pairs.def"
+
+#define LO_HI_PAIRINGS \
+ UNOP_LONG_LH_PAIRS \
+ BINOP_LONG_LH_PAIRS \
+ BINOP_LONG_N_LH_PAIRS \
+ BINOP_WIDE_LH_PAIRS \
+ TERNOP_LONG_LH_PAIRS \
+ TERNOP_LONG_N_LH_PAIRS
+
typedef struct
{
const char *name;
@@ -5004,6 +5016,167 @@ aarch64_gimple_fold_pragma_builtin
}
}
+/* Return the fndecl of the builtin paired with FCODE_LO if one
+ exists (see aarch64-builtin-pairs.def), or NULL_TREE if not. */
+static inline tree
+aarch64_get_highpart_builtin (unsigned int fcode_lo)
+{
+#undef LO_HI_PAIR
+#define LO_HI_PAIR(A, B) case AARCH64_SIMD_BUILTIN_##A: \
+ return aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_##B];
+
+ switch (fcode_lo)
+ {
+ LO_HI_PAIRINGS
+ default:
+ return NULL_TREE;
+ }
+}
+
+/* If REF describes the high half of a 128-bit vector, return this
+ vector. Otherwise, return NULL_TREE. */
+static tree
+aarch64_v128_highpart_ref (const_tree ref)
+{
+ if (TREE_CODE (ref) != SSA_NAME)
+ return NULL_TREE;
+
+ gassign *stmt = dyn_cast<gassign *> (SSA_NAME_DEF_STMT (ref));
+ if (!stmt || gimple_assign_rhs_code (stmt) != BIT_FIELD_REF)
+ return NULL_TREE;
+
+ /* Look for a BIT_FIELD_REF that denotes the most significant 64
+ bits of a 128-bit vector. */
+ tree bf_ref = gimple_assign_rhs1 (stmt);
+ unsigned int offset = BYTES_BIG_ENDIAN ? 0 : 64;
+
+ if (maybe_ne (bit_field_size (bf_ref), 64u)
+ || maybe_ne (bit_field_offset (bf_ref), offset))
+ return NULL_TREE;
+
+ tree obj = TREE_OPERAND (bf_ref, 0);
+ tree type = TREE_TYPE (obj);
+
+ if (VECTOR_TYPE_P (type) && tree_fits_uhwi_p (TYPE_SIZE (type))
+ && tree_to_uhwi (TYPE_SIZE (type)) == 128)
+ return obj;
+
+ return NULL_TREE;
+}
+
+/* Build and return a new VECTOR_CST of type OUT_TY, using repeated
+ copies of the elements of VEC_IN. */
+static tree
+aarch64_build_vector_cst (const_tree vec_in, tree out_ty)
+{
+ gcc_assert (TREE_CODE (vec_in) == VECTOR_CST
+ && VECTOR_TYPE_P (out_ty));
+ unsigned HOST_WIDE_INT nelts
+ = VECTOR_CST_NELTS (vec_in).to_constant ();
+
+ tree_vector_builder vec_out (out_ty, nelts, 1);
+ for (unsigned i = 0; i < nelts; i++)
+ vec_out.quick_push (VECTOR_CST_ELT (vec_in, i));
+
+ return vec_out.build ();
+}
+
+/* Try to fold STMT, a call to a lowpart-operating builtin, to
+ its highpart-operating equivalent if doing so would save
+ unnecessary data movement instructions.
+
+ Return the new call if so, otherwise nullptr. */
+static gcall *
+aarch64_fold_lo_call_to_hi (unsigned int fcode, gcall *stmt,
+ gimple_stmt_iterator *gsi)
+{
+ /* Punt until as late as possible:
+ 1) By folding away BIT_FIELD_REFs we remove information about the
+ operands that may be useful to other optimizers.
+
+ 2) For simplicity, we'd like the expression
+
+ x = BIT_FIELD_REF<a, x, y>
+
+ to imply that A is not a VECTOR_CST. This assumption is unlikely
+ to hold before constant prop/folding. */
+ if (!(cfun->curr_properties & PROP_last_full_fold))
+ return nullptr;
+
+ tree builtin_hi = aarch64_get_highpart_builtin (fcode);
+ gcc_assert (builtin_hi != NULL_TREE);
+
+ /* Prefer to use the highpart builtin when at least one vector
+ argument is a reference to the high half of a 128b vector, and
+ all others are VECTOR_CSTs that we can extend to 128b. */
+ auto_vec<unsigned int, 2> vec_constants;
+ auto_vec<unsigned int, 2> vec_highparts;
+ /* The arguments and signature of the new call. */
+ auto_vec<tree, 4> call_args;
+ auto_vec<tree, 4> call_types;
+
+ /* The interesting args are those that differ between the lo/hi
+ builtins. Walk the function signatures to find these. */
+ tree types_hi = TYPE_ARG_TYPES (TREE_TYPE (builtin_hi));
+ tree types_lo = TYPE_ARG_TYPES (gimple_call_fntype (stmt));
+ unsigned int argno = 0;
+ while (types_lo != void_list_node && types_hi != void_list_node)
+ {
+ tree type_lo = TREE_VALUE (types_lo);
+ tree type_hi = TREE_VALUE (types_hi);
+ tree arg = gimple_call_arg (stmt, argno);
+ if (!types_compatible_p (type_lo, type_hi))
+ {
+ /* Check our assumptions about this pair. */
+ gcc_assert (wi::to_widest (TYPE_SIZE (type_lo)) == 64
+ && wi::to_widest (TYPE_SIZE (type_hi)) == 128);
+
+ tree vq = aarch64_v128_highpart_ref (arg);
+ if (vq && is_gimple_reg (vq))
+ {
+ vec_highparts.safe_push (argno);
+ arg = vq;
+ }
+ else if (TREE_CODE (arg) == VECTOR_CST)
+ vec_constants.safe_push (argno);
+ else
+ return nullptr;
+ }
+ call_args.safe_push (arg);
+ call_types.safe_push (type_hi);
+
+ argno++;
+ types_hi = TREE_CHAIN (types_hi);
+ types_lo = TREE_CHAIN (types_lo);
+ }
+ gcc_assert (types_lo == void_list_node && types_hi == void_list_node);
+
+ if (vec_highparts.is_empty ())
+ return nullptr;
+
+ /* Build and return a new call to BUILTIN_HI. */
+ for (auto i : vec_constants)
+ call_args[i] = aarch64_build_vector_cst (call_args[i], call_types[i]);
+
+ for (auto i : vec_highparts)
+ if (!types_compatible_p (TREE_TYPE (call_args[i]), call_types[i]))
+ {
+ tree vce_ssa = make_ssa_name (call_types[i]);
+ tree vce_expr = build1 (VIEW_CONVERT_EXPR,
+ call_types[i], call_args[i]);
+ gsi_insert_before (gsi, gimple_build_assign (vce_ssa, vce_expr),
+ GSI_SAME_STMT);
+ call_args[i] = vce_ssa;
+ }
+
+ gcall *new_call = gimple_build_call_vec (builtin_hi, call_args);
+ gimple_call_set_lhs (new_call, gimple_call_lhs (stmt));
+ return new_call;
+}
+
+#undef LO_HI_PAIR
+#define LO_HI_PAIR(A, B) case AARCH64_SIMD_BUILTIN_##A:
+
/* Try to fold STMT, given that it's a call to the built-in function with
subcode FCODE. Return the new statement on success and null on
failure. */
@@ -5190,6 +5363,9 @@ aarch64_general_gimple_fold_builtin (unsigned int fcode, gcall *stmt,
}
break;
}
+ LO_HI_PAIRINGS
+ new_stmt = aarch64_fold_lo_call_to_hi (fcode, stmt, gsi);
+ break;
case AARCH64_SIMD_BUILTIN_LANE_CHECK:
if (aarch64_fold_builtin_lane_check (args[0], args[1], args[2]))
{
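For intuition, a sketch (not taken from the patch) of the rewrite that aarch64_fold_lo_call_to_hi performs, assuming a little-endian target and the saddl/saddl2 pair, with decl names following the usual __builtin_aarch64_<name><mode> scheme:

    _1 = BIT_FIELD_REF <a_2(D), 64, 64>;
    _3 = BIT_FIELD_REF <b_4(D), 64, 64>;
    _5 = __builtin_aarch64_saddlv4hi (_1, _3);

folds to

    _5 = __builtin_aarch64_saddl2v8hi (a_2(D), b_4(D));

so the data movement implied by extracting the two high halves disappears.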
diff --git a/gcc/config/aarch64/aarch64-c.cc b/gcc/config/aarch64/aarch64-c.cc
index d1e2ab9..98337b7 100644
--- a/gcc/config/aarch64/aarch64-c.cc
+++ b/gcc/config/aarch64/aarch64-c.cc
@@ -293,6 +293,7 @@ aarch64_update_cpp_builtins (cpp_reader *pfile)
aarch64_def_or_undef (TARGET_SME2, "__ARM_FEATURE_SME2", pfile);
aarch64_def_or_undef (AARCH64_HAVE_ISA (SME2p1),
"__ARM_FEATURE_SME2p1", pfile);
+ aarch64_def_or_undef (TARGET_FAMINMAX, "__ARM_FEATURE_FAMINMAX", pfile);
/* Not for ACLE, but required to keep "float.h" correct if we switch
target between implementations that do or do not support ARMv8.2-A
diff --git a/gcc/config/aarch64/aarch64-cc-fusion.cc b/gcc/config/aarch64/aarch64-cc-fusion.cc
deleted file mode 100644
index cea54de..0000000
--- a/gcc/config/aarch64/aarch64-cc-fusion.cc
+++ /dev/null
@@ -1,297 +0,0 @@
-// Pass to fuse CC operations with other instructions.
-// Copyright (C) 2021-2025 Free Software Foundation, Inc.
-//
-// This file is part of GCC.
-//
-// GCC is free software; you can redistribute it and/or modify it under
-// the terms of the GNU General Public License as published by the Free
-// Software Foundation; either version 3, or (at your option) any later
-// version.
-//
-// GCC is distributed in the hope that it will be useful, but WITHOUT ANY
-// WARRANTY; without even the implied warranty of MERCHANTABILITY or
-// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-// for more details.
-//
-// You should have received a copy of the GNU General Public License
-// along with GCC; see the file COPYING3. If not see
-// <http://www.gnu.org/licenses/>.
-
-// This pass looks for sequences of the form:
-//
-// A: (set (reg R1) X1)
-// B: ...instructions that might change the value of X1...
-// C: (set (reg CC) X2) // X2 uses R1
-//
-// and tries to change them to:
-//
-// C': [(set (reg CC) X2')
-// (set (reg R1) X1)]
-// B: ...instructions that might change the value of X1...
-//
-// where X2' is the result of replacing R1 with X1 in X2.
-//
-// This sequence occurs in SVE code in two important cases:
-//
-// (a) Sometimes, to deal correctly with overflow, we need to increment
-// an IV after a WHILELO rather than before it. In this case:
-// - A is a WHILELO,
-// - B includes an IV increment and
-// - C is a separate PTEST.
-//
-// (b) ACLE code of the form:
-//
-// svbool_t ok = svrdffr ();
-// if (svptest_last (pg, ok))
-// ...
-//
-// must, for performance reasons, be code-generated as:
-//
-// RDFFRS Pok.B, Pg/Z
-// ...branch on flags result...
-//
-// without a separate PTEST of Pok. In this case:
-// - A is an aarch64_rdffr
-// - B includes an aarch64_update_ffrt
-// - C is a separate PTEST
-//
-// Combine can handle this optimization if B doesn't exist and if A and
-// C are in the same BB. This pass instead handles cases where B does
-// exist and cases where A and C are in different BBs of the same EBB.
-
-#define IN_TARGET_CODE 1
-
-#define INCLUDE_ALGORITHM
-#define INCLUDE_FUNCTIONAL
-#define INCLUDE_ARRAY
-#include "config.h"
-#include "system.h"
-#include "coretypes.h"
-#include "backend.h"
-#include "rtl.h"
-#include "df.h"
-#include "rtl-ssa.h"
-#include "tree-pass.h"
-
-using namespace rtl_ssa;
-
-namespace {
-const pass_data pass_data_cc_fusion =
-{
- RTL_PASS, // type
- "cc_fusion", // name
- OPTGROUP_NONE, // optinfo_flags
- TV_NONE, // tv_id
- 0, // properties_required
- 0, // properties_provided
- 0, // properties_destroyed
- 0, // todo_flags_start
- TODO_df_finish, // todo_flags_finish
-};
-
-// Class that represents one run of the pass.
-class cc_fusion
-{
-public:
- cc_fusion () : m_parallel () {}
- void execute ();
-
-private:
- rtx optimizable_set (const insn_info *);
- bool parallelize_insns (def_info *, rtx, def_info *, rtx);
- void optimize_cc_setter (def_info *, rtx);
-
- // A spare PARALLEL rtx, or null if none.
- rtx m_parallel;
-};
-
-// See whether INSN is a single_set that we can optimize. Return the
-// set if so, otherwise return null.
-rtx
-cc_fusion::optimizable_set (const insn_info *insn)
-{
- if (!insn->can_be_optimized ()
- || insn->is_asm ()
- || insn->has_volatile_refs ()
- || insn->has_pre_post_modify ())
- return NULL_RTX;
-
- return single_set (insn->rtl ());
-}
-
-// CC_SET is a single_set that sets (only) CC_DEF; OTHER_SET is likewise
-// a single_set that sets (only) OTHER_DEF. CC_SET is known to set the
-// CC register and the instruction that contains CC_SET is known to use
-// OTHER_DEF. Try to do CC_SET and OTHER_SET in parallel.
-bool
-cc_fusion::parallelize_insns (def_info *cc_def, rtx cc_set,
- def_info *other_def, rtx other_set)
-{
- auto attempt = crtl->ssa->new_change_attempt ();
-
- insn_info *cc_insn = cc_def->insn ();
- insn_info *other_insn = other_def->insn ();
- if (dump_file && (dump_flags & TDF_DETAILS))
- fprintf (dump_file, "trying to parallelize insn %d and insn %d\n",
- other_insn->uid (), cc_insn->uid ());
-
- // Try to substitute OTHER_SET into CC_INSN.
- insn_change_watermark rtl_watermark;
- rtx_insn *cc_rtl = cc_insn->rtl ();
- insn_propagation prop (cc_rtl, SET_DEST (other_set),
- SET_SRC (other_set));
- if (!prop.apply_to_pattern (&PATTERN (cc_rtl))
- || prop.num_replacements == 0)
- {
- if (dump_file && (dump_flags & TDF_DETAILS))
- fprintf (dump_file, "-- failed to substitute all uses of r%d\n",
- other_def->regno ());
- return false;
- }
-
- // Restrict the uses to those outside notes.
- use_array cc_uses = remove_note_accesses (attempt, cc_insn->uses ());
- use_array other_set_uses = remove_note_accesses (attempt,
- other_insn->uses ());
-
- // Remove the use of the substituted value.
- access_array_builder uses_builder (attempt);
- uses_builder.reserve (cc_uses.size ());
- for (use_info *use : cc_uses)
- if (use->def () != other_def)
- uses_builder.quick_push (use);
- cc_uses = use_array (uses_builder.finish ());
-
- // Get the list of uses for the new instruction.
- insn_change cc_change (cc_insn);
- cc_change.new_uses = merge_access_arrays (attempt, other_set_uses, cc_uses);
- if (!cc_change.new_uses.is_valid ())
- {
- if (dump_file && (dump_flags & TDF_DETAILS))
- fprintf (dump_file, "-- cannot merge uses\n");
- return false;
- }
-
- // The instruction initially defines just two registers. recog can add
- // extra clobbers if necessary.
- auto_vec<access_info *, 2> new_defs;
- new_defs.quick_push (cc_def);
- new_defs.quick_push (other_def);
- sort_accesses (new_defs);
- cc_change.new_defs = def_array (access_array (new_defs));
-
- // Make sure there is somewhere that the new instruction could live.
- auto other_change = insn_change::delete_insn (other_insn);
- insn_change *changes[] = { &other_change, &cc_change };
- cc_change.move_range = cc_insn->ebb ()->insn_range ();
- if (!restrict_movement (cc_change, ignore_changing_insns (changes)))
- {
- if (dump_file && (dump_flags & TDF_DETAILS))
- fprintf (dump_file, "-- cannot satisfy all definitions and uses\n");
- return false;
- }
-
- // Tentatively install the new pattern. By convention, the CC set
- // must be first.
- if (m_parallel)
- {
- XVECEXP (m_parallel, 0, 0) = cc_set;
- XVECEXP (m_parallel, 0, 1) = other_set;
- }
- else
- {
- rtvec vec = gen_rtvec (2, cc_set, other_set);
- m_parallel = gen_rtx_PARALLEL (VOIDmode, vec);
- }
- validate_change (cc_rtl, &PATTERN (cc_rtl), m_parallel, 1);
-
- // These routines report failures themselves.
- if (!recog (attempt, cc_change, ignore_changing_insns (changes))
- || !changes_are_worthwhile (changes)
- || !crtl->ssa->verify_insn_changes (changes))
- return false;
-
- remove_reg_equal_equiv_notes (cc_rtl);
- confirm_change_group ();
- crtl->ssa->change_insns (changes);
- m_parallel = NULL_RTX;
- return true;
-}
-
-// Try to optimize the instruction that contains CC_DEF, where CC_DEF describes
-// a definition of the CC register by CC_SET.
-void
-cc_fusion::optimize_cc_setter (def_info *cc_def, rtx cc_set)
-{
- // Search the registers used by the CC setter for an easily-substitutable
- // def-use chain.
- for (use_info *other_use : cc_def->insn ()->uses ())
- if (def_info *other_def = other_use->def ())
- if (other_use->regno () != CC_REGNUM
- && other_def->ebb () == cc_def->ebb ())
- if (rtx other_set = optimizable_set (other_def->insn ()))
- {
- rtx dest = SET_DEST (other_set);
- if (REG_P (dest)
- && REGNO (dest) == other_def->regno ()
- && REG_NREGS (dest) == 1
- && parallelize_insns (cc_def, cc_set, other_def, other_set))
- return;
- }
-}
-
-// Run the pass on the current function.
-void
-cc_fusion::execute ()
-{
- // Initialization.
- calculate_dominance_info (CDI_DOMINATORS);
- df_analyze ();
- crtl->ssa = new rtl_ssa::function_info (cfun);
-
- // Walk through all instructions that set CC. Look for a PTEST instruction
- // that we can optimize.
- //
- // ??? The PTEST test isn't needed for correctness, but it ensures that the
- // pass no effect on non-SVE code.
- for (def_info *def : crtl->ssa->reg_defs (CC_REGNUM))
- if (rtx cc_set = optimizable_set (def->insn ()))
- if (REG_P (SET_DEST (cc_set))
- && REGNO (SET_DEST (cc_set)) == CC_REGNUM
- && GET_CODE (SET_SRC (cc_set)) == UNSPEC
- && XINT (SET_SRC (cc_set), 1) == UNSPEC_PTEST)
- optimize_cc_setter (def, cc_set);
-
- // Finalization.
- crtl->ssa->perform_pending_updates ();
- free_dominance_info (CDI_DOMINATORS);
-}
-
-class pass_cc_fusion : public rtl_opt_pass
-{
-public:
- pass_cc_fusion (gcc::context *ctxt)
- : rtl_opt_pass (pass_data_cc_fusion, ctxt)
- {}
-
- // opt_pass methods:
- virtual bool gate (function *) { return TARGET_SVE && optimize >= 2; }
- virtual unsigned int execute (function *);
-};
-
-unsigned int
-pass_cc_fusion::execute (function *)
-{
- cc_fusion ().execute ();
- return 0;
-}
-
-} // end namespace
-
-// Create a new CC fusion pass instance.
-
-rtl_opt_pass *
-make_pass_cc_fusion (gcc::context *ctxt)
-{
- return new pass_cc_fusion (ctxt);
-}
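The pass deleted here fused, per case (b) of its header comment, roughly the following (hedged assembly sketch):

    rdffr   p0.b, p1/z      // A: svrdffr ()
    ...                     // B: updates the FFR token
    ptest   p1, p0.b        // C: separate flags test
    b.last  .Ltaken

into

    rdffrs  p0.b, p1/z      // C': sets p0 and the flags in one instruction
    ...                     // B
    b.last  .Ltaken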
diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def
index 7f204fd..6f11cc0 100644
--- a/gcc/config/aarch64/aarch64-cores.def
+++ b/gcc/config/aarch64/aarch64-cores.def
@@ -132,7 +132,7 @@ AARCH64_CORE("octeontx2f95mm", octeontx2f95mm, cortexa57, V8_2A, (CRYPTO, PROFI
/* Fujitsu ('F') cores. */
AARCH64_CORE("a64fx", a64fx, a64fx, V8_2A, (F16, SVE), a64fx, 0x46, 0x001, -1)
-AARCH64_CORE("fujitsu-monaka", fujitsu_monaka, cortexa57, V9_3A, (F16, FP8, LS64, RNG, CRYPTO, SVE2_AES, SVE2_BITPERM, SVE2_SHA3, SVE2_SM4), fujitsu_monaka, 0x46, 0x003, -1)
+AARCH64_CORE("fujitsu-monaka", fujitsu_monaka, cortexa57, V9_3A, (F16, FAMINMAX, FP8FMA, FP8DOT2, FP8DOT4, LS64, LUT, RNG, CRYPTO, SVE2_AES, SVE2_BITPERM, SVE2_SHA3, SVE2_SM4), fujitsu_monaka, 0x46, 0x003, -1)
/* HiSilicon ('H') cores. */
AARCH64_CORE("tsv110", tsv110, tsv110, V8_2A, (CRYPTO, F16), tsv110, 0x48, 0xd01, -1)
@@ -224,7 +224,10 @@ AARCH64_CORE("neoverse-v3ae", neoversev3ae, cortexa57, V9_2A, (SVE2_BITPERM, RNG
AARCH64_CORE("demeter", demeter, cortexa57, V9A, (I8MM, BF16, SVE2_BITPERM, RNG, MEMTAG, PROFILE), neoversev2, 0x41, 0xd4f, -1)
/* NVIDIA ('N') cores. */
-AARCH64_CORE("olympus", olympus, cortexa57, V9_2A, (SVE2_BITPERM, RNG, LS64, MEMTAG, PROFILE, FAMINMAX, FP8DOT2, LUT, SVE2_AES, SVE2_SHA3, SVE2_SM4), neoversev3, 0x4e, 0x10, -1)
+AARCH64_CORE("olympus", olympus, cortexa57, V9_2A, (SVE2_BITPERM, RNG, LS64, MEMTAG, PROFILE, FAMINMAX, FP8FMA, FP8DOT2, FP8DOT4, LUT, SVE2_AES, SVE2_SHA3, SVE2_SM4), olympus, 0x4e, 0x10, -1)
+
+/* Armv9-A big.LITTLE processors. */
+AARCH64_CORE("gb10", gb10, cortexa57, V9_2A, (SVE2_BITPERM, SVE2_AES, SVE2_SHA3, SVE2_SM4, MEMTAG, PROFILE), cortexx925, 0x41, AARCH64_BIG_LITTLE (0xd85, 0xd87), -1)
/* Generic Architecture Processors. */
AARCH64_CORE("generic", generic, cortexa53, V8A, (), generic, 0x0, 0x0, -1)
diff --git a/gcc/config/aarch64/aarch64-cost-tables.h b/gcc/config/aarch64/aarch64-cost-tables.h
index c49ff7f..e7926eb 100644
--- a/gcc/config/aarch64/aarch64-cost-tables.h
+++ b/gcc/config/aarch64/aarch64-cost-tables.h
@@ -125,9 +125,9 @@ const struct cpu_cost_table qdf24xx_extra_costs =
{
COSTS_N_INSNS (1), /* alu. */
COSTS_N_INSNS (4), /* mult. */
- COSTS_N_INSNS (1), /* movi. */
- COSTS_N_INSNS (2), /* dup. */
- COSTS_N_INSNS (2) /* extract. */
+ COSTS_N_INSNS (0), /* movi. */
+ COSTS_N_INSNS (1), /* dup. */
+ COSTS_N_INSNS (1) /* extract. */
}
};
@@ -233,9 +233,9 @@ const struct cpu_cost_table thunderx_extra_costs =
{
COSTS_N_INSNS (1), /* Alu. */
COSTS_N_INSNS (4), /* mult. */
- COSTS_N_INSNS (1), /* movi. */
- COSTS_N_INSNS (2), /* dup. */
- COSTS_N_INSNS (2) /* extract. */
+ COSTS_N_INSNS (0), /* movi. */
+ COSTS_N_INSNS (1), /* dup. */
+ COSTS_N_INSNS (1) /* extract. */
}
};
@@ -340,9 +340,9 @@ const struct cpu_cost_table thunderx2t99_extra_costs =
{
COSTS_N_INSNS (1), /* Alu. */
COSTS_N_INSNS (4), /* Mult. */
- COSTS_N_INSNS (1), /* movi. */
- COSTS_N_INSNS (2), /* dup. */
- COSTS_N_INSNS (2) /* extract. */
+ COSTS_N_INSNS (0), /* movi. */
+ COSTS_N_INSNS (1), /* dup. */
+ COSTS_N_INSNS (1) /* extract. */
}
};
@@ -447,9 +447,9 @@ const struct cpu_cost_table thunderx3t110_extra_costs =
{
COSTS_N_INSNS (1), /* Alu. */
COSTS_N_INSNS (4), /* Mult. */
- COSTS_N_INSNS (1), /* movi. */
- COSTS_N_INSNS (2), /* dup. */
- COSTS_N_INSNS (2) /* extract. */
+ COSTS_N_INSNS (0), /* movi. */
+ COSTS_N_INSNS (1), /* dup. */
+ COSTS_N_INSNS (1) /* extract. */
}
};
@@ -555,9 +555,9 @@ const struct cpu_cost_table tsv110_extra_costs =
{
COSTS_N_INSNS (1), /* alu. */
COSTS_N_INSNS (4), /* mult. */
- COSTS_N_INSNS (1), /* movi. */
- COSTS_N_INSNS (2), /* dup. */
- COSTS_N_INSNS (2) /* extract. */
+ COSTS_N_INSNS (0), /* movi. */
+ COSTS_N_INSNS (1), /* dup. */
+ COSTS_N_INSNS (1) /* extract. */
}
};
@@ -662,9 +662,9 @@ const struct cpu_cost_table a64fx_extra_costs =
{
COSTS_N_INSNS (1), /* alu. */
COSTS_N_INSNS (4), /* mult. */
- COSTS_N_INSNS (1), /* movi. */
- COSTS_N_INSNS (2), /* dup. */
- COSTS_N_INSNS (2) /* extract. */
+ COSTS_N_INSNS (0), /* movi. */
+ COSTS_N_INSNS (1), /* dup. */
+ COSTS_N_INSNS (1) /* extract. */
}
};
@@ -769,9 +769,9 @@ const struct cpu_cost_table ampere1_extra_costs =
{
COSTS_N_INSNS (3), /* alu. */
COSTS_N_INSNS (3), /* mult. */
- COSTS_N_INSNS (2), /* movi. */
- COSTS_N_INSNS (2), /* dup. */
- COSTS_N_INSNS (2) /* extract. */
+ COSTS_N_INSNS (1), /* movi. */
+ COSTS_N_INSNS (1), /* dup. */
+ COSTS_N_INSNS (1) /* extract. */
}
};
@@ -876,9 +876,9 @@ const struct cpu_cost_table ampere1a_extra_costs =
{
COSTS_N_INSNS (3), /* alu. */
COSTS_N_INSNS (3), /* mult. */
- COSTS_N_INSNS (2), /* movi. */
- COSTS_N_INSNS (2), /* dup. */
- COSTS_N_INSNS (2) /* extract. */
+ COSTS_N_INSNS (1), /* movi. */
+ COSTS_N_INSNS (1), /* dup. */
+ COSTS_N_INSNS (1) /* extract. */
}
};
@@ -983,9 +983,9 @@ const struct cpu_cost_table ampere1b_extra_costs =
{
COSTS_N_INSNS (1), /* alu. */
COSTS_N_INSNS (2), /* mult. */
- COSTS_N_INSNS (1), /* movi. */
- COSTS_N_INSNS (1), /* dup. */
- COSTS_N_INSNS (1) /* extract. */
+ COSTS_N_INSNS (0), /* movi. */
+ COSTS_N_INSNS (0), /* dup. */
+ COSTS_N_INSNS (0) /* extract. */
}
};
diff --git a/gcc/config/aarch64/aarch64-elf-metadata.cc b/gcc/config/aarch64/aarch64-elf-metadata.cc
new file mode 100644
index 0000000..88fbb93
--- /dev/null
+++ b/gcc/config/aarch64/aarch64-elf-metadata.cc
@@ -0,0 +1,145 @@
+/* ELF metadata for AArch64 architecture.
+ Copyright (C) 2024-2025 Free Software Foundation, Inc.
+ Contributed by ARM Ltd.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+#define INCLUDE_STRING
+#define INCLUDE_ALGORITHM
+#define INCLUDE_MEMORY
+#define INCLUDE_VECTOR
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "backend.h"
+#include "target.h"
+#include "rtl.h"
+#include "output.h"
+
+#include "aarch64-elf-metadata.h"
+
+/* Defined for convenience. */
+#define POINTER_BYTES (POINTER_SIZE / BITS_PER_UNIT)
+
+namespace aarch64 {
+
+constexpr unsigned GNU_PROPERTY_AARCH64_FEATURE_1_AND = 0xc0000000;
+constexpr unsigned GNU_PROPERTY_AARCH64_FEATURE_1_BTI = (1U << 0);
+constexpr unsigned GNU_PROPERTY_AARCH64_FEATURE_1_PAC = (1U << 1);
+constexpr unsigned GNU_PROPERTY_AARCH64_FEATURE_1_GCS = (1U << 2);
+
+namespace {
+
+std::string
+gnu_property_features_to_string (unsigned feature_1_and)
+{
+ struct flag_name
+ {
+ unsigned int mask;
+ const char *name;
+ };
+
+ static const flag_name flags[] = {
+ {GNU_PROPERTY_AARCH64_FEATURE_1_BTI, "BTI"},
+ {GNU_PROPERTY_AARCH64_FEATURE_1_PAC, "PAC"},
+ {GNU_PROPERTY_AARCH64_FEATURE_1_GCS, "GCS"},
+ };
+
+ const char *separator = "";
+ std::string s_features;
+ for (auto &flag : flags)
+ if (feature_1_and & flag.mask)
+ {
+ s_features.append (separator).append (flag.name);
+ separator = ", ";
+ }
+ return s_features;
+};
+
+} // namespace anonymous
+
+section_note_gnu_property::section_note_gnu_property ()
+ : m_feature_1_and (0) {}
+
+void
+section_note_gnu_property::bti_enabled ()
+{
+ m_feature_1_and |= GNU_PROPERTY_AARCH64_FEATURE_1_BTI;
+}
+
+void
+section_note_gnu_property::pac_enabled ()
+{
+ m_feature_1_and |= GNU_PROPERTY_AARCH64_FEATURE_1_PAC;
+}
+
+void
+section_note_gnu_property::gcs_enabled ()
+{
+ m_feature_1_and |= GNU_PROPERTY_AARCH64_FEATURE_1_GCS;
+}
+
+void
+section_note_gnu_property::write () const
+{
+ if (m_feature_1_and)
+ {
+ /* Generate .note.gnu.property section. */
+ switch_to_section (
+ get_section (".note.gnu.property", SECTION_NOTYPE, NULL));
+
+ /* PT_NOTE header: namesz, descsz, type.
+ namesz = 4 ("GNU\0")
+ descsz = 16 (Size of the program property array)
+ [(12 + padding) * Number of array elements]
+ type = 5 (NT_GNU_PROPERTY_TYPE_0). */
+ assemble_align (POINTER_SIZE);
+ assemble_integer (GEN_INT (4), 4, 32, 1);
+ assemble_integer (GEN_INT (ROUND_UP (12, POINTER_BYTES)), 4, 32, 1);
+ assemble_integer (GEN_INT (5), 4, 32, 1);
+
+ /* PT_NOTE name. */
+ assemble_string ("GNU", 4);
+
+ /* PT_NOTE contents for NT_GNU_PROPERTY_TYPE_0:
+ type = GNU_PROPERTY_AARCH64_FEATURE_1_AND
+ datasz = 4
+ data = feature_1_and. */
+ fputs (integer_asm_op (4, true), asm_out_file);
+ fprint_whex (asm_out_file, GNU_PROPERTY_AARCH64_FEATURE_1_AND);
+ putc ('\n', asm_out_file);
+ assemble_integer (GEN_INT (4), 4, 32, 1);
+
+ fputs (integer_asm_op (4, true), asm_out_file);
+ fprint_whex (asm_out_file, m_feature_1_and);
+ if (flag_debug_asm)
+ {
+ auto const &s_features
+ = gnu_property_features_to_string (m_feature_1_and);
+ asm_fprintf (asm_out_file,
+ "\t%s GNU_PROPERTY_AARCH64_FEATURE_1_AND (%s)\n",
+ ASM_COMMENT_START, s_features.c_str ());
+ }
+ else
+ putc ('\n', asm_out_file);
+
+ /* Pad the size of the note to the required alignment. */
+ assemble_align (POINTER_SIZE);
+ }
+}
+
+} // namespace aarch64
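Assuming an LP64 target (POINTER_BYTES == 8) with all three features enabled, the note emitted by write () looks roughly like:

	.section	.note.gnu.property
	.p2align	3
	.word	4		// namesz ("GNU\0")
	.word	16		// descsz = ROUND_UP (12, 8)
	.word	5		// type = NT_GNU_PROPERTY_TYPE_0
	.string	"GNU"
	.word	0xc0000000	// GNU_PROPERTY_AARCH64_FEATURE_1_AND
	.word	4		// datasz
	.word	0x7		// BTI | PAC | GCS
	.p2align	3	// pad note to POINTER_SIZE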
diff --git a/gcc/config/aarch64/aarch64-elf-metadata.h b/gcc/config/aarch64/aarch64-elf-metadata.h
new file mode 100644
index 0000000..e99f6df
--- /dev/null
+++ b/gcc/config/aarch64/aarch64-elf-metadata.h
@@ -0,0 +1,253 @@
+/* ELF metadata for AArch64 architecture.
+ Copyright (C) 2024-2025 Free Software Foundation, Inc.
+ Contributed by ARM Ltd.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef GCC_AARCH64_ELF_METADATA_H
+#define GCC_AARCH64_ELF_METADATA_H
+
+#include "vec.h"
+
+namespace aarch64 {
+
+class section_note_gnu_property
+{
+ public:
+ section_note_gnu_property ();
+
+ /* Add BTI flag to GNU properties. */
+ void bti_enabled ();
+ /* Add GCS flag to GNU properties. */
+ void gcs_enabled ();
+ /* Add PAC flag to GNU properties. */
+ void pac_enabled ();
+
+ /* Write the data to the assembly file. */
+ void write () const;
+
+ private:
+ unsigned m_feature_1_and;
+};
+
+enum subsection_optionality : uint8_t
+{
+ required = 0x0,
+ optional = 0x1,
+};
+
+enum subsection_val_type : uint8_t
+{
+ uleb128 = 0x0,
+ ntbs = 0x1,
+};
+
+enum BA_TagFeature_t : uint8_t
+{
+ Tag_Feature_BTI = 0,
+ Tag_Feature_PAC = 1,
+ Tag_Feature_GCS = 2,
+};
+
+template <typename T_tag, typename T_val>
+struct aeabi_attribute
+{
+ T_tag tag;
+ T_val value;
+};
+
+template <typename T_tag, typename T_val>
+aeabi_attribute<T_tag, T_val>
+make_aeabi_attribute (T_tag tag, T_val val)
+{
+ return aeabi_attribute<T_tag, T_val>{tag, val};
+}
+
+namespace details {
+
+constexpr const char *
+to_c_str (bool b)
+{
+ return b ? "true" : "false";
+}
+
+constexpr const char *
+to_c_str (const char *s)
+{
+ return s;
+}
+
+constexpr const char *
+to_c_str (subsection_optionality v)
+{
+ return (v == optional ? "optional"
+ : v == required ? "required"
+ : nullptr);
+}
+
+constexpr const char *
+to_c_str (subsection_val_type v)
+{
+ return (v == uleb128 ? "ULEB128"
+ : v == ntbs ? "NTBS"
+ : nullptr);
+}
+
+constexpr const char *
+to_c_str (BA_TagFeature_t feature)
+{
+ return (feature == Tag_Feature_BTI ? "Tag_Feature_BTI"
+ : feature == Tag_Feature_PAC ? "Tag_Feature_PAC"
+ : feature == Tag_Feature_GCS ? "Tag_Feature_GCS"
+ : nullptr);
+}
+
+template <
+ typename T,
+ typename = typename std::enable_if<std::is_unsigned<T>::value, T>::type
+>
+constexpr const char *
+aeabi_attr_str_fmt (T)
+{
+ return "\t.aeabi_attribute %s, %u";
+}
+
+constexpr const char *
+aeabi_attr_str_fmt (const char *)
+{
+ return "\t.aeabi_attribute %s, \"%s\"";
+}
+
+template <
+ typename T,
+ typename = typename std::enable_if<std::is_unsigned<T>::value, T>::type
+>
+constexpr uint8_t
+aeabi_attr_val_for_fmt (T value)
+{
+ return static_cast<uint8_t>(value);
+}
+
+constexpr const char *
+aeabi_attr_val_for_fmt (const char *s)
+{
+ return s;
+}
+
+template <typename T_tag, typename T_val>
+void
+write (FILE *out_file, aeabi_attribute<T_tag, T_val> const &attr)
+{
+ asm_fprintf (out_file, aeabi_attr_str_fmt (T_val{}),
+ to_c_str (attr.tag), aeabi_attr_val_for_fmt (attr.value));
+ if (flag_debug_asm)
+ asm_fprintf (out_file, "\t%s %s: %s", ASM_COMMENT_START,
+ to_c_str (attr.tag), to_c_str (attr.value));
+ asm_fprintf (out_file, "\n");
+}
+
+template <
+ typename T,
+ typename = typename std::enable_if<std::is_unsigned<T>::value, T>::type
+>
+constexpr subsection_val_type
+deduce_attr_av_type (T)
+{
+ return subsection_val_type::uleb128;
+}
+
+constexpr subsection_val_type
+deduce_attr_av_type (const char *)
+{
+ return subsection_val_type::ntbs;
+}
+
+} // namespace details
+
+/* AEABI subsections can be public or private. A subsection is public if it is
+ prefixed with "aeabi", private otherwise. The header of an AEABI subsection
+ is composed of a name (usually a vendor name), an optionality status (optional
+ or required), and the expected type of its associated attributes (ULEB128 or
+ NTBS). Note: the attributes within a subsection all have the same type.
+ An attribute is composed of a tag identifier (ULEB128), and its value (ULEB128
+ or NTBS).
+
+ Syntax:
+ .aeabi_subsection NameOfTheSubsection: string (=NTBS),
+ Optional: boolean (=ULEB128),
+ AttributeValueType: enum{ULEB128, NTBS} (=ULEB128)
+ [
+ .aeabi_attribute TagIdentifier: ULEB128,
+ TagValue: Variant[ULEB128|NTBS]
+ ]*
+
+ Example:
+ .aeabi_subsection .aeabi-feature-and-bits, optional, ULEB128
+ .aeabi_attribute Tag_Feature_GCS, 1 // Tag_Feature_GCS: true
+
+ Note: The textual representations of the tag and its value are emitted as a
+ comment alongside their numerical representations to annotate the assembler
+ output when the developer flag '-dA' is provided. */
+template <
+ typename T_tag, /* The type of a tag. */
+ typename T_val, /* The type of a value. */
+ size_t N = 0 /* The number of expected attributes if we know it. */
+>
+class aeabi_subsection
+{
+ public:
+ aeabi_subsection (const char *name, bool optional)
+ : m_name (name),
+ m_optionality (optional
+ ? subsection_optionality::optional
+ : subsection_optionality::required),
+ m_avtype (details::deduce_attr_av_type (T_val{}))
+ {}
+
+ /* Append an attribute to the subsection. */
+ void append (aeabi_attribute<T_tag, T_val> &&attr)
+ {
+ m_attributes.quick_push (std::move (attr));
+ }
+
+ /* Write the data to the assembly file. */
+ void write (FILE *out_file) const
+ {
+ asm_fprintf (out_file, "\n\t.aeabi_subsection %s, %s, %s\n",
+ m_name, details::to_c_str (m_optionality),
+ details::to_c_str (m_avtype));
+
+ for (auto const &attr : m_attributes)
+ details::write (out_file, attr);
+ }
+
+ /* Indicate if the subsection is empty. */
+ bool empty () const
+ {
+ return m_attributes.is_empty ();
+ }
+
+ private:
+ const char *m_name;
+ subsection_optionality m_optionality;
+ subsection_val_type m_avtype;
+ auto_vec<aeabi_attribute<T_tag, T_val>, N> m_attributes;
+};
+
+} // namespace aarch64
+
+#endif /* GCC_AARCH64_ELF_METADATA_H */
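A minimal usage sketch of the subsection helper (hypothetical caller; the tag/value choices mirror the example in the class comment above):

    aeabi_subsection<BA_TagFeature_t, unsigned, 3>
      features (".aeabi-feature-and-bits", /*optional=*/true);
    features.append (make_aeabi_attribute (Tag_Feature_BTI, 1u));
    features.append (make_aeabi_attribute (Tag_Feature_GCS, 1u));
    if (!features.empty ())
      features.write (asm_out_file);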
diff --git a/gcc/config/aarch64/aarch64-option-extensions.def b/gcc/config/aarch64/aarch64-option-extensions.def
index dbbb021..db88df0 100644
--- a/gcc/config/aarch64/aarch64-option-extensions.def
+++ b/gcc/config/aarch64/aarch64-option-extensions.def
@@ -128,7 +128,9 @@ AARCH64_OPT_FMV_EXTENSION("sha2", SHA2, (SIMD), (), (), "sha1 sha2")
AARCH64_FMV_FEATURE("sha3", SHA3, (SHA3))
-AARCH64_OPT_FMV_EXTENSION("aes", AES, (SIMD), (), (), "aes")
+AARCH64_OPT_EXTENSION("aes", AES, (SIMD), (), (), "aes")
+
+AARCH64_FMV_FEATURE("aes", PMULL, (AES))
/* +nocrypto disables AES, SHA2 and SM4, and anything that depends on them
(such as SHA3 and the SVE2 crypto extensions). */
@@ -171,8 +173,6 @@ AARCH64_OPT_FMV_EXTENSION("i8mm", I8MM, (SIMD), (), (), "i8mm")
instructions. */
AARCH64_OPT_FMV_EXTENSION("bf16", BF16, (FP), (SIMD), (), "bf16")
-AARCH64_FMV_FEATURE("rpres", RPRES, ())
-
AARCH64_OPT_FMV_EXTENSION("sve", SVE, (SIMD, F16, FCMA), (), (), "sve")
/* This specifically does not imply +sve. */
@@ -190,7 +190,7 @@ AARCH64_OPT_FMV_EXTENSION("sve2", SVE2, (SVE), (), (), "sve2")
AARCH64_OPT_EXTENSION("sve2-aes", SVE2_AES, (SVE2, AES), (), (), "sveaes")
-AARCH64_FMV_FEATURE("sve2-aes", SVE_AES, (SVE2_AES))
+AARCH64_FMV_FEATURE("sve2-aes", SVE_PMULL128, (SVE2_AES))
AARCH64_OPT_EXTENSION("sve2-bitperm", SVE2_BITPERM, (SVE2), (), (),
"svebitperm")
@@ -245,9 +245,11 @@ AARCH64_OPT_EXTENSION("sme-b16b16", SME_B16B16, (SME2, SVE_B16B16), (), (), "sme
AARCH64_OPT_EXTENSION("sme-f16f16", SME_F16F16, (SME2), (), (), "smef16f16")
-AARCH64_OPT_EXTENSION("mops", MOPS, (), (), (), "mops")
+AARCH64_OPT_FMV_EXTENSION("mops", MOPS, (), (), (), "mops")
+
+AARCH64_OPT_FMV_EXTENSION("cssc", CSSC, (), (), (), "cssc")
-AARCH64_OPT_EXTENSION("cssc", CSSC, (), (), (), "cssc")
+AARCH64_OPT_EXTENSION("cmpbr", CMPBR, (), (), (), "cmpbr")
AARCH64_OPT_EXTENSION("lse128", LSE128, (LSE), (), (), "lse128")
diff --git a/gcc/config/aarch64/aarch64-passes.def b/gcc/config/aarch64/aarch64-passes.def
index 9cf9d3e..6a53ff3 100644
--- a/gcc/config/aarch64/aarch64-passes.def
+++ b/gcc/config/aarch64/aarch64-passes.def
@@ -24,6 +24,5 @@ INSERT_PASS_BEFORE (pass_reorder_blocks, 1, pass_track_speculation);
INSERT_PASS_BEFORE (pass_late_thread_prologue_and_epilogue, 1, pass_switch_pstate_sm);
INSERT_PASS_BEFORE (pass_late_thread_prologue_and_epilogue, 1, pass_late_track_speculation);
INSERT_PASS_BEFORE (pass_shorten_branches, 1, pass_insert_bti);
-INSERT_PASS_AFTER (pass_if_after_combine, 1, pass_cc_fusion);
INSERT_PASS_BEFORE (pass_early_remat, 1, pass_ldp_fusion);
INSERT_PASS_BEFORE (pass_peephole2, 1, pass_ldp_fusion);
diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index 8f44aea..56efcf2 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -290,8 +290,8 @@ struct sve_vec_cost : simd_vec_cost
/* The cost of a gather load instruction. The x32 value is for loads
of 32-bit elements and the x64 value is for loads of 64-bit elements. */
- const int gather_load_x32_cost;
- const int gather_load_x64_cost;
+ const unsigned int gather_load_x32_cost;
+ const unsigned int gather_load_x64_cost;
/* Additional loop initialization cost of using a gather load instruction. The x32
value is for loads of 32-bit elements and the x64 value is for loads of
@@ -933,6 +933,7 @@ char *aarch64_output_simd_mov_imm (rtx, unsigned);
char *aarch64_output_simd_orr_imm (rtx, unsigned);
char *aarch64_output_simd_and_imm (rtx, unsigned);
char *aarch64_output_simd_xor_imm (rtx, unsigned);
+char *aarch64_output_fmov (rtx);
char *aarch64_output_sve_mov_immediate (rtx);
char *aarch64_output_sve_ptrues (rtx);
@@ -946,8 +947,10 @@ bool aarch64_parallel_select_half_p (machine_mode, rtx);
bool aarch64_pars_overlap_p (rtx, rtx);
bool aarch64_simd_scalar_immediate_valid_for_move (rtx, scalar_int_mode);
bool aarch64_simd_shift_imm_p (rtx, machine_mode, bool);
+bool aarch64_sve_valid_pred_p (rtx, machine_mode);
bool aarch64_sve_ptrue_svpattern_p (rtx, struct simd_immediate_info *);
bool aarch64_simd_valid_and_imm (rtx);
+bool aarch64_simd_valid_and_imm_fmov (rtx, unsigned int * = NULL);
bool aarch64_simd_valid_mov_imm (rtx);
bool aarch64_simd_valid_orr_imm (rtx);
bool aarch64_simd_valid_xor_imm (rtx);
@@ -1017,7 +1020,7 @@ void aarch64_err_no_fpadvsimd (machine_mode);
void aarch64_expand_epilogue (rtx_call_insn *);
rtx aarch64_ptrue_all (unsigned int);
opt_machine_mode aarch64_ptrue_all_mode (rtx);
-rtx aarch64_convert_sve_data_to_pred (rtx, machine_mode, rtx);
+rtx aarch64_convert_sve_data_to_pred (rtx, rtx);
rtx aarch64_expand_sve_dupq (rtx, machine_mode, rtx);
void aarch64_expand_mov_immediate (rtx, rtx);
rtx aarch64_stack_protect_canary_mem (machine_mode, rtx, aarch64_salt_type);
@@ -1026,12 +1029,19 @@ rtx aarch64_ptrue_reg (machine_mode, unsigned int);
rtx aarch64_ptrue_reg (machine_mode, machine_mode);
rtx aarch64_pfalse_reg (machine_mode);
bool aarch64_sve_same_pred_for_ptest_p (rtx *, rtx *);
+rtx aarch64_sve_packed_pred (machine_mode);
+rtx aarch64_sve_fp_pred (machine_mode, rtx *);
+rtx aarch64_sve_emit_masked_fp_pred (machine_mode, rtx);
+void aarch64_emit_load_store_through_mode (rtx, rtx, machine_mode);
+bool aarch64_expand_maskloadstore (rtx *, machine_mode);
void aarch64_emit_sve_pred_move (rtx, rtx, rtx);
void aarch64_expand_sve_mem_move (rtx, rtx, machine_mode);
bool aarch64_maybe_expand_sve_subreg_move (rtx, rtx);
rtx aarch64_replace_reg_mode (rtx, machine_mode);
void aarch64_split_sve_subreg_move (rtx, rtx, rtx);
+void aarch64_emit_sve_pred_vec_duplicate (machine_mode, rtx, rtx);
void aarch64_expand_prologue (void);
+void aarch64_decompose_vec_struct_index (machine_mode, rtx *, rtx *, bool);
void aarch64_expand_vector_init (rtx, rtx);
void aarch64_sve_expand_vector_init_subvector (rtx, rtx);
void aarch64_sve_expand_vector_init (rtx, rtx);
@@ -1053,6 +1063,7 @@ void aarch64_subvti_scratch_regs (rtx, rtx, rtx *,
rtx *, rtx *, rtx *);
void aarch64_expand_subvti (rtx, rtx, rtx,
rtx, rtx, rtx, rtx, bool);
+int aarch64_exact_log2_inverse (unsigned int, rtx);
/* Initialize builtins for SIMD intrinsics. */
@@ -1087,6 +1098,7 @@ bool aarch64_legitimate_address_p (machine_mode, rtx, bool,
aarch64_addr_query_type = ADDR_QUERY_M);
machine_mode aarch64_select_cc_mode (RTX_CODE, rtx, rtx);
rtx aarch64_gen_compare_reg (RTX_CODE, rtx, rtx);
+rtx aarch64_gen_compare_split_imm24 (rtx, rtx, rtx);
bool aarch64_maxmin_plus_const (rtx_code, rtx *, bool);
rtx aarch64_load_tp (rtx);
@@ -1126,6 +1138,8 @@ bool aarch64_general_check_builtin_call (location_t, vec<location_t>,
unsigned int, tree, unsigned int,
tree *);
+bool aarch64_cb_rhs (rtx_code op_code, rtx rhs);
+
namespace aarch64 {
void report_non_ice (location_t, tree, unsigned int);
void report_out_of_range (location_t, tree, unsigned int, HOST_WIDE_INT,
@@ -1223,7 +1237,6 @@ rtl_opt_pass *make_pass_fma_steering (gcc::context *);
rtl_opt_pass *make_pass_track_speculation (gcc::context *);
rtl_opt_pass *make_pass_late_track_speculation (gcc::context *);
rtl_opt_pass *make_pass_insert_bti (gcc::context *ctxt);
-rtl_opt_pass *make_pass_cc_fusion (gcc::context *ctxt);
rtl_opt_pass *make_pass_switch_pstate_sm (gcc::context *ctxt);
rtl_opt_pass *make_pass_ldp_fusion (gcc::context *);
@@ -1260,10 +1273,15 @@ void aarch64_restore_za (rtx);
void aarch64_expand_crc_using_pmull (scalar_mode, scalar_mode, rtx *);
void aarch64_expand_reversed_crc_using_pmull (scalar_mode, scalar_mode, rtx *);
+void aarch64_expand_fp_spaceship (rtx, rtx, rtx, rtx);
+extern bool aarch64_pacret_enabled ();
extern bool aarch64_gcs_enabled ();
extern unsigned aarch64_data_alignment (const_tree exp, unsigned align);
extern unsigned aarch64_stack_alignment (const_tree exp, unsigned align);
+extern rtx aarch64_gen_compare_zero_and_branch (rtx_code code, rtx x,
+ rtx_code_label *label);
+
#endif /* GCC_AARCH64_PROTOS_H */
diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def
index 6cc45b1..685bf0d 100644
--- a/gcc/config/aarch64/aarch64-simd-builtins.def
+++ b/gcc/config/aarch64/aarch64-simd-builtins.def
@@ -57,7 +57,7 @@
VAR1 (BINOPP, pmull, 0, DEFAULT, v8qi)
VAR1 (BINOPP, pmull_hi, 0, DEFAULT, v16qi)
BUILTIN_VHSDF_HSDF (BINOP, fmulx, 0, FP)
- BUILTIN_VHSDF_DF (UNOP, sqrt, 2, FP)
+ BUILTIN_VHSDF_HSDF (UNOP, sqrt, 2, FP)
BUILTIN_VDQ_I (BINOP, addp, 0, DEFAULT)
BUILTIN_VDQ_I (BINOPU, addp, 0, DEFAULT)
BUILTIN_VDQ_BHSI (UNOP, clrsb, 2, DEFAULT)
@@ -848,9 +848,6 @@
BUILTIN_VHSDF_HSDF (BINOP_USS, facgt, 0, FP)
BUILTIN_VHSDF_HSDF (BINOP_USS, facge, 0, FP)
- /* Implemented by sqrt<mode>2. */
- VAR1 (UNOP, sqrt, 2, FP, hf)
-
/* Implemented by <optab><mode>hf2. */
VAR1 (UNOP, floatdi, 2, FP, hf)
VAR1 (UNOP, floatsi, 2, FP, hf)
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index e2afe87..8b75c3d 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -1117,17 +1117,17 @@
[(set_attr "type" "neon_fp_abd_<stype><q>")]
)
-;; For AND (vector, register) and BIC (vector, immediate)
+;; For AND (vector, register), BIC (vector, immediate) and FMOV (register)
(define_insn "and<mode>3<vczle><vczbe>"
[(set (match_operand:VDQ_I 0 "register_operand")
(and:VDQ_I (match_operand:VDQ_I 1 "register_operand")
(match_operand:VDQ_I 2 "aarch64_reg_or_and_imm")))]
"TARGET_SIMD"
- {@ [ cons: =0 , 1 , 2 ]
- [ w , w , w ] and\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>
- [ w , 0 , Db ] << aarch64_output_simd_and_imm (operands[2], <bitsize>);
+ {@ [ cons: =0 , 1 , 2 ; attrs: type ]
+ [ w , w , w ; neon_logic<q> ] and\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>
+ [ w , w , Df ; fmov ] << aarch64_output_fmov (operands[2]);
+ [ w , 0 , Db ; neon_logic<q> ] << aarch64_output_simd_and_imm (operands[2], <bitsize>);
}
- [(set_attr "type" "neon_logic<q>")]
)
;; For ORR (vector, register) and ORR (vector, immediate)
@@ -1190,15 +1190,20 @@
[(set_attr "type" "neon_ins<q>, neon_from_gp<q>, neon_load1_one_lane<q>")]
)
+;; Inserting from the zero register into a vector lane is treated as an
+;; expensive GP->FP move on all CPUs. Avoid it when optimizing for speed.
(define_insn "aarch64_simd_vec_set_zero<mode>"
[(set (match_operand:VALL_F16 0 "register_operand" "=w")
(vec_merge:VALL_F16
- (match_operand:VALL_F16 1 "aarch64_simd_imm_zero" "")
- (match_operand:VALL_F16 3 "register_operand" "0")
+ (match_operand:VALL_F16 1 "register_operand" "0")
+ (match_operand:VALL_F16 3 "aarch64_simd_imm_zero" "")
(match_operand:SI 2 "immediate_operand" "i")))]
- "TARGET_SIMD && exact_log2 (INTVAL (operands[2])) >= 0"
+ "TARGET_SIMD && aarch64_exact_log2_inverse (<nunits>, operands[2]) >= 0
+ && optimize_function_for_size_p (cfun)"
{
- int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
+ int elt = ENDIAN_LANE_N (<nunits>,
+ aarch64_exact_log2_inverse (<nunits>,
+ operands[2]));
operands[2] = GEN_INT ((HOST_WIDE_INT) 1 << elt);
return "ins\\t%0.<Vetype>[%p2], <vwcore>zr";
}
@@ -1626,6 +1631,24 @@
}
)
+(define_expand "vec_set<mode>"
+ [(match_operand:VSTRUCT_QD 0 "register_operand")
+ (match_operand:<VEL> 1 "aarch64_simd_nonimmediate_operand")
+ (match_operand:SI 2 "immediate_operand")]
+ "TARGET_SIMD"
+{
+ aarch64_decompose_vec_struct_index (<VSTRUCT_ELT>mode, &operands[0],
+ &operands[2], true);
+ /* For tuples of 64-bit modes, <vstruct_elt> is the 64-bit scalar mode.
+ Allow gen_vec_set<vstruct_elt> to cope with those cases too. */
+ auto gen_vec_setdi ATTRIBUTE_UNUSED = [](rtx x0, rtx x1, rtx)
+ {
+ return gen_move_insn (x0, x1);
+ };
+ auto gen_vec_setdf ATTRIBUTE_UNUSED = gen_vec_setdi;
+ emit_insn (gen_vec_set<vstruct_elt> (operands[0], operands[1], operands[2]));
+ DONE;
+})
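+
+;; For example (roughly speaking), setting lane 5 of a tuple of V4SI
+;; vectors decomposes into lane 5 % 4 == 1 of subvector 5 / 4 == 1, and
+;; then reuses the ordinary vec_setv4si pattern.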
(define_insn "aarch64_mla<mode><vczle><vczbe>"
[(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
@@ -3946,7 +3969,7 @@
rtx cc_reg = aarch64_gen_compare_reg (code, val, const0_rtx);
rtx cmp_rtx = gen_rtx_fmt_ee (code, DImode, cc_reg, const0_rtx);
- emit_jump_insn (gen_condjump (cmp_rtx, cc_reg, operands[3]));
+ emit_jump_insn (gen_aarch64_bcond (cmp_rtx, cc_reg, operands[3]));
DONE;
})
@@ -5026,6 +5049,36 @@
DONE;
})
+;; convert (truncate)(~x >> imm) into (truncate)(((u16)-1 - x) >> imm)
+;; because it will result in the 'not' being replaced with a constant load
+;; which allows for better loop optimization.
+;; We limit this to truncations that take the upper half and shift it to the
+;; lower half as we use subhn (patterns that would have generated an shrn
+;; otherwise).
+;; On some implementations the use of subhn also results in better throughput.
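+;; For example, with 16-bit elements and a shift count of 8:
+;;   (u8) (~x >> 8) == (u8) ((0xffff - x) >> 8)
+;; since ~x == -1 - x == 0xffff - x in 16-bit arithmetic; taking the high
+;; half of (0xffff - x) is subhn with an all-ones first operand.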
+(define_insn_and_split "*shrn_to_subhn_<mode>"
+ [(set (match_operand:<VNARROWQ> 0 "register_operand" "=&w")
+ (truncate:<VNARROWQ>
+ (lshiftrt:VQN
+ (not:VQN (match_operand:VQN 1 "register_operand" "w"))
+ (match_operand:VQN 2 "aarch64_simd_shift_imm_vec_exact_top"))))]
+ "TARGET_SIMD"
+ "#"
+ "&& true"
+ [(const_int 0)]
+{
+ rtx tmp;
+ if (can_create_pseudo_p ())
+ tmp = gen_reg_rtx (<MODE>mode);
+ else
+ tmp = gen_rtx_REG (<MODE>mode, REGNO (operands[0]));
+ emit_move_insn (tmp, CONSTM1_RTX (<MODE>mode));
+ emit_insn (gen_aarch64_subhn<mode>_insn (operands[0], tmp,
+ operands[1], operands[2]));
+ DONE;
+})
+
+
;; pmul.
(define_insn "aarch64_pmul<mode>"
@@ -8881,6 +8934,26 @@
DONE;
})
+(define_expand "vec_extract<mode><Vel>"
+ [(match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand")
+ (match_operand:VSTRUCT_QD 1 "register_operand")
+ (match_operand:SI 2 "immediate_operand")]
+ "TARGET_SIMD"
+{
+ aarch64_decompose_vec_struct_index (<VSTRUCT_ELT>mode, &operands[1],
+ &operands[2], false);
+ /* For tuples of 64-bit modes, <vstruct_elt> is the 64-bit scalar mode.
+ Allow gen_vec_extract<vstruct_elt><Vel> to cope with those cases too. */
+ auto gen_vec_extractdidi ATTRIBUTE_UNUSED = [](rtx x0, rtx x1, rtx)
+ {
+ return gen_move_insn (x0, x1);
+ };
+ auto gen_vec_extractdfdf ATTRIBUTE_UNUSED = gen_vec_extractdidi;
+ emit_insn (gen_vec_extract<vstruct_elt><Vel> (operands[0], operands[1],
+ operands[2]));
+ DONE;
+})
+
;; Extract a 64-bit vector from one half of a 128-bit vector.
(define_expand "vec_extract<mode><Vhalf>"
[(match_operand:<VHALF> 0 "register_operand")
@@ -9110,12 +9183,12 @@
;; sha3
(define_insn "eor3q<mode>4"
- [(set (match_operand:VQ_I 0 "register_operand" "=w")
- (xor:VQ_I
- (xor:VQ_I
- (match_operand:VQ_I 2 "register_operand" "w")
- (match_operand:VQ_I 3 "register_operand" "w"))
- (match_operand:VQ_I 1 "register_operand" "w")))]
+ [(set (match_operand:VDQ_I 0 "register_operand" "=w")
+ (xor:VDQ_I
+ (xor:VDQ_I
+ (match_operand:VDQ_I 2 "register_operand" "w")
+ (match_operand:VDQ_I 3 "register_operand" "w"))
+ (match_operand:VDQ_I 1 "register_operand" "w")))]
"TARGET_SHA3"
"eor3\\t%0.16b, %1.16b, %2.16b, %3.16b"
[(set_attr "type" "crypto_sha3")]
@@ -9171,17 +9244,46 @@
)
(define_insn "bcaxq<mode>4"
- [(set (match_operand:VQ_I 0 "register_operand" "=w")
- (xor:VQ_I
- (and:VQ_I
- (not:VQ_I (match_operand:VQ_I 3 "register_operand" "w"))
- (match_operand:VQ_I 2 "register_operand" "w"))
- (match_operand:VQ_I 1 "register_operand" "w")))]
+ [(set (match_operand:VDQ_I 0 "register_operand" "=w")
+ (xor:VDQ_I
+ (and:VDQ_I
+ (not:VDQ_I (match_operand:VDQ_I 3 "register_operand" "w"))
+ (match_operand:VDQ_I 2 "register_operand" "w"))
+ (match_operand:VDQ_I 1 "register_operand" "w")))]
"TARGET_SHA3"
"bcax\\t%0.16b, %1.16b, %2.16b, %3.16b"
[(set_attr "type" "crypto_sha3")]
)
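+
+;; Scalar DImode BCAX.  Use the SHA3 vector instruction when the operands
+;; are in SIMD registers; otherwise split into an AND-NOT followed by an
+;; EOR on general registers.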
+(define_insn_and_split "*bcaxqdi4"
+ [(set (match_operand:DI 0 "register_operand")
+ (xor:DI
+ (and:DI
+ (not:DI (match_operand:DI 3 "register_operand"))
+ (match_operand:DI 2 "register_operand"))
+ (match_operand:DI 1 "register_operand")))]
+ "TARGET_SHA3"
+ {@ [ cons: =0, 1, 2 , 3 ; attrs: type ]
+ [ w , w, w , w ; crypto_sha3 ] bcax\t%0.16b, %1.16b, %2.16b, %3.16b
+ [ &r , r, r0, r0 ; multiple ] #
+ }
+ "&& REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))"
+ [(set (match_dup 4)
+ (and:DI (not:DI (match_dup 3))
+ (match_dup 2)))
+ (set (match_dup 0)
+ (xor:DI (match_dup 4)
+ (match_dup 1)))]
+ {
+ if (reload_completed)
+ operands[4] = operands[0];
+ else if (can_create_pseudo_p ())
+ operands[4] = gen_reg_rtx (DImode);
+ else
+ FAIL;
+ }
+)
+
;; SM3
(define_insn "aarch64_sm3ss1qv4si"
diff --git a/gcc/config/aarch64/aarch64-sme.md b/gcc/config/aarch64/aarch64-sme.md
index c49affd..0123ea0 100644
--- a/gcc/config/aarch64/aarch64-sme.md
+++ b/gcc/config/aarch64/aarch64-sme.md
@@ -38,6 +38,7 @@
;; ---- Binary arithmetic on ZA tile
;; ---- Binary arithmetic on ZA slice
;; ---- Binary arithmetic, writing to ZA slice
+;; ---- Absolute minimum/maximum
;;
;; == Ternary arithmetic
;; ---- [INT] Dot product
@@ -61,6 +62,10 @@
;; (b) they are sometimes used conditionally, particularly in streaming-
;; compatible code.
;;
+;; To prevent the latter from upsetting the assembler, we emit the literal
+;; encodings of "SMSTART SM" and "SMSTOP SM" when compiling without
+;; TARGET_SME.
+;;
;; =========================================================================
;; -------------------------------------------------------------------------
@@ -160,7 +165,9 @@
(clobber (reg:VNx16BI P14_REGNUM))
(clobber (reg:VNx16BI P15_REGNUM))]
""
- "smstart\tsm"
+ {
+ return TARGET_SME ? "smstart\tsm" : ".inst 0xd503437f // smstart sm";
+ }
)
;; Turn off streaming mode. This clobbers all SVE state.
@@ -195,7 +202,9 @@
(clobber (reg:VNx16BI P14_REGNUM))
(clobber (reg:VNx16BI P15_REGNUM))]
""
- "smstop\tsm"
+ {
+ return TARGET_SME ? "smstop\tsm" : ".inst 0xd503427f // smstop sm";
+ }
)
;; -------------------------------------------------------------------------
@@ -373,6 +382,8 @@
(reg:DI SME_STATE_REGNUM)
(reg:DI TPIDR2_SETUP_REGNUM)
(reg:DI ZA_SAVED_REGNUM)] UNSPEC_RESTORE_ZA))
+ (set (reg:DI SME_STATE_REGNUM)
+ (unspec:DI [(reg:DI SME_STATE_REGNUM)] UNSPEC_TPIDR2_RESTORE))
(clobber (reg:DI R0_REGNUM))
(clobber (reg:DI R14_REGNUM))
(clobber (reg:DI R15_REGNUM))
@@ -389,7 +400,8 @@
auto label = gen_label_rtx ();
auto tpidr2 = gen_rtx_REG (DImode, R16_REGNUM);
emit_insn (gen_aarch64_read_tpidr2 (tpidr2));
- auto jump = emit_likely_jump_insn (gen_aarch64_cbnedi1 (tpidr2, label));
+ auto pat = aarch64_gen_compare_zero_and_branch (NE, tpidr2, label);
+ auto jump = emit_likely_jump_insn (pat);
JUMP_LABEL (jump) = label;
aarch64_restore_za (operands[0]);
@@ -1262,6 +1274,23 @@
"<sme_int_op>\tza.<Vetype>[%w0, %1, vgx<vector_count>], %2, %3.<Vetype>"
)
+;; -------------------------------------------------------------------------
+;; ---- Absolute minimum/maximum
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - FAMIN (SME2+FAMINMAX)
+;; - FAMAX (SME2+FAMINMAX)
+;; -------------------------------------------------------------------------
+
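+;; The "%0" constraint below ties the first input to the output (these
+;; instructions are destructive) and also marks the two inputs as
+;; commutative.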
+(define_insn "@aarch64_sme_<faminmax_uns_op><mode>"
+ [(set (match_operand:SVE_Fx24 0 "register_operand" "=Uw<vector_count>")
+ (unspec:SVE_Fx24 [(match_operand:SVE_Fx24 1 "register_operand" "%0")
+ (match_operand:SVE_Fx24 2 "register_operand" "Uw<vector_count>")]
+ FAMINMAX_UNS))]
+ "TARGET_STREAMING_SME2 && TARGET_FAMINMAX"
+ "<faminmax_uns_op>\t%0, %1, %2"
+)
+
;; =========================================================================
;; == Ternary arithmetic
;; =========================================================================
diff --git a/gcc/config/aarch64/aarch64-speculation.cc b/gcc/config/aarch64/aarch64-speculation.cc
index 1c7be1a..618045a 100644
--- a/gcc/config/aarch64/aarch64-speculation.cc
+++ b/gcc/config/aarch64/aarch64-speculation.cc
@@ -160,9 +160,7 @@ aarch64_speculation_clobber_sp ()
emit_insn (gen_rtx_SET (scratch, sp));
emit_insn (gen_anddi3 (scratch, scratch, tracker));
emit_insn (gen_rtx_SET (sp, scratch));
- rtx_insn *seq = get_insns ();
- end_sequence ();
- return seq;
+ return end_sequence ();
}
/* Generate a code sequence to establish the tracker variable from the
@@ -176,9 +174,7 @@ aarch64_speculation_establish_tracker ()
rtx cc = aarch64_gen_compare_reg (EQ, sp, const0_rtx);
emit_insn (gen_cstoredi_neg (tracker,
gen_rtx_NE (CCmode, cc, const0_rtx), cc));
- rtx_insn *seq = get_insns ();
- end_sequence ();
- return seq;
+ return end_sequence ();
}
/* Main speculation tracking pass. */
@@ -405,8 +401,7 @@ aarch64_do_track_speculation ()
{
start_sequence ();
emit_insn (seq);
- seq = get_insns ();
- end_sequence ();
+ seq = end_sequence ();
}
for (rtx_insn *list = seq; list; list = NEXT_INSN (list))
diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.cc b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
index b439683..ecc0687 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins-base.cc
+++ b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
@@ -214,7 +214,8 @@ public:
expand (function_expander &e) const override
{
e.add_ptrue_hint (0, e.gp_mode (0));
- insn_code icode = code_for_aarch64_pred_fac (m_unspec, e.vector_mode (0));
+ insn_code icode = code_for_aarch64_pred_fac_acle (m_unspec,
+ e.vector_mode (0));
return e.use_exact_insn (icode);
}
@@ -497,10 +498,10 @@ public:
{
bool unsigned_p = e.type_suffix (0).unsigned_p;
rtx_code code = get_rtx_code (m_code, unsigned_p);
- return e.use_exact_insn (code_for_aarch64_pred_cmp (code, mode));
+ return e.use_exact_insn (code_for_aarch64_pred_cmp_acle (code, mode));
}
- insn_code icode = code_for_aarch64_pred_fcm (m_unspec_for_fp, mode);
+ insn_code icode = code_for_aarch64_pred_fcm_acle (m_unspec_for_fp, mode);
return e.use_exact_insn (icode);
}
@@ -542,7 +543,7 @@ public:
/* If the argument is a constant that the unwidened comparisons
can handle directly, use them instead. */
- insn_code icode = code_for_aarch64_pred_cmp (code, mode);
+ insn_code icode = code_for_aarch64_pred_cmp_acle (code, mode);
rtx op2 = unwrap_const_vec_duplicate (e.args[3]);
if (CONSTANT_P (op2)
&& insn_data[icode].operand[4].predicate (op2, DImode))
@@ -581,7 +582,8 @@ public:
expand (function_expander &e) const override
{
e.add_ptrue_hint (0, e.gp_mode (0));
- return e.use_exact_insn (code_for_aarch64_pred_fcmuo (e.vector_mode (0)));
+ auto mode = e.vector_mode (0);
+ return e.use_exact_insn (code_for_aarch64_pred_fcmuo_acle (mode));
}
};
@@ -1048,6 +1050,23 @@ public:
rtx
expand (function_expander &e) const override
{
+ machine_mode mode = e.vector_mode (0);
+ if (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL)
+ {
+ gcc_assert (e.pred == PRED_none);
+
+ rtx src = e.args[0];
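+	/* A constant argument becomes PFALSE (for zero) or a PTRUE sized
+	   by the element width (for any nonzero value).  */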
+ if (GET_CODE (src) == CONST_INT)
+ return (src == const0_rtx
+ ? CONST0_RTX (VNx16BImode)
+ : aarch64_ptrue_all (e.type_suffix (0).element_bytes));
+
+ rtx dest = e.get_reg_target ();
+ src = force_reg (GET_MODE (src), src);
+ aarch64_emit_sve_pred_vec_duplicate (mode, dest, src);
+ return dest;
+ }
+
if (e.pred == PRED_none || e.pred == PRED_x)
/* There's no benefit to using predicated instructions for _x here. */
return e.use_unpred_insn (e.direct_optab_handler (vec_duplicate_optab));
@@ -1056,7 +1075,6 @@ public:
the duplicate of the function argument and the "false" value
is the value of inactive lanes. */
insn_code icode;
- machine_mode mode = e.vector_mode (0);
if (valid_for_const_vector_p (GET_MODE_INNER (mode), e.args.last ()))
/* Duplicate the constant to fill a vector. The pattern optimizes
various cases involving constant operands, falling back to SEL
@@ -1197,8 +1215,7 @@ public:
if (mode != e.vector_mode (0))
{
rtx data_dupq = aarch64_expand_sve_dupq (NULL, mode, vq_reg);
- return aarch64_convert_sve_data_to_pred (e.possible_target,
- e.vector_mode (0), data_dupq);
+ return aarch64_convert_sve_data_to_pred (e.possible_target, data_dupq);
}
return aarch64_expand_sve_dupq (e.possible_target, mode, vq_reg);
@@ -1259,9 +1276,10 @@ public:
index = target;
}
- e.args[0] = gen_lowpart (VNx2DImode, e.args[0]);
+ e.args[0] = aarch64_sve_reinterpret (VNx2DImode, e.args[0]);
e.args[1] = index;
- return e.use_exact_insn (CODE_FOR_aarch64_sve_tblvnx2di);
+ rtx res = e.use_exact_insn (CODE_FOR_aarch64_sve_tblvnx2di);
+ return aarch64_sve_reinterpret (mode, res);
}
};
@@ -2857,7 +2875,10 @@ public:
rtx
expand (function_expander &e) const override
{
- return e.use_exact_insn (code_for_aarch64_sve_rev (e.vector_mode (0)));
+ auto mode = e.vector_mode (0);
+ return e.use_exact_insn (e.type_suffix (0).bool_p
+ ? code_for_aarch64_sve_rev_acle (mode)
+ : code_for_aarch64_sve_rev (mode));
}
};
@@ -3248,7 +3269,7 @@ public:
unsigned int unpacks = m_high_p ? UNSPEC_UNPACKSHI : UNSPEC_UNPACKSLO;
insn_code icode;
if (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL)
- icode = code_for_aarch64_sve_punpk (unpacku, mode);
+ icode = code_for_aarch64_sve_punpk_acle (unpacku);
else
{
int unspec = e.type_suffix (0).unsigned_p ? unpacku : unpacks;
diff --git a/gcc/config/aarch64/aarch64-sve-builtins-functions.h b/gcc/config/aarch64/aarch64-sve-builtins-functions.h
index 6f1c694..c05946d 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins-functions.h
+++ b/gcc/config/aarch64/aarch64-sve-builtins-functions.h
@@ -630,7 +630,10 @@ public:
rtx
expand (function_expander &e) const override
{
- insn_code icode = code_for_aarch64_sve (m_unspec, e.vector_mode (0));
+ auto mode = e.vector_mode (0);
+ insn_code icode = (e.type_suffix (0).bool_p
+ ? code_for_aarch64_sve_acle (m_unspec, mode)
+ : code_for_aarch64_sve (m_unspec, mode));
return e.use_exact_insn (icode);
}
@@ -838,7 +841,8 @@ public:
machine_mode pred_mode = e.vector_mode (0);
scalar_mode reg_mode = GET_MODE_INNER (e.vector_mode (1));
- return e.use_exact_insn (code_for_while (unspec, reg_mode, pred_mode));
+ auto icode = code_for_aarch64_sve_while_acle (unspec, reg_mode, pred_mode);
+ return e.use_exact_insn (icode);
}
/* The unspec codes associated with signed and unsigned operations
diff --git a/gcc/config/aarch64/aarch64-sve-builtins-sme.def b/gcc/config/aarch64/aarch64-sve-builtins-sme.def
index f75c0a5..117b70e 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins-sme.def
+++ b/gcc/config/aarch64/aarch64-sve-builtins-sme.def
@@ -92,6 +92,12 @@ DEF_SME_FUNCTION (svstr_zt, str_zt, none, none)
DEF_SME_FUNCTION (svzero_zt, inherent_zt, none, none)
#undef REQUIRED_EXTENSIONS
+#define REQUIRED_EXTENSIONS streaming_only (AARCH64_FL_SME2 \
+ | AARCH64_FL_FAMINMAX)
+DEF_SME_FUNCTION_GS (svamin, binary_opt_single_n, all_float, x24, none)
+DEF_SME_FUNCTION_GS (svamax, binary_opt_single_n, all_float, x24, none)
+#undef REQUIRED_EXTENSIONS
+
/* The d_za entries in this section just declare C _za64 overloads,
which will then be resolved to either an integer function or a
floating-point function. They are needed because the integer and
diff --git a/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc b/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc
index d9922de..95c5ed8 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc
+++ b/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc
@@ -316,7 +316,8 @@ public:
expand (function_expander &e) const override
{
e.prepare_gather_address_operands (1, false);
- return e.use_exact_insn (CODE_FOR_aarch64_gather_ld1q);
+ auto icode = code_for_aarch64_gather_ld1q (e.tuple_mode (0));
+ return e.use_exact_insn (icode);
}
};
@@ -722,7 +723,7 @@ public:
expand (function_expander &e) const override
{
rtx data = e.args.last ();
- e.args.last () = force_lowpart_subreg (VNx2DImode, data, GET_MODE (data));
+ e.args.last () = aarch64_sve_reinterpret (VNx2DImode, data);
e.prepare_gather_address_operands (1, false);
return e.use_exact_insn (CODE_FOR_aarch64_scatter_st1q);
}
@@ -880,7 +881,9 @@ public:
{
for (unsigned int i = 0; i < 2; ++i)
e.args[i] = e.convert_to_pmode (e.args[i]);
- return e.use_exact_insn (code_for_while (m_unspec, Pmode, e.gp_mode (0)));
+ auto icode = code_for_aarch64_sve_while_acle (m_unspec, Pmode,
+ e.gp_mode (0));
+ return e.use_exact_insn (icode);
}
int m_unspec;
@@ -929,6 +932,44 @@ public:
unsigned int m_bits;
};
+/* The same as cond_or_uncond_unspec_function, except that the multi-vector
+   (tuple) forms of the intrinsics are SME2 extensions rather than SVE.  */
+class faminmaximpl : public function_base
+{
+public:
+ CONSTEXPR faminmaximpl (int cond_unspec, int uncond_unspec)
+ : m_cond_unspec (cond_unspec), m_uncond_unspec (uncond_unspec)
+ {}
+
+ rtx
+ expand (function_expander &e) const override
+ {
+ if (e.group_suffix ().vectors_per_tuple > 1)
+ {
+ /* SME2+faminmax intrinsics. */
+ gcc_assert (e.pred == PRED_none);
+ auto mode = e.tuple_mode (0);
+ auto icode = (code_for_aarch64_sme (m_uncond_unspec, mode));
+ return e.use_exact_insn (icode);
+ }
+ /* SVE+faminmax intrinsics. */
+ else if (e.pred == PRED_none)
+ {
+ auto mode = e.tuple_mode (0);
+ auto icode = (e.mode_suffix_id == MODE_single
+ ? code_for_aarch64_sve_single (m_uncond_unspec, mode)
+ : code_for_aarch64_sve (m_uncond_unspec, mode));
+ return e.use_exact_insn (icode);
+ }
+ return e.map_to_unspecs (m_cond_unspec, m_cond_unspec, m_cond_unspec);
+ }
+
+ /* The unspecs for the conditional and unconditional instructions,
+ respectively. */
+ int m_cond_unspec;
+ int m_uncond_unspec;
+};
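+
+/* For example (intrinsic names illustrative): a group form such as
+   svamin_f32_x2 maps to the SME2 pattern, an unpredicated single-vector
+   svamin maps to the SVE FAMIN pattern, and predicated forms go through
+   the conditional unspec as before.  */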
+
} /* end anonymous namespace */
namespace aarch64_sve {
@@ -957,10 +998,8 @@ FUNCTION (svaesd, fixed_insn_function, (CODE_FOR_aarch64_sve2_aesd))
FUNCTION (svaese, fixed_insn_function, (CODE_FOR_aarch64_sve2_aese))
FUNCTION (svaesimc, fixed_insn_function, (CODE_FOR_aarch64_sve2_aesimc))
FUNCTION (svaesmc, fixed_insn_function, (CODE_FOR_aarch64_sve2_aesmc))
-FUNCTION (svamax, cond_or_uncond_unspec_function,
- (UNSPEC_COND_FAMAX, UNSPEC_FAMAX))
-FUNCTION (svamin, cond_or_uncond_unspec_function,
- (UNSPEC_COND_FAMIN, UNSPEC_FAMIN))
+FUNCTION (svamax, faminmaximpl, (UNSPEC_COND_FAMAX, UNSPEC_FAMAX))
+FUNCTION (svamin, faminmaximpl, (UNSPEC_COND_FAMIN, UNSPEC_FAMIN))
FUNCTION (svandqv, reduction, (UNSPEC_ANDQV, UNSPEC_ANDQV, -1))
FUNCTION (svbcax, CODE_FOR_MODE0 (aarch64_sve2_bcax),)
FUNCTION (svbdep, unspec_based_function, (UNSPEC_BDEP, UNSPEC_BDEP, -1))
diff --git a/gcc/config/aarch64/aarch64-sve-builtins.cc b/gcc/config/aarch64/aarch64-sve-builtins.cc
index 3651926..1764cf8 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins.cc
+++ b/gcc/config/aarch64/aarch64-sve-builtins.cc
@@ -47,6 +47,8 @@
#include "langhooks.h"
#include "stringpool.h"
#include "attribs.h"
+#include "value-range.h"
+#include "tree-ssanames.h"
#include "aarch64-sve-builtins.h"
#include "aarch64-sve-builtins-base.h"
#include "aarch64-sve-builtins-sve2.h"
@@ -3664,7 +3666,8 @@ gimple_folder::fold_pfalse ()
/* Convert the lhs and all non-boolean vector-type operands to TYPE.
Pass the converted variables to the callback FP, and finally convert the
result back to the original type. Add the necessary conversion statements.
- Return the new call. */
+ Return the new call. Note that the tree argument to the callback FP can
+ only be set once; it will always be an SSA_NAME. */
gimple *
gimple_folder::convert_and_fold (tree type,
gimple *(*fp) (gimple_folder &,
@@ -3675,7 +3678,7 @@ gimple_folder::convert_and_fold (tree type,
tree old_ty = TREE_TYPE (lhs);
gimple_seq stmts = NULL;
bool convert_lhs_p = !useless_type_conversion_p (type, old_ty);
- tree lhs_conv = convert_lhs_p ? create_tmp_var (type) : lhs;
+ tree lhs_conv = convert_lhs_p ? make_ssa_name (type) : lhs;
unsigned int num_args = gimple_call_num_args (call);
auto_vec<tree, 16> args_conv;
args_conv.safe_grow (num_args);
@@ -4001,7 +4004,8 @@ rtx
function_expander::get_reg_target ()
{
machine_mode target_mode = result_mode ();
- if (!possible_target || GET_MODE (possible_target) != target_mode)
+ if (!possible_target
+ || !register_operand (possible_target, target_mode))
possible_target = gen_reg_rtx (target_mode);
return possible_target;
}
@@ -4586,10 +4590,31 @@ function_expander::expand ()
{
/* The last element of these functions is always an fpm_t that must be
written to FPMR before the call to the instruction itself. */
- gcc_assert (args.last ()->mode == DImode);
- emit_move_insn (gen_rtx_REG (DImode, FPM_REGNUM), args.last ());
+ rtx fpm = args.last ();
+ gcc_assert (CONST_INT_P (fpm) || GET_MODE (fpm) == DImode);
+ emit_move_insn (gen_rtx_REG (DImode, FPM_REGNUM), fpm);
}
- return base->expand (*this);
+ rtx result = base->expand (*this);
+ if (function_returns_void_p ())
+ gcc_assert (result == const0_rtx);
+ else
+ {
+ auto expected_mode = result_mode ();
+ if (GET_MODE_CLASS (expected_mode) == MODE_INT)
+ /* Scalar integer constants don't store a mode.
+
+ It's OK for a variable result to have a different mode from the
+ function return type. In particular, some functions that return int
+ expand into instructions that have a DImode result, with all 64 bits
+ of the DImode being well-defined (usually zero). */
+ gcc_assert (CONST_SCALAR_INT_P (result)
+ || GET_MODE_CLASS (GET_MODE (result)) == MODE_INT);
+ else
+ /* In other cases, the return value should have the same mode
+ as the return type. */
+ gcc_assert (GET_MODE (result) == expected_mode);
+ }
+ return result;
}
/* Return a structure type that contains a single field of type FIELD_TYPE.
diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md
index d4af370..51e2d7d 100644
--- a/gcc/config/aarch64/aarch64-sve.md
+++ b/gcc/config/aarch64/aarch64-sve.md
@@ -154,8 +154,10 @@
;; ---- [FP<-INT] Packs
;; ---- [FP<-INT] Unpacks
;; ---- [FP<-FP] Packs
+;; ---- [FP<-FP] Truncating conversions
;; ---- [FP<-FP] Packs (bfloat16)
;; ---- [FP<-FP] Unpacks
+;; ---- [FP<-FP] Extending conversions
;; ---- [PRED<-PRED] Packs
;; ---- [PRED<-PRED] Unpacks
;;
@@ -702,6 +704,23 @@
}
)
+;; Fold predicated loads/stores with a PTRUE predicate to unpredicated
+;; loads/stores after RA.
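+;; For example, for VNx16QImode (or any mode on little-endian targets):
+;;   ld1b  z0.b, p0/z, [x0]  ->  ldr  z0, [x0]
+;;   st1b  z0.b, p0, [x0]    ->  str  z0, [x0]
+;; when p0 is known to be all-true.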
+(define_insn_and_split "*aarch64_sve_ptrue<mode>_ldr_str"
+ [(set (match_operand:SVE_FULL 0 "aarch64_sve_nonimmediate_operand" "=Utr,w")
+ (unspec:SVE_FULL
+ [(match_operand:<VPRED> 1 "aarch64_simd_imm_one")
+ (match_operand:SVE_FULL 2 "aarch64_sve_nonimmediate_operand" "w,Utr")]
+ UNSPEC_PRED_X))]
+ "TARGET_SVE && reload_completed
+ && (<MODE>mode == VNx16QImode || !BYTES_BIG_ENDIAN)
+ && ((REG_P (operands[0]) && MEM_P (operands[2]))
+ || (REG_P (operands[2]) && MEM_P (operands[0])))"
+ "#"
+ "&& 1"
+ [(set (match_dup 0)
+ (match_dup 2))])
+
;; Unpredicated moves that cannot use LDR and STR, i.e. partial vectors
;; or vectors for which little-endian ordering isn't acceptable. Memory
;; accesses require secondary reloads.
@@ -1286,7 +1305,24 @@
;; -------------------------------------------------------------------------
;; Predicated LD1 (single).
-(define_insn "maskload<mode><vpred>"
+(define_expand "maskload<mode><vpred>"
+ [(set (match_operand:SVE_ALL 0 "register_operand")
+ (unspec:SVE_ALL
+ [(match_operand:<VPRED> 2 "nonmemory_operand")
+ (match_operand:SVE_ALL 1 "memory_operand")
+ (match_operand:SVE_ALL 3 "aarch64_maskload_else_operand")]
+ UNSPEC_LD1_SVE))]
+ "TARGET_SVE"
+ {
+ if (aarch64_expand_maskloadstore (operands, <MODE>mode))
+ DONE;
+ if (CONSTANT_P (operands[2]))
+ operands[2] = force_reg (<VPRED>mode, operands[2]);
+ }
+)
+
+;; Predicated LD1 (single).
+(define_insn "*aarch64_maskload<mode><vpred>"
[(set (match_operand:SVE_ALL 0 "register_operand" "=w")
(unspec:SVE_ALL
[(match_operand:<VPRED> 2 "register_operand" "Upl")
@@ -1308,7 +1344,7 @@
"TARGET_SVE"
{
operands[2] = aarch64_ptrue_reg (<VPRED>mode);
- operands[3] = CONST0_RTX (<MODE>mode);
+ operands[3] = CONST0_RTX (<VSINGLE>mode);
}
)
@@ -1318,7 +1354,7 @@
(unspec:SVE_STRUCT
[(match_operand:<VPRED> 2 "register_operand" "Upl")
(match_operand:SVE_STRUCT 1 "memory_operand" "m")
- (match_operand 3 "aarch64_maskload_else_operand")]
+ (match_operand:<VSINGLE> 3 "aarch64_maskload_else_operand")]
UNSPEC_LDN))]
"TARGET_SVE"
"ld<vector_count><Vesize>\t%0, %2/z, %1"
@@ -1506,18 +1542,18 @@
UNSPEC_LD1_GATHER))]
"TARGET_SVE && TARGET_NON_STREAMING"
{@ [cons: =0, 1, 2, 3, 4, 5 ]
- [&w, Z, w, Ui1, Ui1, Upl] ld1<Vesize>\t%0.s, %5/z, [%2.s]
- [?w, Z, 0, Ui1, Ui1, Upl] ^
- [&w, vgw, w, Ui1, Ui1, Upl] ld1<Vesize>\t%0.s, %5/z, [%2.s, #%1]
- [?w, vgw, 0, Ui1, Ui1, Upl] ^
- [&w, rk, w, Z, Ui1, Upl] ld1<Vesize>\t%0.s, %5/z, [%1, %2.s, sxtw]
- [?w, rk, 0, Z, Ui1, Upl] ^
- [&w, rk, w, Ui1, Ui1, Upl] ld1<Vesize>\t%0.s, %5/z, [%1, %2.s, uxtw]
- [?w, rk, 0, Ui1, Ui1, Upl] ^
- [&w, rk, w, Z, i, Upl] ld1<Vesize>\t%0.s, %5/z, [%1, %2.s, sxtw %p4]
- [?w, rk, 0, Z, i, Upl] ^
- [&w, rk, w, Ui1, i, Upl] ld1<Vesize>\t%0.s, %5/z, [%1, %2.s, uxtw %p4]
- [?w, rk, 0, Ui1, i, Upl] ^
+ [&w, Z, w, Ui1, Ui1, Upl] ld1<Vesize>\t%0.s, %5/z, [%2.s]
+ [?w, Z, 0, Ui1, Ui1, Upl] ^
+ [&w, vg<Vesize>, w, Ui1, Ui1, Upl] ld1<Vesize>\t%0.s, %5/z, [%2.s, #%1]
+ [?w, vg<Vesize>, 0, Ui1, Ui1, Upl] ^
+ [&w, rk, w, Z, Ui1, Upl] ld1<Vesize>\t%0.s, %5/z, [%1, %2.s, sxtw]
+ [?w, rk, 0, Z, Ui1, Upl] ^
+ [&w, rk, w, Ui1, Ui1, Upl] ld1<Vesize>\t%0.s, %5/z, [%1, %2.s, uxtw]
+ [?w, rk, 0, Ui1, Ui1, Upl] ^
+ [&w, rk, w, Z, i, Upl] ld1<Vesize>\t%0.s, %5/z, [%1, %2.s, sxtw %p4]
+ [?w, rk, 0, Z, i, Upl] ^
+ [&w, rk, w, Ui1, i, Upl] ld1<Vesize>\t%0.s, %5/z, [%1, %2.s, uxtw %p4]
+ [?w, rk, 0, Ui1, i, Upl] ^
}
)
@@ -1536,14 +1572,14 @@
UNSPEC_LD1_GATHER))]
"TARGET_SVE && TARGET_NON_STREAMING"
{@ [cons: =0, 1, 2, 3, 4, 5]
- [&w, Z, w, i, Ui1, Upl] ld1<Vesize>\t%0.d, %5/z, [%2.d]
- [?w, Z, 0, i, Ui1, Upl] ^
- [&w, vgd, w, i, Ui1, Upl] ld1<Vesize>\t%0.d, %5/z, [%2.d, #%1]
- [?w, vgd, 0, i, Ui1, Upl] ^
- [&w, rk, w, i, Ui1, Upl] ld1<Vesize>\t%0.d, %5/z, [%1, %2.d]
- [?w, rk, 0, i, Ui1, Upl] ^
- [&w, rk, w, i, i, Upl] ld1<Vesize>\t%0.d, %5/z, [%1, %2.d, lsl %p4]
- [?w, rk, 0, i, i, Upl] ^
+ [&w, Z, w, i, Ui1, Upl] ld1<Vesize>\t%0.d, %5/z, [%2.d]
+ [?w, Z, 0, i, Ui1, Upl] ^
+ [&w, vg<Vesize>, w, i, Ui1, Upl] ld1<Vesize>\t%0.d, %5/z, [%2.d, #%1]
+ [?w, vg<Vesize>, 0, i, Ui1, Upl] ^
+ [&w, rk, w, i, Ui1, Upl] ld1<Vesize>\t%0.d, %5/z, [%1, %2.d]
+ [?w, rk, 0, i, Ui1, Upl] ^
+ [&w, rk, w, i, i, Upl] ld1<Vesize>\t%0.d, %5/z, [%1, %2.d, lsl %p4]
+ [?w, rk, 0, i, i, Upl] ^
}
)
@@ -2287,7 +2323,24 @@
;; -------------------------------------------------------------------------
;; Predicated ST1 (single).
-(define_insn "maskstore<mode><vpred>"
+(define_expand "maskstore<mode><vpred>"
+ [(set (match_operand:SVE_ALL 0 "memory_operand")
+ (unspec:SVE_ALL
+ [(match_operand:<VPRED> 2 "nonmemory_operand")
+ (match_operand:SVE_ALL 1 "register_operand")
+ (match_dup 0)]
+ UNSPEC_ST1_SVE))]
+ "TARGET_SVE"
+ {
+ if (aarch64_expand_maskloadstore (operands, <MODE>mode))
+ DONE;
+ if (CONSTANT_P (operands[2]))
+ operands[2] = force_reg (<VPRED>mode, operands[2]);
+ }
+)
+
+;; Predicated ST1 (single).
+(define_insn "*aarch64_maskstore<mode><vpred>"
[(set (match_operand:SVE_ALL 0 "memory_operand" "+m")
(unspec:SVE_ALL
[(match_operand:<VPRED> 2 "register_operand" "Upl")
@@ -2435,13 +2488,13 @@
(match_operand:SVE_4 4 "register_operand")]
UNSPEC_ST1_SCATTER))]
"TARGET_SVE && TARGET_NON_STREAMING"
- {@ [ cons: 0 , 1 , 2 , 3 , 4 , 5 ]
- [ Z , w , Ui1 , Ui1 , w , Upl ] st1<Vesize>\t%4.s, %5, [%1.s]
- [ vgw , w , Ui1 , Ui1 , w , Upl ] st1<Vesize>\t%4.s, %5, [%1.s, #%0]
- [ rk , w , Z , Ui1 , w , Upl ] st1<Vesize>\t%4.s, %5, [%0, %1.s, sxtw]
- [ rk , w , Ui1 , Ui1 , w , Upl ] st1<Vesize>\t%4.s, %5, [%0, %1.s, uxtw]
- [ rk , w , Z , i , w , Upl ] st1<Vesize>\t%4.s, %5, [%0, %1.s, sxtw %p3]
- [ rk , w , Ui1 , i , w , Upl ] st1<Vesize>\t%4.s, %5, [%0, %1.s, uxtw %p3]
+ {@ [ cons: 0 , 1 , 2 , 3 , 4 , 5 ]
+ [ Z , w , Ui1 , Ui1 , w , Upl ] st1<Vesize>\t%4.s, %5, [%1.s]
+ [ vg<Vesize> , w , Ui1 , Ui1 , w , Upl ] st1<Vesize>\t%4.s, %5, [%1.s, #%0]
+ [ rk , w , Z , Ui1 , w , Upl ] st1<Vesize>\t%4.s, %5, [%0, %1.s, sxtw]
+ [ rk , w , Ui1 , Ui1 , w , Upl ] st1<Vesize>\t%4.s, %5, [%0, %1.s, uxtw]
+ [ rk , w , Z , i , w , Upl ] st1<Vesize>\t%4.s, %5, [%0, %1.s, sxtw %p3]
+ [ rk , w , Ui1 , i , w , Upl ] st1<Vesize>\t%4.s, %5, [%0, %1.s, uxtw %p3]
}
)
@@ -2458,11 +2511,11 @@
(match_operand:SVE_2 4 "register_operand")]
UNSPEC_ST1_SCATTER))]
"TARGET_SVE && TARGET_NON_STREAMING"
- {@ [ cons: 0 , 1 , 3 , 4 , 5 ]
- [ Z , w , Ui1 , w , Upl ] st1<Vesize>\t%4.d, %5, [%1.d]
- [ vgd , w , Ui1 , w , Upl ] st1<Vesize>\t%4.d, %5, [%1.d, #%0]
- [ rk , w , Ui1 , w , Upl ] st1<Vesize>\t%4.d, %5, [%0, %1.d]
- [ rk , w , i , w , Upl ] st1<Vesize>\t%4.d, %5, [%0, %1.d, lsl %p3]
+ {@ [ cons: 0 , 1 , 3 , 4 , 5 ]
+ [ Z , w , Ui1 , w , Upl ] st1<Vesize>\t%4.d, %5, [%1.d]
+ [ vg<Vesize> , w , Ui1 , w , Upl ] st1<Vesize>\t%4.d, %5, [%1.d, #%0]
+ [ rk , w , Ui1 , w , Upl ] st1<Vesize>\t%4.d, %5, [%0, %1.d]
+ [ rk , w , i , w , Upl ] st1<Vesize>\t%4.d, %5, [%0, %1.d, lsl %p3]
}
)
@@ -2937,10 +2990,7 @@
(vec_duplicate:PRED_ALL (match_operand:QI 1 "register_operand")))]
"TARGET_SVE"
{
- rtx tmp = gen_reg_rtx (DImode);
- rtx op1 = gen_lowpart (DImode, operands[1]);
- emit_insn (gen_ashldi3 (tmp, op1, gen_int_mode (63, DImode)));
- emit_insn (gen_while_ultdi<mode> (operands[0], const0_rtx, tmp));
+ aarch64_emit_sve_pred_vec_duplicate (<MODE>mode, operands[0], operands[1]);
DONE;
}
)
@@ -2969,10 +3019,11 @@
{
poly_int64 val;
if (poly_int_rtx_p (operands[2], &val)
- && known_eq (val, GET_MODE_NUNITS (<MODE>mode) - 1))
+ && known_eq (val, GET_MODE_NUNITS (<MODE>mode) - 1)
+ && !val.is_constant ())
{
- /* The last element can be extracted with a LASTB and a false
- predicate. */
+ /* For VLA, extract the last element with a LASTB and a false
+ predicate. */
rtx sel = aarch64_pfalse_reg (<VPRED>mode);
emit_insn (gen_extract_last_<mode> (operands[0], sel, operands[1]));
DONE;
@@ -3265,6 +3316,61 @@
;; - REVW
;; -------------------------------------------------------------------------
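+
+;; Decompose a constant-amount vector rotate into a left shift plus a
+;; right shift combined with PLUS: rot (x, n) == (x << n) + (x >> (w - n)).
+;; PLUS is equivalent to IOR here because the two shifted halves cannot
+;; overlap; with SVE2 the right shift and the add can then pair up as a
+;; single shift-right-and-accumulate, which is presumably why the split
+;; is restricted to TARGET_SVE2.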
+(define_split
+ [(set (match_operand:SVE_FULL_HSDI 0 "register_operand")
+ (rotate:SVE_FULL_HSDI
+ (match_operand:SVE_FULL_HSDI 1 "register_operand")
+ (match_operand:SVE_FULL_HSDI 2 "aarch64_constant_vector_operand")))]
+ "TARGET_SVE && can_create_pseudo_p ()"
+ [(set (match_dup 3)
+ (ashift:SVE_FULL_HSDI (match_dup 1)
+ (match_dup 2)))
+ (set (match_dup 0)
+ (plus:SVE_FULL_HSDI
+ (lshiftrt:SVE_FULL_HSDI (match_dup 1)
+ (match_dup 4))
+ (match_dup 3)))]
+ {
+ if (aarch64_emit_opt_vec_rotate (operands[0], operands[1], operands[2]))
+ DONE;
+
+ if (!TARGET_SVE2)
+ FAIL;
+
+ operands[3] = gen_reg_rtx (<MODE>mode);
+ HOST_WIDE_INT shift_amount =
+ INTVAL (unwrap_const_vec_duplicate (operands[2]));
+ int bitwidth = GET_MODE_UNIT_BITSIZE (<MODE>mode);
+ operands[4] = aarch64_simd_gen_const_vector_dup (<MODE>mode,
+ bitwidth - shift_amount);
+ }
+)
+
+;; The RTL combiners are able to combine "ior (ashift, ashiftrt)" to a "bswap".
+;; Match that as well.
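+;; For 16-bit lanes, (x << 8) | ((u16) x >> 8) swaps the two bytes of
+;; each lane, which is exactly a predicated REVB on a .h vector.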
+(define_insn_and_split "*v_revvnx8hi"
+ [(parallel
+ [(set (match_operand:VNx8HI 0 "register_operand" "=w")
+ (bswap:VNx8HI (match_operand 1 "register_operand" "w")))
+ (clobber (match_scratch:VNx8BI 2 "=Upl"))])]
+ "TARGET_SVE"
+ "#"
+ "&& 1"
+ [(set (match_dup 0)
+ (unspec:VNx8HI
+ [(match_dup 2)
+ (unspec:VNx8HI
+ [(match_dup 1)]
+ UNSPEC_REVB)]
+ UNSPEC_PRED_X))]
+ {
+ if (!can_create_pseudo_p ())
+ emit_move_insn (operands[2], CONSTM1_RTX (VNx8BImode));
+ else
+ operands[2] = aarch64_ptrue_reg (VNx8BImode);
+ }
+)
+
;; Predicated integer unary operations.
(define_insn "@aarch64_pred_<optab><mode>"
[(set (match_operand:SVE_FULL_I 0 "register_operand")
@@ -3643,9 +3749,9 @@
;; Unpredicated floating-point unary operations.
(define_insn "@aarch64_sve_<optab><mode>"
- [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w")
- (unspec:SVE_FULL_F
- [(match_operand:SVE_FULL_F 1 "register_operand" "w")]
+ [(set (match_operand:SVE_F 0 "register_operand" "=w")
+ (unspec:SVE_F
+ [(match_operand:SVE_F 1 "register_operand" "w")]
SVE_FP_UNARY))]
"TARGET_SVE"
"<sve_fp_op>\t%0.<Vetype>, %1.<Vetype>"
@@ -3653,25 +3759,41 @@
;; Unpredicated floating-point unary operations.
(define_expand "<optab><mode>2"
- [(set (match_operand:SVE_FULL_F 0 "register_operand")
- (unspec:SVE_FULL_F
+ [(set (match_operand:SVE_F 0 "register_operand")
+ (unspec:SVE_F
[(match_dup 2)
- (const_int SVE_RELAXED_GP)
- (match_operand:SVE_FULL_F 1 "register_operand")]
+ (match_dup 3)
+ (match_operand:SVE_F 1 "register_operand")]
SVE_COND_FP_UNARY_OPTAB))]
"TARGET_SVE"
{
+ operands[2] = aarch64_sve_fp_pred (<MODE>mode, &operands[3]);
+ }
+)
+
+;; FABS and FNEG are non-trapping, so we can always expand with a <VPRED>
+;; predicate. It doesn't matter whether the padding bits of a partial
+;; vector mode are active or inactive.
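+;; For example, fneg on a partial vector may also flip the sign of the
+;; padding lanes; since FNEG cannot raise exceptions, that is harmless.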
+(define_expand "<optab><mode>2"
+ [(set (match_operand:SVE_F 0 "register_operand")
+ (unspec:SVE_F
+ [(match_dup 2)
+ (const_int SVE_RELAXED_GP)
+ (match_operand:SVE_F 1 "register_operand")]
+ SVE_COND_FP_UNARY_BITWISE))]
+ "TARGET_SVE"
+ {
operands[2] = aarch64_ptrue_reg (<VPRED>mode);
}
)
;; Predicated floating-point unary operations.
(define_insn "@aarch64_pred_<optab><mode>"
- [(set (match_operand:SVE_FULL_F 0 "register_operand")
- (unspec:SVE_FULL_F
- [(match_operand:<VPRED> 1 "register_operand")
+ [(set (match_operand:SVE_F 0 "register_operand")
+ (unspec:SVE_F
+ [(match_operand:<VPRED> 1 "aarch64_predicate_operand")
(match_operand:SI 3 "aarch64_sve_gp_strictness")
- (match_operand:SVE_FULL_F 2 "register_operand")]
+ (match_operand:SVE_F 2 "register_operand")]
SVE_COND_FP_UNARY))]
"TARGET_SVE"
{@ [ cons: =0 , 1 , 2 ; attrs: movprfx ]
@@ -3697,13 +3819,13 @@
;; Predicated floating-point unary arithmetic, merging with the first input.
(define_insn_and_rewrite "*cond_<optab><mode>_2_relaxed"
- [(set (match_operand:SVE_FULL_F 0 "register_operand")
- (unspec:SVE_FULL_F
+ [(set (match_operand:SVE_F 0 "register_operand")
+ (unspec:SVE_F
[(match_operand:<VPRED> 1 "register_operand")
- (unspec:SVE_FULL_F
+ (unspec:SVE_F
[(match_operand 3)
(const_int SVE_RELAXED_GP)
- (match_operand:SVE_FULL_F 2 "register_operand")]
+ (match_operand:SVE_F 2 "register_operand")]
SVE_COND_FP_UNARY)
(match_dup 2)]
UNSPEC_SEL))]
@@ -3745,15 +3867,15 @@
;; as earlyclobber helps to make the instruction more regular to the
;; register allocator.
(define_insn_and_rewrite "*cond_<optab><mode>_any_relaxed"
- [(set (match_operand:SVE_FULL_F 0 "register_operand")
- (unspec:SVE_FULL_F
+ [(set (match_operand:SVE_F 0 "register_operand")
+ (unspec:SVE_F
[(match_operand:<VPRED> 1 "register_operand")
- (unspec:SVE_FULL_F
+ (unspec:SVE_F
[(match_operand 4)
(const_int SVE_RELAXED_GP)
- (match_operand:SVE_FULL_F 2 "register_operand")]
+ (match_operand:SVE_F 2 "register_operand")]
SVE_COND_FP_UNARY)
- (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero")]
+ (match_operand:SVE_F 3 "aarch64_simd_reg_or_zero")]
UNSPEC_SEL))]
"TARGET_SVE && !rtx_equal_p (operands[2], operands[3])"
{@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
@@ -3841,6 +3963,7 @@
;; -------------------------------------------------------------------------
;; Includes:
;; - NOT
+;; - NOTS
;; -------------------------------------------------------------------------
;; Unpredicated predicate inverse.
@@ -3856,7 +3979,7 @@
)
;; Predicated predicate inverse.
-(define_insn "*one_cmpl<mode>3"
+(define_insn "@aarch64_pred_one_cmpl<mode>_z"
[(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
(and:PRED_ALL
(not:PRED_ALL (match_operand:PRED_ALL 2 "register_operand" "Upa"))
@@ -3865,6 +3988,42 @@
"not\t%0.b, %1/z, %2.b"
)
+;; Predicated predicate inverse in which the flags are set in the same
+;; way as a PTEST.
+(define_insn "*one_cmpl<mode>3_cc"
+ [(set (reg:CC_NZC CC_REGNUM)
+ (unspec:CC_NZC
+ [(match_operand:VNx16BI 1 "register_operand" "Upa")
+ (match_operand 3)
+ (match_operand:SI 4 "aarch64_sve_ptrue_flag")
+ (and:PRED_ALL
+ (not:PRED_ALL
+ (match_operand:PRED_ALL 2 "register_operand" "Upa"))
+ (match_dup 3))]
+ UNSPEC_PTEST))
+ (set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
+ (and:PRED_ALL (not:PRED_ALL (match_dup 2)) (match_dup 3)))]
+ "TARGET_SVE"
+ "nots\t%0.b, %1/z, %2.b"
+)
+
+;; Same, where only the flags result is interesting.
+(define_insn "*one_cmpl<mode>3_ptest"
+ [(set (reg:CC_NZC CC_REGNUM)
+ (unspec:CC_NZC
+ [(match_operand:VNx16BI 1 "register_operand" "Upa")
+ (match_operand 3)
+ (match_operand:SI 4 "aarch64_sve_ptrue_flag")
+ (and:PRED_ALL
+ (not:PRED_ALL
+ (match_operand:PRED_ALL 2 "register_operand" "Upa"))
+ (match_dup 3))]
+ UNSPEC_PTEST))
+ (clobber (match_scratch:PRED_ALL 0 "=Upa"))]
+ "TARGET_SVE"
+ "nots\t%0.b, %1/z, %2.b"
+)
+
;; =========================================================================
;; == Binary arithmetic
;; =========================================================================
@@ -3949,8 +4108,8 @@
(match_operand:SVE_I_SIMD_DI 3 "aarch64_sve_<sve_imm_con>_operand"))]
UNSPEC_PRED_X))]
"TARGET_SVE"
- {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
- [ w , Upl , %0 , <sve_imm_con> ; * ] #
+ {@ [ cons: =0 , 1 , %2 , 3 ; attrs: movprfx ]
+ [ w , Upl , 0 , <sve_imm_con> ; * ] #
[ w , Upl , 0 , w ; * ] <sve_int_op>\t%Z0.<Vetype>, %1/m, %Z0.<Vetype>, %Z3.<Vetype>
[ ?&w , Upl , w , <sve_imm_con> ; yes ] #
[ ?&w , Upl , w , w ; yes ] movprfx\t%Z0, %Z2\;<sve_int_op>\t%Z0.<Vetype>, %1/m, %Z0.<Vetype>, %Z3.<Vetype>
@@ -4079,8 +4238,8 @@
(match_operand:SVE_I 1 "register_operand")
(match_operand:SVE_I 2 "aarch64_sve_add_operand")))]
"TARGET_SVE"
- {@ [ cons: =0 , 1 , 2 ; attrs: movprfx ]
- [ w , %0 , vsa ; * ] add\t%0.<Vetype>, %0.<Vetype>, #%D2
+ {@ [ cons: =0 , %1 , 2 ; attrs: movprfx ]
+ [ w , 0 , vsa ; * ] add\t%0.<Vetype>, %0.<Vetype>, #%D2
[ w , 0 , vsn ; * ] sub\t%0.<Vetype>, %0.<Vetype>, #%N2
[ w , 0 , vsi ; * ] << aarch64_output_sve_vector_inc_dec ("%0.<Vetype>", operands[2]);
[ ?w , w , vsa ; yes ] movprfx\t%0, %1\;add\t%0.<Vetype>, %0.<Vetype>, #%D2
@@ -4182,80 +4341,57 @@
(define_expand "@aarch64_adr<mode>_shift"
[(set (match_operand:SVE_FULL_SDI 0 "register_operand")
(plus:SVE_FULL_SDI
- (unspec:SVE_FULL_SDI
- [(match_dup 4)
- (ashift:SVE_FULL_SDI
- (match_operand:SVE_FULL_SDI 2 "register_operand")
- (match_operand:SVE_FULL_SDI 3 "const_1_to_3_operand"))]
- UNSPEC_PRED_X)
+ (ashift:SVE_FULL_SDI
+ (match_operand:SVE_FULL_SDI 2 "register_operand")
+ (match_operand:SVE_FULL_SDI 3 "const_1_to_3_operand"))
(match_operand:SVE_FULL_SDI 1 "register_operand")))]
"TARGET_SVE && TARGET_NON_STREAMING"
- {
- operands[4] = CONSTM1_RTX (<VPRED>mode);
- }
)
-(define_insn_and_rewrite "*aarch64_adr<mode>_shift"
+(define_insn "*aarch64_adr<mode>_shift"
[(set (match_operand:SVE_24I 0 "register_operand" "=w")
(plus:SVE_24I
- (unspec:SVE_24I
- [(match_operand 4)
- (ashift:SVE_24I
- (match_operand:SVE_24I 2 "register_operand" "w")
- (match_operand:SVE_24I 3 "const_1_to_3_operand"))]
- UNSPEC_PRED_X)
+ (ashift:SVE_24I
+ (match_operand:SVE_24I 2 "register_operand" "w")
+ (match_operand:SVE_24I 3 "const_1_to_3_operand"))
(match_operand:SVE_24I 1 "register_operand" "w")))]
"TARGET_SVE && TARGET_NON_STREAMING"
"adr\t%0.<Vctype>, [%1.<Vctype>, %2.<Vctype>, lsl %3]"
- "&& !CONSTANT_P (operands[4])"
- {
- operands[4] = CONSTM1_RTX (<VPRED>mode);
- }
)
;; Same, but with the index being sign-extended from the low 32 bits.
(define_insn_and_rewrite "*aarch64_adr_shift_sxtw"
[(set (match_operand:VNx2DI 0 "register_operand" "=w")
(plus:VNx2DI
- (unspec:VNx2DI
- [(match_operand 4)
- (ashift:VNx2DI
- (unspec:VNx2DI
- [(match_operand 5)
- (sign_extend:VNx2DI
- (truncate:VNx2SI
- (match_operand:VNx2DI 2 "register_operand" "w")))]
- UNSPEC_PRED_X)
- (match_operand:VNx2DI 3 "const_1_to_3_operand"))]
- UNSPEC_PRED_X)
+ (ashift:VNx2DI
+ (unspec:VNx2DI
+ [(match_operand 4)
+ (sign_extend:VNx2DI
+ (truncate:VNx2SI
+ (match_operand:VNx2DI 2 "register_operand" "w")))]
+ UNSPEC_PRED_X)
+ (match_operand:VNx2DI 3 "const_1_to_3_operand"))
(match_operand:VNx2DI 1 "register_operand" "w")))]
"TARGET_SVE && TARGET_NON_STREAMING"
"adr\t%0.d, [%1.d, %2.d, sxtw %3]"
- "&& (!CONSTANT_P (operands[4]) || !CONSTANT_P (operands[5]))"
+ "&& !CONSTANT_P (operands[4])"
{
- operands[5] = operands[4] = CONSTM1_RTX (VNx2BImode);
+ operands[4] = CONSTM1_RTX (VNx2BImode);
}
)
;; Same, but with the index being zero-extended from the low 32 bits.
-(define_insn_and_rewrite "*aarch64_adr_shift_uxtw"
+(define_insn "*aarch64_adr_shift_uxtw"
[(set (match_operand:VNx2DI 0 "register_operand" "=w")
(plus:VNx2DI
- (unspec:VNx2DI
- [(match_operand 5)
- (ashift:VNx2DI
- (and:VNx2DI
- (match_operand:VNx2DI 2 "register_operand" "w")
- (match_operand:VNx2DI 4 "aarch64_sve_uxtw_immediate"))
- (match_operand:VNx2DI 3 "const_1_to_3_operand"))]
- UNSPEC_PRED_X)
+ (ashift:VNx2DI
+ (and:VNx2DI
+ (match_operand:VNx2DI 2 "register_operand" "w")
+ (match_operand:VNx2DI 4 "aarch64_sve_uxtw_immediate"))
+ (match_operand:VNx2DI 3 "const_1_to_3_operand"))
(match_operand:VNx2DI 1 "register_operand" "w")))]
"TARGET_SVE && TARGET_NON_STREAMING"
"adr\t%0.d, [%1.d, %2.d, uxtw %3]"
- "&& !CONSTANT_P (operands[5])"
- {
- operands[5] = CONSTM1_RTX (VNx2BImode);
- }
)
;; -------------------------------------------------------------------------
@@ -4298,8 +4434,8 @@
(match_dup 3))]
UNSPEC_PRED_X)))]
"TARGET_SVE"
- {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
- [ w , Upl , %0 , w ; * ] <su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
+ {@ [ cons: =0 , 1 , %2 , 3 ; attrs: movprfx ]
+ [ w , Upl , 0 , w ; * ] <su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
[ ?&w , Upl , w , w ; yes ] movprfx\t%0, %2\;<su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
}
)
@@ -4513,8 +4649,8 @@
MUL_HIGHPART)]
UNSPEC_PRED_X))]
"TARGET_SVE"
- {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
- [ w , Upl , %0 , w ; * ] <su>mulh\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
+ {@ [ cons: =0 , 1 , %2 , 3 ; attrs: movprfx ]
+ [ w , Upl , 0 , w ; * ] <su>mulh\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
[ ?&w , Upl , w , w ; yes ] movprfx\t%0, %2\;<su>mulh\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
}
)
@@ -4568,8 +4704,8 @@
(match_operand:SVE_FULL_I 4 "aarch64_simd_imm_zero")]
UNSPEC_SEL))]
"TARGET_SVE"
- {@ [ cons: =0 , 1 , 2 , 3 ]
- [ &w , Upl , %0 , w ] movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
+ {@ [ cons: =0 , 1 , %2 , 3 ]
+ [ &w , Upl , 0 , w ] movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
[ &w , Upl , w , w ] movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
}
[(set_attr "movprfx" "yes")])
@@ -4713,8 +4849,8 @@
(match_operand:SVE_I 1 "register_operand")
(match_operand:SVE_I 2 "aarch64_sve_logical_operand")))]
"TARGET_SVE"
- {@ [ cons: =0 , 1 , 2 ; attrs: movprfx ]
- [ w , %0 , vsl ; * ] <logical>\t%0.<Vetype>, %0.<Vetype>, #%C2
+ {@ [ cons: =0 , %1 , 2 ; attrs: movprfx ]
+ [ w , 0 , vsl ; * ] <logical>\t%0.<Vetype>, %0.<Vetype>, #%C2
[ ?w , w , vsl ; yes ] movprfx\t%0, %1\;<logical>\t%0.<Vetype>, %0.<Vetype>, #%C2
[ w , w , w ; * ] <logical>\t%0.d, %1.d, %2.d
}
@@ -4847,7 +4983,7 @@
if (CONST_INT_P (operands[2]))
{
amount = gen_const_vec_duplicate (<MODE>mode, operands[2]);
- if (!aarch64_sve_<lr>shift_operand (operands[2], <MODE>mode))
+ if (!aarch64_sve_<lr>shift_operand (amount, <MODE>mode))
amount = force_reg (<MODE>mode, amount);
}
else
@@ -4871,15 +5007,40 @@
UNSPEC_PRED_X))]
"TARGET_SVE"
{
+ if (CONSTANT_P (operands[2]))
+ {
+ emit_insn (gen_aarch64_v<optab><mode>3_const (operands[0], operands[1],
+ operands[2]));
+ DONE;
+ }
operands[3] = aarch64_ptrue_reg (<VPRED>mode);
}
)
-;; Shift by a vector, predicated with a PTRUE. We don't actually need
-;; the predicate for the first alternative, but using Upa or X isn't
-;; likely to gain much and would make the instruction seem less uniform
-;; to the register allocator.
-(define_insn_and_split "@aarch64_pred_<optab><mode>"
+;; Shift by a vector, predicated with a PTRUE.
+(define_expand "@aarch64_pred_<optab><mode>"
+ [(set (match_operand:SVE_I 0 "register_operand")
+ (unspec:SVE_I
+ [(match_operand:<VPRED> 1 "register_operand")
+ (ASHIFT:SVE_I
+ (match_operand:SVE_I 2 "register_operand")
+ (match_operand:SVE_I 3 "aarch64_sve_<lr>shift_operand"))]
+ UNSPEC_PRED_X))]
+ "TARGET_SVE"
+ {
+ if (CONSTANT_P (operands[3]))
+ {
+ emit_insn (gen_aarch64_v<optab><mode>3_const (operands[0], operands[2],
+ operands[3]));
+ DONE;
+ }
+ }
+)
+
+;; We don't actually need the predicate for the first alternative, but
+;; using Upa or X isn't likely to gain much and would make the instruction
+;; seem less uniform to the register allocator.
+(define_insn_and_split "*aarch64_pred_<optab><mode>"
[(set (match_operand:SVE_I 0 "register_operand")
(unspec:SVE_I
[(match_operand:<VPRED> 1 "register_operand")
@@ -4894,33 +5055,32 @@
[ w , Upl , w , 0 ; * ] <shift>r\t%0.<Vetype>, %1/m, %3.<Vetype>, %2.<Vetype>
[ ?&w , Upl , w , w ; yes ] movprfx\t%0, %2\;<shift>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
}
- "&& reload_completed
- && !register_operand (operands[3], <MODE>mode)"
+ "&& !register_operand (operands[3], <MODE>mode)"
[(set (match_dup 0) (ASHIFT:SVE_I (match_dup 2) (match_dup 3)))]
""
)
-;; Unpredicated shift operations by a constant (post-RA only).
+;; Unpredicated shift operations by a constant.
;; These are generated by splitting a predicated instruction whose
;; predicate is unused.
-(define_insn "*post_ra_v_ashl<mode>3"
+(define_insn "aarch64_vashl<mode>3_const"
[(set (match_operand:SVE_I 0 "register_operand")
(ashift:SVE_I
(match_operand:SVE_I 1 "register_operand")
(match_operand:SVE_I 2 "aarch64_simd_lshift_imm")))]
- "TARGET_SVE && reload_completed"
+ "TARGET_SVE"
{@ [ cons: =0 , 1 , 2 ]
[ w , w , vs1 ] add\t%0.<Vetype>, %1.<Vetype>, %1.<Vetype>
[ w , w , Dl ] lsl\t%0.<Vetype>, %1.<Vetype>, #%2
}
)
-(define_insn "*post_ra_v_<optab><mode>3"
+(define_insn "aarch64_v<optab><mode>3_const"
[(set (match_operand:SVE_I 0 "register_operand" "=w")
(SHIFTRT:SVE_I
(match_operand:SVE_I 1 "register_operand" "w")
(match_operand:SVE_I 2 "aarch64_simd_rshift_imm")))]
- "TARGET_SVE && reload_completed"
+ "TARGET_SVE"
"<shift>\t%0.<Vetype>, %1.<Vetype>, #%2"
)
@@ -5348,27 +5508,25 @@
;; Split a predicated instruction whose predicate is unused into an
;; unpredicated instruction.
(define_split
- [(set (match_operand:SVE_FULL_F_BF 0 "register_operand")
- (unspec:SVE_FULL_F_BF
+ [(set (match_operand:SVE_F_B16B16 0 "register_operand")
+ (unspec:SVE_F_B16B16
[(match_operand:<VPRED> 1 "register_operand")
- (match_operand:SI 4 "aarch64_sve_gp_strictness")
- (match_operand:SVE_FULL_F_BF 2 "register_operand")
- (match_operand:SVE_FULL_F_BF 3 "register_operand")]
+ (const_int SVE_RELAXED_GP)
+ (match_operand:SVE_F_B16B16 2 "register_operand")
+ (match_operand:SVE_F_B16B16 3 "register_operand")]
<SVE_COND_FP>))]
- "TARGET_SVE
- && reload_completed
- && INTVAL (operands[4]) == SVE_RELAXED_GP"
+ "TARGET_SVE && reload_completed"
[(set (match_dup 0)
- (SVE_UNPRED_FP_BINARY:SVE_FULL_F_BF (match_dup 2) (match_dup 3)))]
+ (SVE_UNPRED_FP_BINARY:SVE_F_B16B16 (match_dup 2) (match_dup 3)))]
)
;; Unpredicated floating-point binary operations (post-RA only).
;; These are generated by the split above.
(define_insn "*post_ra_<sve_fp_op><mode>3"
- [(set (match_operand:SVE_FULL_F_BF 0 "register_operand" "=w")
- (SVE_UNPRED_FP_BINARY:SVE_FULL_F_BF
- (match_operand:SVE_FULL_F_BF 1 "register_operand" "w")
- (match_operand:SVE_FULL_F_BF 2 "register_operand" "w")))]
+ [(set (match_operand:SVE_F_B16B16 0 "register_operand" "=w")
+ (SVE_UNPRED_FP_BINARY:SVE_F_B16B16
+ (match_operand:SVE_F_B16B16 1 "register_operand" "w")
+ (match_operand:SVE_F_B16B16 2 "register_operand" "w")))]
"TARGET_SVE && reload_completed"
"<b><sve_fp_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>")
@@ -5400,10 +5558,10 @@
;; Unpredicated floating-point binary operations.
(define_insn "@aarch64_sve_<optab><mode>"
- [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w")
- (unspec:SVE_FULL_F
- [(match_operand:SVE_FULL_F 1 "register_operand" "w")
- (match_operand:SVE_FULL_F 2 "register_operand" "w")]
+ [(set (match_operand:SVE_F 0 "register_operand" "=w")
+ (unspec:SVE_F
+ [(match_operand:SVE_F 1 "register_operand" "w")
+ (match_operand:SVE_F 2 "register_operand" "w")]
SVE_FP_BINARY))]
"TARGET_SVE"
"<sve_fp_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
@@ -5412,27 +5570,27 @@
;; Unpredicated floating-point binary operations that need to be predicated
;; for SVE.
(define_expand "<optab><mode>3"
- [(set (match_operand:SVE_FULL_F_BF 0 "register_operand")
- (unspec:SVE_FULL_F_BF
+ [(set (match_operand:SVE_F_B16B16 0 "register_operand")
+ (unspec:SVE_F_B16B16
[(match_dup 3)
- (const_int SVE_RELAXED_GP)
- (match_operand:SVE_FULL_F_BF 1 "<sve_pred_fp_rhs1_operand>")
- (match_operand:SVE_FULL_F_BF 2 "<sve_pred_fp_rhs2_operand>")]
+ (match_dup 4)
+ (match_operand:SVE_F_B16B16 1 "<sve_pred_fp_rhs1_operand>")
+ (match_operand:SVE_F_B16B16 2 "<sve_pred_fp_rhs2_operand>")]
SVE_COND_FP_BINARY_OPTAB))]
"TARGET_SVE && (<supports_bf16> || !<is_bf16>)"
{
- operands[3] = aarch64_ptrue_reg (<VPRED>mode);
+ operands[3] = aarch64_sve_fp_pred (<MODE>mode, &operands[4]);
}
)
;; Predicated floating-point binary operations that have no immediate forms.
(define_insn "@aarch64_pred_<optab><mode>"
- [(set (match_operand:SVE_FULL_F 0 "register_operand")
- (unspec:SVE_FULL_F
- [(match_operand:<VPRED> 1 "register_operand")
+ [(set (match_operand:SVE_F 0 "register_operand")
+ (unspec:SVE_F
+ [(match_operand:<VPRED> 1 "aarch64_predicate_operand")
(match_operand:SI 4 "aarch64_sve_gp_strictness")
- (match_operand:SVE_FULL_F 2 "register_operand")
- (match_operand:SVE_FULL_F 3 "register_operand")]
+ (match_operand:SVE_F 2 "register_operand")
+ (match_operand:SVE_F 3 "register_operand")]
SVE_COND_FP_BINARY_REG))]
"TARGET_SVE"
{@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
@@ -5444,30 +5602,33 @@
;; Predicated floating-point operations with merging.
(define_expand "@cond_<optab><mode>"
- [(set (match_operand:SVE_FULL_F_BF 0 "register_operand")
- (unspec:SVE_FULL_F_BF
+ [(set (match_operand:SVE_F_B16B16 0 "register_operand")
+ (unspec:SVE_F_B16B16
[(match_operand:<VPRED> 1 "register_operand")
- (unspec:SVE_FULL_F_BF
+ (unspec:SVE_F_B16B16
[(match_dup 1)
(const_int SVE_STRICT_GP)
- (match_operand:SVE_FULL_F_BF 2 "<sve_pred_fp_rhs1_operand>")
- (match_operand:SVE_FULL_F_BF 3 "<sve_pred_fp_rhs2_operand>")]
+ (match_operand:SVE_F_B16B16 2 "<sve_pred_fp_rhs1_operand>")
+ (match_operand:SVE_F_B16B16 3 "<sve_pred_fp_rhs2_operand>")]
SVE_COND_FP_BINARY)
- (match_operand:SVE_FULL_F_BF 4 "aarch64_simd_reg_or_zero")]
+ (match_operand:SVE_F_B16B16 4 "aarch64_simd_reg_or_zero")]
UNSPEC_SEL))]
"TARGET_SVE && (<supports_bf16> || !<is_bf16>)"
+ {
+ operands[1] = aarch64_sve_emit_masked_fp_pred (<MODE>mode, operands[1]);
+ }
)
;; Predicated floating-point operations, merging with the first input.
(define_insn_and_rewrite "*cond_<optab><mode>_2_relaxed"
- [(set (match_operand:SVE_FULL_F_BF 0 "register_operand")
- (unspec:SVE_FULL_F_BF
+ [(set (match_operand:SVE_F_B16B16 0 "register_operand")
+ (unspec:SVE_F_B16B16
[(match_operand:<VPRED> 1 "register_operand")
- (unspec:SVE_FULL_F_BF
+ (unspec:SVE_F_B16B16
[(match_operand 4)
(const_int SVE_RELAXED_GP)
- (match_operand:SVE_FULL_F_BF 2 "register_operand")
- (match_operand:SVE_FULL_F_BF 3 "register_operand")]
+ (match_operand:SVE_F_B16B16 2 "register_operand")
+ (match_operand:SVE_F_B16B16 3 "register_operand")]
SVE_COND_FP_BINARY)
(match_dup 2)]
UNSPEC_SEL))]
@@ -5483,14 +5644,14 @@
)
(define_insn "*cond_<optab><mode>_2_strict"
- [(set (match_operand:SVE_FULL_F_BF 0 "register_operand")
- (unspec:SVE_FULL_F_BF
- [(match_operand:<VPRED> 1 "register_operand")
- (unspec:SVE_FULL_F_BF
+ [(set (match_operand:SVE_F_B16B16 0 "register_operand")
+ (unspec:SVE_F_B16B16
+ [(match_operand:<VPRED> 1 "aarch64_predicate_operand")
+ (unspec:SVE_F_B16B16
[(match_dup 1)
(const_int SVE_STRICT_GP)
- (match_operand:SVE_FULL_F_BF 2 "register_operand")
- (match_operand:SVE_FULL_F_BF 3 "register_operand")]
+ (match_operand:SVE_F_B16B16 2 "register_operand")
+ (match_operand:SVE_F_B16B16 3 "register_operand")]
SVE_COND_FP_BINARY)
(match_dup 2)]
UNSPEC_SEL))]
@@ -5503,14 +5664,14 @@
;; Same for operations that take a 1-bit constant.
(define_insn_and_rewrite "*cond_<optab><mode>_2_const_relaxed"
- [(set (match_operand:SVE_FULL_F 0 "register_operand")
- (unspec:SVE_FULL_F
+ [(set (match_operand:SVE_F 0 "register_operand")
+ (unspec:SVE_F
[(match_operand:<VPRED> 1 "register_operand")
- (unspec:SVE_FULL_F
+ (unspec:SVE_F
[(match_operand 4)
(const_int SVE_RELAXED_GP)
- (match_operand:SVE_FULL_F 2 "register_operand")
- (match_operand:SVE_FULL_F 3 "<sve_pred_fp_rhs2_immediate>")]
+ (match_operand:SVE_F 2 "register_operand")
+ (match_operand:SVE_F 3 "<sve_pred_fp_rhs2_immediate>")]
SVE_COND_FP_BINARY_I1)
(match_dup 2)]
UNSPEC_SEL))]
@@ -5526,14 +5687,14 @@
)
(define_insn "*cond_<optab><mode>_2_const_strict"
- [(set (match_operand:SVE_FULL_F 0 "register_operand")
- (unspec:SVE_FULL_F
- [(match_operand:<VPRED> 1 "register_operand")
- (unspec:SVE_FULL_F
+ [(set (match_operand:SVE_F 0 "register_operand")
+ (unspec:SVE_F
+ [(match_operand:<VPRED> 1 "aarch64_predicate_operand")
+ (unspec:SVE_F
[(match_dup 1)
(const_int SVE_STRICT_GP)
- (match_operand:SVE_FULL_F 2 "register_operand")
- (match_operand:SVE_FULL_F 3 "<sve_pred_fp_rhs2_immediate>")]
+ (match_operand:SVE_F 2 "register_operand")
+ (match_operand:SVE_F 3 "<sve_pred_fp_rhs2_immediate>")]
SVE_COND_FP_BINARY_I1)
(match_dup 2)]
UNSPEC_SEL))]
@@ -5546,14 +5707,14 @@
;; Predicated floating-point operations, merging with the second input.
(define_insn_and_rewrite "*cond_<optab><mode>_3_relaxed"
- [(set (match_operand:SVE_FULL_F_BF 0 "register_operand")
- (unspec:SVE_FULL_F_BF
+ [(set (match_operand:SVE_F_B16B16 0 "register_operand")
+ (unspec:SVE_F_B16B16
[(match_operand:<VPRED> 1 "register_operand")
- (unspec:SVE_FULL_F_BF
+ (unspec:SVE_F_B16B16
[(match_operand 4)
(const_int SVE_RELAXED_GP)
- (match_operand:SVE_FULL_F_BF 2 "register_operand")
- (match_operand:SVE_FULL_F_BF 3 "register_operand")]
+ (match_operand:SVE_F_B16B16 2 "register_operand")
+ (match_operand:SVE_F_B16B16 3 "register_operand")]
SVE_COND_FP_BINARY)
(match_dup 3)]
UNSPEC_SEL))]
@@ -5569,14 +5730,14 @@
)
(define_insn "*cond_<optab><mode>_3_strict"
- [(set (match_operand:SVE_FULL_F_BF 0 "register_operand")
- (unspec:SVE_FULL_F_BF
- [(match_operand:<VPRED> 1 "register_operand")
- (unspec:SVE_FULL_F_BF
+ [(set (match_operand:SVE_F_B16B16 0 "register_operand")
+ (unspec:SVE_F_B16B16
+ [(match_operand:<VPRED> 1 "aarch64_predicate_operand")
+ (unspec:SVE_F_B16B16
[(match_dup 1)
(const_int SVE_STRICT_GP)
- (match_operand:SVE_FULL_F_BF 2 "register_operand")
- (match_operand:SVE_FULL_F_BF 3 "register_operand")]
+ (match_operand:SVE_F_B16B16 2 "register_operand")
+ (match_operand:SVE_F_B16B16 3 "register_operand")]
SVE_COND_FP_BINARY)
(match_dup 3)]
UNSPEC_SEL))]
@@ -5589,16 +5750,16 @@
;; Predicated floating-point operations, merging with an independent value.
(define_insn_and_rewrite "*cond_<optab><mode>_any_relaxed"
- [(set (match_operand:SVE_FULL_F_BF 0 "register_operand")
- (unspec:SVE_FULL_F_BF
+ [(set (match_operand:SVE_F_B16B16 0 "register_operand")
+ (unspec:SVE_F_B16B16
[(match_operand:<VPRED> 1 "register_operand")
- (unspec:SVE_FULL_F_BF
+ (unspec:SVE_F_B16B16
[(match_operand 5)
(const_int SVE_RELAXED_GP)
- (match_operand:SVE_FULL_F_BF 2 "register_operand")
- (match_operand:SVE_FULL_F_BF 3 "register_operand")]
+ (match_operand:SVE_F_B16B16 2 "register_operand")
+ (match_operand:SVE_F_B16B16 3 "register_operand")]
SVE_COND_FP_BINARY)
- (match_operand:SVE_FULL_F_BF 4 "aarch64_simd_reg_or_zero")]
+ (match_operand:SVE_F_B16B16 4 "aarch64_simd_reg_or_zero")]
UNSPEC_SEL))]
"TARGET_SVE
&& (<supports_bf16> || !<is_bf16>)
@@ -5633,16 +5794,16 @@
)
(define_insn_and_rewrite "*cond_<optab><mode>_any_strict"
- [(set (match_operand:SVE_FULL_F_BF 0 "register_operand")
- (unspec:SVE_FULL_F_BF
- [(match_operand:<VPRED> 1 "register_operand")
- (unspec:SVE_FULL_F_BF
+ [(set (match_operand:SVE_F_B16B16 0 "register_operand")
+ (unspec:SVE_F_B16B16
+ [(match_operand:<VPRED> 1 "aarch64_predicate_operand")
+ (unspec:SVE_F_B16B16
[(match_dup 1)
(const_int SVE_STRICT_GP)
- (match_operand:SVE_FULL_F_BF 2 "register_operand")
- (match_operand:SVE_FULL_F_BF 3 "register_operand")]
+ (match_operand:SVE_F_B16B16 2 "register_operand")
+ (match_operand:SVE_F_B16B16 3 "register_operand")]
SVE_COND_FP_BINARY)
- (match_operand:SVE_FULL_F_BF 4 "aarch64_simd_reg_or_zero")]
+ (match_operand:SVE_F_B16B16 4 "aarch64_simd_reg_or_zero")]
UNSPEC_SEL))]
"TARGET_SVE
&& (<supports_bf16> || !<is_bf16>)
@@ -5671,16 +5832,16 @@
;; Same for operations that take a 1-bit constant.
(define_insn_and_rewrite "*cond_<optab><mode>_any_const_relaxed"
- [(set (match_operand:SVE_FULL_F 0 "register_operand")
- (unspec:SVE_FULL_F
+ [(set (match_operand:SVE_F 0 "register_operand")
+ (unspec:SVE_F
[(match_operand:<VPRED> 1 "register_operand")
- (unspec:SVE_FULL_F
+ (unspec:SVE_F
[(match_operand 5)
(const_int SVE_RELAXED_GP)
- (match_operand:SVE_FULL_F 2 "register_operand")
- (match_operand:SVE_FULL_F 3 "<sve_pred_fp_rhs2_immediate>")]
+ (match_operand:SVE_F 2 "register_operand")
+ (match_operand:SVE_F 3 "<sve_pred_fp_rhs2_immediate>")]
SVE_COND_FP_BINARY_I1)
- (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")]
+ (match_operand:SVE_F 4 "aarch64_simd_reg_or_zero")]
UNSPEC_SEL))]
"TARGET_SVE && !rtx_equal_p (operands[2], operands[4])"
{@ [ cons: =0 , 1 , 2 , 4 ]
@@ -5707,16 +5868,16 @@
)
(define_insn_and_rewrite "*cond_<optab><mode>_any_const_strict"
- [(set (match_operand:SVE_FULL_F 0 "register_operand")
- (unspec:SVE_FULL_F
- [(match_operand:<VPRED> 1 "register_operand")
- (unspec:SVE_FULL_F
+ [(set (match_operand:SVE_F 0 "register_operand")
+ (unspec:SVE_F
+ [(match_operand:<VPRED> 1 "aarch64_predicate_operand")
+ (unspec:SVE_F
[(match_dup 1)
(const_int SVE_STRICT_GP)
- (match_operand:SVE_FULL_F 2 "register_operand")
- (match_operand:SVE_FULL_F 3 "<sve_pred_fp_rhs2_immediate>")]
+ (match_operand:SVE_F 2 "register_operand")
+ (match_operand:SVE_F 3 "<sve_pred_fp_rhs2_immediate>")]
SVE_COND_FP_BINARY_I1)
- (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")]
+ (match_operand:SVE_F 4 "aarch64_simd_reg_or_zero")]
UNSPEC_SEL))]
"TARGET_SVE && !rtx_equal_p (operands[2], operands[4])"
{@ [ cons: =0 , 1 , 2 , 4 ]
@@ -5745,16 +5906,16 @@
;; Predicated floating-point addition.
(define_insn "@aarch64_pred_<optab><mode>"
- [(set (match_operand:SVE_FULL_F 0 "register_operand")
- (unspec:SVE_FULL_F
- [(match_operand:<VPRED> 1 "register_operand")
+ [(set (match_operand:SVE_F 0 "register_operand")
+ (unspec:SVE_F
+ [(match_operand:<VPRED> 1 "aarch64_predicate_operand")
(match_operand:SI 4 "aarch64_sve_gp_strictness")
- (match_operand:SVE_FULL_F 2 "register_operand")
- (match_operand:SVE_FULL_F 3 "aarch64_sve_float_arith_with_sub_operand")]
+ (match_operand:SVE_F 2 "register_operand")
+ (match_operand:SVE_F 3 "aarch64_sve_float_arith_with_sub_operand")]
SVE_COND_FP_ADD))]
"TARGET_SVE"
- {@ [ cons: =0 , 1 , 2 , 3 , 4 ; attrs: movprfx ]
- [ w , Upl , %0 , vsA , i ; * ] fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
+ {@ [ cons: =0 , 1 , %2 , 3 , 4 ; attrs: movprfx ]
+ [ w , Upl , 0 , vsA , i ; * ] fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
[ w , Upl , 0 , vsN , i ; * ] fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3
[ w , Upl , w , w , Z ; * ] #
[ w , Upl , 0 , w , Ui1 ; * ] fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
@@ -5767,14 +5928,14 @@
;; Predicated floating-point addition of a constant, merging with the
;; first input.
(define_insn_and_rewrite "*cond_add<mode>_2_const_relaxed"
- [(set (match_operand:SVE_FULL_F 0 "register_operand")
- (unspec:SVE_FULL_F
+ [(set (match_operand:SVE_F 0 "register_operand")
+ (unspec:SVE_F
[(match_operand:<VPRED> 1 "register_operand")
- (unspec:SVE_FULL_F
+ (unspec:SVE_F
[(match_operand 4)
(const_int SVE_RELAXED_GP)
- (match_operand:SVE_FULL_F 2 "register_operand")
- (match_operand:SVE_FULL_F 3 "aarch64_sve_float_arith_with_sub_immediate")]
+ (match_operand:SVE_F 2 "register_operand")
+ (match_operand:SVE_F 3 "aarch64_sve_float_arith_with_sub_immediate")]
UNSPEC_COND_FADD)
(match_dup 2)]
UNSPEC_SEL))]
@@ -5792,14 +5953,14 @@
)
(define_insn "*cond_add<mode>_2_const_strict"
- [(set (match_operand:SVE_FULL_F 0 "register_operand")
- (unspec:SVE_FULL_F
- [(match_operand:<VPRED> 1 "register_operand")
- (unspec:SVE_FULL_F
+ [(set (match_operand:SVE_F 0 "register_operand")
+ (unspec:SVE_F
+ [(match_operand:<VPRED> 1 "aarch64_predicate_operand")
+ (unspec:SVE_F
[(match_dup 1)
(const_int SVE_STRICT_GP)
- (match_operand:SVE_FULL_F 2 "register_operand")
- (match_operand:SVE_FULL_F 3 "aarch64_sve_float_arith_with_sub_immediate")]
+ (match_operand:SVE_F 2 "register_operand")
+ (match_operand:SVE_F 3 "aarch64_sve_float_arith_with_sub_immediate")]
UNSPEC_COND_FADD)
(match_dup 2)]
UNSPEC_SEL))]
@@ -5815,16 +5976,16 @@
;; Predicated floating-point addition of a constant, merging with an
;; independent value.
(define_insn_and_rewrite "*cond_add<mode>_any_const_relaxed"
- [(set (match_operand:SVE_FULL_F 0 "register_operand")
- (unspec:SVE_FULL_F
+ [(set (match_operand:SVE_F 0 "register_operand")
+ (unspec:SVE_F
[(match_operand:<VPRED> 1 "register_operand")
- (unspec:SVE_FULL_F
+ (unspec:SVE_F
[(match_operand 5)
(const_int SVE_RELAXED_GP)
- (match_operand:SVE_FULL_F 2 "register_operand")
- (match_operand:SVE_FULL_F 3 "aarch64_sve_float_arith_with_sub_immediate")]
+ (match_operand:SVE_F 2 "register_operand")
+ (match_operand:SVE_F 3 "aarch64_sve_float_arith_with_sub_immediate")]
UNSPEC_COND_FADD)
- (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")]
+ (match_operand:SVE_F 4 "aarch64_simd_reg_or_zero")]
UNSPEC_SEL))]
"TARGET_SVE && !rtx_equal_p (operands[2], operands[4])"
{@ [ cons: =0 , 1 , 2 , 3 , 4 ]
@@ -5854,16 +6015,16 @@
)
(define_insn_and_rewrite "*cond_add<mode>_any_const_strict"
- [(set (match_operand:SVE_FULL_F 0 "register_operand")
- (unspec:SVE_FULL_F
- [(match_operand:<VPRED> 1 "register_operand")
- (unspec:SVE_FULL_F
+ [(set (match_operand:SVE_F 0 "register_operand")
+ (unspec:SVE_F
+ [(match_operand:<VPRED> 1 "aarch64_predicate_operand")
+ (unspec:SVE_F
[(match_dup 1)
(const_int SVE_STRICT_GP)
- (match_operand:SVE_FULL_F 2 "register_operand")
- (match_operand:SVE_FULL_F 3 "aarch64_sve_float_arith_with_sub_immediate")]
+ (match_operand:SVE_F 2 "register_operand")
+ (match_operand:SVE_F 3 "aarch64_sve_float_arith_with_sub_immediate")]
UNSPEC_COND_FADD)
- (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")]
+ (match_operand:SVE_F 4 "aarch64_simd_reg_or_zero")]
UNSPEC_SEL))]
"TARGET_SVE && !rtx_equal_p (operands[2], operands[4])"
{@ [ cons: =0 , 1 , 2 , 3 , 4 ]
@@ -6061,12 +6222,12 @@
;; Predicated floating-point subtraction.
(define_insn "@aarch64_pred_<optab><mode>"
- [(set (match_operand:SVE_FULL_F 0 "register_operand")
- (unspec:SVE_FULL_F
- [(match_operand:<VPRED> 1 "register_operand")
+ [(set (match_operand:SVE_F 0 "register_operand")
+ (unspec:SVE_F
+ [(match_operand:<VPRED> 1 "aarch64_predicate_operand")
(match_operand:SI 4 "aarch64_sve_gp_strictness")
- (match_operand:SVE_FULL_F 2 "aarch64_sve_float_arith_operand")
- (match_operand:SVE_FULL_F 3 "register_operand")]
+ (match_operand:SVE_F 2 "aarch64_sve_float_arith_operand")
+ (match_operand:SVE_F 3 "register_operand")]
SVE_COND_FP_SUB))]
"TARGET_SVE"
{@ [ cons: =0 , 1 , 2 , 3 , 4 ; attrs: movprfx ]
@@ -6082,14 +6243,14 @@
;; Predicated floating-point subtraction from a constant, merging with the
;; second input.
(define_insn_and_rewrite "*cond_sub<mode>_3_const_relaxed"
- [(set (match_operand:SVE_FULL_F 0 "register_operand")
- (unspec:SVE_FULL_F
+ [(set (match_operand:SVE_F 0 "register_operand")
+ (unspec:SVE_F
[(match_operand:<VPRED> 1 "register_operand")
- (unspec:SVE_FULL_F
+ (unspec:SVE_F
[(match_operand 4)
(const_int SVE_RELAXED_GP)
- (match_operand:SVE_FULL_F 2 "aarch64_sve_float_arith_immediate")
- (match_operand:SVE_FULL_F 3 "register_operand")]
+ (match_operand:SVE_F 2 "aarch64_sve_float_arith_immediate")
+ (match_operand:SVE_F 3 "register_operand")]
UNSPEC_COND_FSUB)
(match_dup 3)]
UNSPEC_SEL))]
@@ -6105,14 +6266,14 @@
)
(define_insn "*cond_sub<mode>_3_const_strict"
- [(set (match_operand:SVE_FULL_F 0 "register_operand")
- (unspec:SVE_FULL_F
- [(match_operand:<VPRED> 1 "register_operand")
- (unspec:SVE_FULL_F
+ [(set (match_operand:SVE_F 0 "register_operand")
+ (unspec:SVE_F
+ [(match_operand:<VPRED> 1 "aarch64_predicate_operand")
+ (unspec:SVE_F
[(match_dup 1)
(const_int SVE_STRICT_GP)
- (match_operand:SVE_FULL_F 2 "aarch64_sve_float_arith_immediate")
- (match_operand:SVE_FULL_F 3 "register_operand")]
+ (match_operand:SVE_F 2 "aarch64_sve_float_arith_immediate")
+ (match_operand:SVE_F 3 "register_operand")]
UNSPEC_COND_FSUB)
(match_dup 3)]
UNSPEC_SEL))]
@@ -6126,16 +6287,16 @@
;; Predicated floating-point subtraction from a constant, merging with an
;; independent value.
(define_insn_and_rewrite "*cond_sub<mode>_const_relaxed"
- [(set (match_operand:SVE_FULL_F 0 "register_operand")
- (unspec:SVE_FULL_F
+ [(set (match_operand:SVE_F 0 "register_operand")
+ (unspec:SVE_F
[(match_operand:<VPRED> 1 "register_operand")
- (unspec:SVE_FULL_F
+ (unspec:SVE_F
[(match_operand 5)
(const_int SVE_RELAXED_GP)
- (match_operand:SVE_FULL_F 2 "aarch64_sve_float_arith_immediate")
- (match_operand:SVE_FULL_F 3 "register_operand")]
+ (match_operand:SVE_F 2 "aarch64_sve_float_arith_immediate")
+ (match_operand:SVE_F 3 "register_operand")]
UNSPEC_COND_FSUB)
- (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")]
+ (match_operand:SVE_F 4 "aarch64_simd_reg_or_zero")]
UNSPEC_SEL))]
"TARGET_SVE && !rtx_equal_p (operands[3], operands[4])"
{@ [ cons: =0 , 1 , 3 , 4 ]
@@ -6162,16 +6323,16 @@
)
(define_insn_and_rewrite "*cond_sub<mode>_const_strict"
- [(set (match_operand:SVE_FULL_F 0 "register_operand")
- (unspec:SVE_FULL_F
- [(match_operand:<VPRED> 1 "register_operand")
- (unspec:SVE_FULL_F
+ [(set (match_operand:SVE_F 0 "register_operand")
+ (unspec:SVE_F
+ [(match_operand:<VPRED> 1 "aarch64_predicate_operand")
+ (unspec:SVE_F
[(match_dup 1)
(const_int SVE_STRICT_GP)
- (match_operand:SVE_FULL_F 2 "aarch64_sve_float_arith_immediate")
- (match_operand:SVE_FULL_F 3 "register_operand")]
+ (match_operand:SVE_F 2 "aarch64_sve_float_arith_immediate")
+ (match_operand:SVE_F 3 "register_operand")]
UNSPEC_COND_FSUB)
- (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")]
+ (match_operand:SVE_F 4 "aarch64_simd_reg_or_zero")]
UNSPEC_SEL))]
"TARGET_SVE && !rtx_equal_p (operands[3], operands[4])"
{@ [ cons: =0 , 1 , 3 , 4 ]
@@ -6228,8 +6389,8 @@
UNSPEC_COND_FSUB)]
UNSPEC_COND_FABS))]
"TARGET_SVE"
- {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
- [ w , Upl , %0 , w ; * ] fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
+ {@ [ cons: =0 , 1 , %2 , 3 ; attrs: movprfx ]
+ [ w , Upl , 0 , w ; * ] fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
[ ?&w , Upl , w , w ; yes ] movprfx\t%0, %2\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
}
"&& !rtx_equal_p (operands[1], operands[5])"
@@ -6251,8 +6412,8 @@
UNSPEC_COND_FSUB)]
UNSPEC_COND_FABS))]
"TARGET_SVE"
- {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
- [ w , Upl , %0 , w ; * ] fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
+ {@ [ cons: =0 , 1 , %2 , 3 ; attrs: movprfx ]
+ [ w , Upl , 0 , w ; * ] fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
[ ?&w , Upl , w , w ; yes ] movprfx\t%0, %2\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
}
)
@@ -6484,16 +6645,16 @@
;; Predicated floating-point multiplication.
(define_insn "@aarch64_pred_<optab><mode>"
- [(set (match_operand:SVE_FULL_F 0 "register_operand")
- (unspec:SVE_FULL_F
- [(match_operand:<VPRED> 1 "register_operand")
+ [(set (match_operand:SVE_F 0 "register_operand")
+ (unspec:SVE_F
+ [(match_operand:<VPRED> 1 "aarch64_predicate_operand")
(match_operand:SI 4 "aarch64_sve_gp_strictness")
- (match_operand:SVE_FULL_F 2 "register_operand")
- (match_operand:SVE_FULL_F 3 "aarch64_sve_float_mul_operand")]
+ (match_operand:SVE_F 2 "register_operand")
+ (match_operand:SVE_F 3 "aarch64_sve_float_mul_operand")]
SVE_COND_FP_MUL))]
"TARGET_SVE"
- {@ [ cons: =0 , 1 , 2 , 3 , 4 ; attrs: movprfx ]
- [ w , Upl , %0 , vsM , i ; * ] fmul\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
+ {@ [ cons: =0 , 1 , %2 , 3 , 4 ; attrs: movprfx ]
+ [ w , Upl , 0 , vsM , i ; * ] fmul\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
[ w , Upl , w , w , Z ; * ] #
[ w , Upl , 0 , w , Ui1 ; * ] fmul\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
[ ?&w , Upl , w , vsM , i ; yes ] movprfx\t%0, %2\;fmul\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
@@ -6506,13 +6667,13 @@
;; Unpredicated multiplication by selected lanes.
(define_insn "@aarch64_mul_lane_<mode>"
- [(set (match_operand:SVE_FULL_F_BF 0 "register_operand" "=w")
- (mult:SVE_FULL_F_BF
- (unspec:SVE_FULL_F_BF
- [(match_operand:SVE_FULL_F_BF 2 "register_operand" "<sve_lane_con>")
+ [(set (match_operand:SVE_FULL_F_B16B16 0 "register_operand" "=w")
+ (mult:SVE_FULL_F_B16B16
+ (unspec:SVE_FULL_F_B16B16
+ [(match_operand:SVE_FULL_F_B16B16 2 "register_operand" "<sve_lane_con>")
(match_operand:SI 3 "const_int_operand")]
UNSPEC_SVE_LANE_SELECT)
- (match_operand:SVE_FULL_F_BF 1 "register_operand" "w")))]
+ (match_operand:SVE_FULL_F_B16B16 1 "register_operand" "w")))]
"TARGET_SVE"
"<b>fmul\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>[%3]"
)
@@ -6524,12 +6685,12 @@
;; -------------------------------------------------------------------------
(define_expand "div<mode>3"
- [(set (match_operand:SVE_FULL_F 0 "register_operand")
- (unspec:SVE_FULL_F
+ [(set (match_operand:SVE_F 0 "register_operand")
+ (unspec:SVE_F
[(match_dup 3)
- (const_int SVE_RELAXED_GP)
- (match_operand:SVE_FULL_F 1 "nonmemory_operand")
- (match_operand:SVE_FULL_F 2 "register_operand")]
+ (match_dup 4)
+ (match_operand:SVE_F 1 "nonmemory_operand")
+ (match_operand:SVE_F 2 "register_operand")]
UNSPEC_COND_FDIV))]
"TARGET_SVE"
{
@@ -6537,23 +6698,23 @@
DONE;
operands[1] = force_reg (<MODE>mode, operands[1]);
- operands[3] = aarch64_ptrue_reg (<VPRED>mode);
+ operands[3] = aarch64_sve_fp_pred (<MODE>mode, &operands[4]);
}
)
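
Here aarch64_sve_fp_pred replaces the unconditional relaxed ptrue: it returns the governing predicate and, through its second argument, the strictness constant to pair with it. Judging from its uses in this patch, a full-vector mode can keep a relaxed ptrue, while an unpacked mode needs a predicate/strictness pair whose undefined bits cannot trap. A conceptual sketch of that choice (the real implementation lives elsewhere in this series; everything below is assumption):

#include <cstdint>
#include <utility>

enum strictness { SVE_RELAXED_GP, SVE_STRICT_GP };

// Hypothetical stand-in for a predicate constant: one bit per byte of
// a 128-bit granule.
struct pred { uint16_t bits; };

// For a full vector every predicate bit governs a real element, so a
// relaxed ptrue is fine.  For an unpacked vector we assume the safe
// choice is a strict predicate covering only the defined positions.
static std::pair<pred, strictness>
fp_pred (bool full_vector, unsigned container_bytes)
{
  if (full_vector)
    return {pred {0xffff}, SVE_RELAXED_GP};
  uint16_t bits = 0;
  for (unsigned b = 0; b < 16; b += container_bytes)
    bits |= (uint16_t) 1 << b;
  return {pred {bits}, SVE_STRICT_GP};
}

int main ()
{
  auto [p, s] = fp_pred (false, 4);
  return p.bits == 0x1111 && s == SVE_STRICT_GP ? 0 : 1;
}
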
(define_expand "@aarch64_frecpe<mode>"
- [(set (match_operand:SVE_FULL_F 0 "register_operand")
- (unspec:SVE_FULL_F
- [(match_operand:SVE_FULL_F 1 "register_operand")]
+ [(set (match_operand:SVE_F 0 "register_operand")
+ (unspec:SVE_F
+ [(match_operand:SVE_F 1 "register_operand")]
UNSPEC_FRECPE))]
"TARGET_SVE"
)
(define_expand "@aarch64_frecps<mode>"
- [(set (match_operand:SVE_FULL_F 0 "register_operand")
- (unspec:SVE_FULL_F
- [(match_operand:SVE_FULL_F 1 "register_operand")
- (match_operand:SVE_FULL_F 2 "register_operand")]
+ [(set (match_operand:SVE_F 0 "register_operand")
+ (unspec:SVE_F
+ [(match_operand:SVE_F 1 "register_operand")
+ (match_operand:SVE_F 2 "register_operand")]
UNSPEC_FRECPS))]
"TARGET_SVE"
)
@@ -6571,10 +6732,10 @@
;; by providing this, but we need to use UNSPECs since rtx logical ops
;; aren't defined for floating-point modes.
(define_insn "*<optab><mode>3"
- [(set (match_operand:SVE_F 0 "register_operand" "=w")
- (unspec:SVE_F
- [(match_operand:SVE_F 1 "register_operand" "w")
- (match_operand:SVE_F 2 "register_operand" "w")]
+ [(set (match_operand:SVE_F_BF 0 "register_operand" "=w")
+ (unspec:SVE_F_BF
+ [(match_operand:SVE_F_BF 1 "register_operand" "w")
+ (match_operand:SVE_F_BF 2 "register_operand" "w")]
LOGICALF))]
"TARGET_SVE"
"<logicalf_op>\t%0.d, %1.d, %2.d"
@@ -6718,16 +6879,16 @@
;; Predicated floating-point maximum/minimum.
(define_insn "@aarch64_pred_<optab><mode>"
- [(set (match_operand:SVE_FULL_F 0 "register_operand")
- (unspec:SVE_FULL_F
- [(match_operand:<VPRED> 1 "register_operand")
+ [(set (match_operand:SVE_F 0 "register_operand")
+ (unspec:SVE_F
+ [(match_operand:<VPRED> 1 "aarch64_predicate_operand")
(match_operand:SI 4 "aarch64_sve_gp_strictness")
- (match_operand:SVE_FULL_F 2 "register_operand")
- (match_operand:SVE_FULL_F 3 "aarch64_sve_float_maxmin_operand")]
+ (match_operand:SVE_F 2 "register_operand")
+ (match_operand:SVE_F 3 "aarch64_sve_float_maxmin_operand")]
SVE_COND_FP_MAXMIN))]
"TARGET_SVE"
- {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
- [ w , Upl , %0 , vsB ; * ] <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
+ {@ [ cons: =0 , 1 , %2 , 3 ; attrs: movprfx ]
+ [ w , Upl , 0 , vsB ; * ] <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
[ w , Upl , 0 , w ; * ] <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
[ ?&w , Upl , w , vsB ; yes ] movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
[ ?&w , Upl , w , w ; yes ] movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
@@ -6752,7 +6913,7 @@
;; Predicate AND. We can reuse one of the inputs as the GP.
;; Doubling the second operand is the preferred implementation
;; of the MOV alias, so we use that instead of %1/z, %1, %2.
-(define_insn "and<mode>3"
+(define_insn "@and<mode>3"
[(set (match_operand:PRED_ALL 0 "register_operand")
(and:PRED_ALL (match_operand:PRED_ALL 1 "register_operand")
(match_operand:PRED_ALL 2 "register_operand")))]
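
The only change to this pattern is the leading '@', which asks GCC's generators to emit mode-parameterized helpers (gccint calls these "parameterized names"), so backend C++ can instantiate the predicate AND for whichever mode it has in hand. A rough standalone analogy of the dispatch this buys (the per-mode pattern names below are illustrative):

#include <cstdio>

enum pred_mode { VNx16BI, VNx8BI, VNx4BI, VNx2BI };

// gen_and (mode, ...) conceptually selects the per-mode pattern the
// way this table does.
static const char *
and_pattern_for (pred_mode m)
{
  switch (m)
    {
    case VNx16BI: return "andvnx16bi3";
    case VNx8BI:  return "andvnx8bi3";
    case VNx4BI:  return "andvnx4bi3";
    case VNx2BI:  return "andvnx2bi3";
    }
  return nullptr;
}

int main ()
{
  printf ("%s\n", and_pattern_for (VNx8BI));
}
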
@@ -7035,8 +7196,8 @@
UNSPEC_PRED_X)
(match_operand:SVE_I 4 "register_operand")))]
"TARGET_SVE"
- {@ [ cons: =0 , 1 , 2 , 3 , 4 ; attrs: movprfx ]
- [ w , Upl , %0 , w , w ; * ] mad\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
+ {@ [ cons: =0 , 1 , %2 , 3 , 4 ; attrs: movprfx ]
+ [ w , Upl , 0 , w , w ; * ] mad\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
[ w , Upl , w , w , 0 ; * ] mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
[ ?&w , Upl , w , w , w ; yes ] movprfx\t%0, %4\;mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
}
@@ -7177,8 +7338,8 @@
(match_operand:SVE_I 3 "register_operand"))]
UNSPEC_PRED_X)))]
"TARGET_SVE"
- {@ [ cons: =0 , 1 , 2 , 3 , 4 ; attrs: movprfx ]
- [ w , Upl , %0 , w , w ; * ] msb\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
+ {@ [ cons: =0 , 1 , %2 , 3 , 4 ; attrs: movprfx ]
+ [ w , Upl , 0 , w , w ; * ] msb\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
[ w , Upl , w , w , 0 ; * ] mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
[ ?&w , Upl , w , w , w ; yes ] movprfx\t%0, %4\;mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
}
@@ -7434,33 +7595,33 @@
;; Unpredicated floating-point ternary operations.
(define_expand "<optab><mode>4"
- [(set (match_operand:SVE_FULL_F_BF 0 "register_operand")
- (unspec:SVE_FULL_F_BF
+ [(set (match_operand:SVE_F_B16B16 0 "register_operand")
+ (unspec:SVE_F_B16B16
[(match_dup 4)
- (const_int SVE_RELAXED_GP)
- (match_operand:SVE_FULL_F_BF 1 "register_operand")
- (match_operand:SVE_FULL_F_BF 2 "register_operand")
- (match_operand:SVE_FULL_F_BF 3 "register_operand")]
+ (match_dup 5)
+ (match_operand:SVE_F_B16B16 1 "register_operand")
+ (match_operand:SVE_F_B16B16 2 "register_operand")
+ (match_operand:SVE_F_B16B16 3 "register_operand")]
SVE_COND_FP_TERNARY))]
"TARGET_SVE && (<supports_bf16> || !<is_bf16>)"
{
- operands[4] = aarch64_ptrue_reg (<VPRED>mode);
+ operands[4] = aarch64_sve_fp_pred (<MODE>mode, &operands[5]);
}
)
;; Predicated floating-point ternary operations.
(define_insn "@aarch64_pred_<optab><mode>"
- [(set (match_operand:SVE_FULL_F_BF 0 "register_operand")
- (unspec:SVE_FULL_F_BF
- [(match_operand:<VPRED> 1 "register_operand")
+ [(set (match_operand:SVE_F_B16B16 0 "register_operand")
+ (unspec:SVE_F_B16B16
+ [(match_operand:<VPRED> 1 "aarch64_predicate_operand")
(match_operand:SI 5 "aarch64_sve_gp_strictness")
- (match_operand:SVE_FULL_F_BF 2 "register_operand")
- (match_operand:SVE_FULL_F_BF 3 "register_operand")
- (match_operand:SVE_FULL_F_BF 4 "register_operand")]
+ (match_operand:SVE_F_B16B16 2 "register_operand")
+ (match_operand:SVE_F_B16B16 3 "register_operand")
+ (match_operand:SVE_F_B16B16 4 "register_operand")]
SVE_COND_FP_TERNARY))]
"TARGET_SVE && (<supports_bf16> || !<is_bf16>)"
- {@ [ cons: =0 , 1 , 2 , 3 , 4 ; attrs: movprfx , is_rev ]
- [ w , Upl , %w , w , 0 ; * , * ] <b><sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
+ {@ [ cons: =0 , 1 , %2 , 3 , 4 ; attrs: movprfx , is_rev ]
+ [ w , Upl , w , w , 0 ; * , * ] <b><sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
[ w , Upl , 0 , w , w ; * , true ] <b><sve_fmad_op>\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
[ ?&w , Upl , w , w , w ; yes , * ] movprfx\t%0, %4\;<b><sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
}
@@ -7470,17 +7631,17 @@
;; Predicated floating-point ternary operations with merging.
(define_expand "@cond_<optab><mode>"
- [(set (match_operand:SVE_FULL_F_BF 0 "register_operand")
- (unspec:SVE_FULL_F_BF
+ [(set (match_operand:SVE_F_B16B16 0 "register_operand")
+ (unspec:SVE_F_B16B16
[(match_operand:<VPRED> 1 "register_operand")
- (unspec:SVE_FULL_F_BF
+ (unspec:SVE_F_B16B16
[(match_dup 1)
(const_int SVE_STRICT_GP)
- (match_operand:SVE_FULL_F_BF 2 "register_operand")
- (match_operand:SVE_FULL_F_BF 3 "register_operand")
- (match_operand:SVE_FULL_F_BF 4 "register_operand")]
+ (match_operand:SVE_F_B16B16 2 "register_operand")
+ (match_operand:SVE_F_B16B16 3 "register_operand")
+ (match_operand:SVE_F_B16B16 4 "register_operand")]
SVE_COND_FP_TERNARY)
- (match_operand:SVE_FULL_F_BF 5 "aarch64_simd_reg_or_zero")]
+ (match_operand:SVE_F_B16B16 5 "aarch64_simd_reg_or_zero")]
UNSPEC_SEL))]
"TARGET_SVE && (<supports_bf16> || !<is_bf16>)"
{
@@ -7488,20 +7649,22 @@
second of the two. */
if (rtx_equal_p (operands[3], operands[5]))
std::swap (operands[2], operands[3]);
+
+ operands[1] = aarch64_sve_emit_masked_fp_pred (<MODE>mode, operands[1]);
})
;; Predicated floating-point ternary operations, merging with the
;; first input.
(define_insn_and_rewrite "*cond_<optab><mode>_2_relaxed"
- [(set (match_operand:SVE_FULL_F 0 "register_operand")
- (unspec:SVE_FULL_F
+ [(set (match_operand:SVE_F 0 "register_operand")
+ (unspec:SVE_F
[(match_operand:<VPRED> 1 "register_operand")
- (unspec:SVE_FULL_F
+ (unspec:SVE_F
[(match_operand 5)
(const_int SVE_RELAXED_GP)
- (match_operand:SVE_FULL_F 2 "register_operand")
- (match_operand:SVE_FULL_F 3 "register_operand")
- (match_operand:SVE_FULL_F 4 "register_operand")]
+ (match_operand:SVE_F 2 "register_operand")
+ (match_operand:SVE_F 3 "register_operand")
+ (match_operand:SVE_F 4 "register_operand")]
SVE_COND_FP_TERNARY)
(match_dup 2)]
UNSPEC_SEL))]
@@ -7517,15 +7680,15 @@
)
(define_insn "*cond_<optab><mode>_2_strict"
- [(set (match_operand:SVE_FULL_F 0 "register_operand")
- (unspec:SVE_FULL_F
- [(match_operand:<VPRED> 1 "register_operand")
- (unspec:SVE_FULL_F
+ [(set (match_operand:SVE_F 0 "register_operand")
+ (unspec:SVE_F
+ [(match_operand:<VPRED> 1 "aarch64_predicate_operand")
+ (unspec:SVE_F
[(match_dup 1)
(const_int SVE_STRICT_GP)
- (match_operand:SVE_FULL_F 2 "register_operand")
- (match_operand:SVE_FULL_F 3 "register_operand")
- (match_operand:SVE_FULL_F 4 "register_operand")]
+ (match_operand:SVE_F 2 "register_operand")
+ (match_operand:SVE_F 3 "register_operand")
+ (match_operand:SVE_F 4 "register_operand")]
SVE_COND_FP_TERNARY)
(match_dup 2)]
UNSPEC_SEL))]
@@ -7539,15 +7702,15 @@
;; Predicated floating-point ternary operations, merging with the
;; third input.
(define_insn_and_rewrite "*cond_<optab><mode>_4_relaxed"
- [(set (match_operand:SVE_FULL_F_BF 0 "register_operand")
- (unspec:SVE_FULL_F_BF
+ [(set (match_operand:SVE_F_B16B16 0 "register_operand")
+ (unspec:SVE_F_B16B16
[(match_operand:<VPRED> 1 "register_operand")
- (unspec:SVE_FULL_F_BF
+ (unspec:SVE_F_B16B16
[(match_operand 5)
(const_int SVE_RELAXED_GP)
- (match_operand:SVE_FULL_F_BF 2 "register_operand")
- (match_operand:SVE_FULL_F_BF 3 "register_operand")
- (match_operand:SVE_FULL_F_BF 4 "register_operand")]
+ (match_operand:SVE_F_B16B16 2 "register_operand")
+ (match_operand:SVE_F_B16B16 3 "register_operand")
+ (match_operand:SVE_F_B16B16 4 "register_operand")]
SVE_COND_FP_TERNARY)
(match_dup 4)]
UNSPEC_SEL))]
@@ -7563,15 +7726,15 @@
)
(define_insn "*cond_<optab><mode>_4_strict"
- [(set (match_operand:SVE_FULL_F_BF 0 "register_operand")
- (unspec:SVE_FULL_F_BF
- [(match_operand:<VPRED> 1 "register_operand")
- (unspec:SVE_FULL_F_BF
+ [(set (match_operand:SVE_F_B16B16 0 "register_operand")
+ (unspec:SVE_F_B16B16
+ [(match_operand:<VPRED> 1 "aarch64_predicate_operand")
+ (unspec:SVE_F_B16B16
[(match_dup 1)
(const_int SVE_STRICT_GP)
- (match_operand:SVE_FULL_F_BF 2 "register_operand")
- (match_operand:SVE_FULL_F_BF 3 "register_operand")
- (match_operand:SVE_FULL_F_BF 4 "register_operand")]
+ (match_operand:SVE_F_B16B16 2 "register_operand")
+ (match_operand:SVE_F_B16B16 3 "register_operand")
+ (match_operand:SVE_F_B16B16 4 "register_operand")]
SVE_COND_FP_TERNARY)
(match_dup 4)]
UNSPEC_SEL))]
@@ -7585,17 +7748,17 @@
;; Predicated floating-point ternary operations, merging with an
;; independent value.
(define_insn_and_rewrite "*cond_<optab><mode>_any_relaxed"
- [(set (match_operand:SVE_FULL_F_BF 0 "register_operand")
- (unspec:SVE_FULL_F_BF
+ [(set (match_operand:SVE_F_B16B16 0 "register_operand")
+ (unspec:SVE_F_B16B16
[(match_operand:<VPRED> 1 "register_operand")
- (unspec:SVE_FULL_F_BF
+ (unspec:SVE_F_B16B16
[(match_operand 6)
(const_int SVE_RELAXED_GP)
- (match_operand:SVE_FULL_F_BF 2 "register_operand")
- (match_operand:SVE_FULL_F_BF 3 "register_operand")
- (match_operand:SVE_FULL_F_BF 4 "register_operand")]
+ (match_operand:SVE_F_B16B16 2 "register_operand")
+ (match_operand:SVE_F_B16B16 3 "register_operand")
+ (match_operand:SVE_F_B16B16 4 "register_operand")]
SVE_COND_FP_TERNARY)
- (match_operand:SVE_FULL_F_BF 5 "aarch64_simd_reg_or_zero")]
+ (match_operand:SVE_F_B16B16 5 "aarch64_simd_reg_or_zero")]
UNSPEC_SEL))]
"TARGET_SVE
&& (<supports_bf16> || !<is_bf16>)
@@ -7631,17 +7794,17 @@
)
(define_insn_and_rewrite "*cond_<optab><mode>_any_strict"
- [(set (match_operand:SVE_FULL_F_BF 0 "register_operand")
- (unspec:SVE_FULL_F_BF
- [(match_operand:<VPRED> 1 "register_operand")
- (unspec:SVE_FULL_F_BF
+ [(set (match_operand:SVE_F_B16B16 0 "register_operand")
+ (unspec:SVE_F_B16B16
+ [(match_operand:<VPRED> 1 "aarch64_predicate_operand")
+ (unspec:SVE_F_B16B16
[(match_dup 1)
(const_int SVE_STRICT_GP)
- (match_operand:SVE_FULL_F_BF 2 "register_operand")
- (match_operand:SVE_FULL_F_BF 3 "register_operand")
- (match_operand:SVE_FULL_F_BF 4 "register_operand")]
+ (match_operand:SVE_F_B16B16 2 "register_operand")
+ (match_operand:SVE_F_B16B16 3 "register_operand")
+ (match_operand:SVE_F_B16B16 4 "register_operand")]
SVE_COND_FP_TERNARY)
- (match_operand:SVE_FULL_F_BF 5 "aarch64_simd_reg_or_zero")]
+ (match_operand:SVE_F_B16B16 5 "aarch64_simd_reg_or_zero")]
UNSPEC_SEL))]
"TARGET_SVE
&& (<supports_bf16> || !<is_bf16>)
@@ -7672,14 +7835,14 @@
;; Unpredicated FMLA and FMLS by selected lanes. It doesn't seem worth using
;; (fma ...) since target-independent code won't understand the indexing.
(define_insn "@aarch64_<optab>_lane_<mode>"
- [(set (match_operand:SVE_FULL_F_BF 0 "register_operand")
- (unspec:SVE_FULL_F_BF
- [(match_operand:SVE_FULL_F_BF 1 "register_operand")
- (unspec:SVE_FULL_F_BF
- [(match_operand:SVE_FULL_F_BF 2 "register_operand")
+ [(set (match_operand:SVE_FULL_F_B16B16 0 "register_operand")
+ (unspec:SVE_FULL_F_B16B16
+ [(match_operand:SVE_FULL_F_B16B16 1 "register_operand")
+ (unspec:SVE_FULL_F_B16B16
+ [(match_operand:SVE_FULL_F_B16B16 2 "register_operand")
(match_operand:SI 3 "const_int_operand")]
UNSPEC_SVE_LANE_SELECT)
- (match_operand:SVE_FULL_F_BF 4 "register_operand")]
+ (match_operand:SVE_FULL_F_B16B16 4 "register_operand")]
SVE_FP_TERNARY_LANE))]
"TARGET_SVE"
{@ [ cons: =0 , 1 , 2 , 4 ; attrs: movprfx ]
@@ -8040,20 +8203,23 @@
;;
;; For unpacked vectors, it doesn't really matter whether SEL uses
;; the container size or the element size. If SEL used the container size,
-;; it would ignore undefined bits of the predicate but would copy the
-;; upper (undefined) bits of each container along with the defined bits.
-;; If SEL used the element size, it would use undefined bits of the predicate
-;; to select between undefined elements in each input vector. Thus the only
-;; difference is whether the undefined bits in a container always come from
-;; the same input as the defined bits, or whether the choice can vary
-;; independently of the defined bits.
+;; it would copy the upper (undefined) bits of each container along
+;; with the corresponding defined bits. If SEL used the element size,
+;; it would use separate predicate bits to select between the undefined
+;; elements in each input vector; these separate predicate bits might
+;; themselves be undefined, depending on the mode of the predicate.
+;;
+;; Thus the only difference is whether the undefined bits in a container
+;; always come from the same input as the defined bits, or whether the
+;; choice can vary independently of the defined bits.
;;
;; For the other instructions, using the element size is more natural,
;; so we do that for SEL as well.
(define_insn "*vcond_mask_<mode><vpred>"
[(set (match_operand:SVE_ALL 0 "register_operand")
(unspec:SVE_ALL
- [(match_operand:<VPRED> 3 "register_operand")
+ [(match_operand:<VPRED> 3 "aarch64_predicate_operand")
(match_operand:SVE_ALL 1 "aarch64_sve_reg_or_dup_imm")
(match_operand:SVE_ALL 2 "aarch64_simd_reg_or_zero")]
UNSPEC_SEL))]
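
The distinction drawn in the comment above is easiest to see with a worked example. Below, a 32-bit "vector" holds two elements in the low bytes of 16-bit containers, so the high bytes and the odd predicate bits are undefined. Container-size selection and element-size selection agree on every defined byte; they can only differ in which input the undefined bytes come from (a standalone illustration, not compiler code):

#include <cstdint>
#include <cstdio>

// Select whole 2-byte containers, using only each container's low
// predicate bit.
static uint32_t sel_container (uint32_t a, uint32_t b, unsigned pred)
{
  uint32_t r = 0;
  for (int c = 0; c < 2; ++c)
    {
      uint32_t src = (pred >> (2 * c)) & 1 ? a : b;
      r |= src & (0xffffu << (16 * c));
    }
  return r;
}

// Select per byte, so the undefined odd predicate bits choose the
// source of the undefined high bytes independently.
static uint32_t sel_element (uint32_t a, uint32_t b, unsigned pred)
{
  uint32_t r = 0;
  for (int e = 0; e < 4; ++e)
    {
      uint32_t src = (pred >> e) & 1 ? a : b;
      r |= src & (0xffu << (8 * e));
    }
  return r;
}

int main ()
{
  // Defined predicate bits: bit 0 clear, bit 2 set; bits 1 and 3 are
  // "undefined" and here happen to be zero.
  unsigned pred = 0b0100;
  printf ("%#x %#x\n",
          sel_container (0x11223344, 0x55667788, pred),  // 0x11227788
          sel_element (0x11223344, 0x55667788, pred));   // 0x55227788
}
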
@@ -8192,6 +8358,71 @@
}
)
+;; Likewise, but yield a VNx16BI result regardless of the element width.
+;; The .b case is equivalent to the above.
+(define_expand "@aarch64_pred_cmp<cmp_op><mode>_acle"
+ [(parallel
+ [(set (match_operand:<VPRED> 0 "register_operand")
+ (unspec:<VPRED>
+ [(match_operand:<VPRED> 1 "register_operand")
+ (match_operand:SI 2 "aarch64_sve_ptrue_flag")
+ (SVE_INT_CMP:<VPRED>
+ (match_operand:VNx16QI_ONLY 3 "register_operand")
+ (match_operand:VNx16QI_ONLY 4 "aarch64_sve_cmp_<sve_imm_con>_operand"))]
+ UNSPEC_PRED_Z))
+ (clobber (reg:CC_NZC CC_REGNUM))])]
+ "TARGET_SVE"
+)
+
+;; For wider elements, bitcast the predicate result to a VNx16BI and use
+;; an (and ...) to indicate that only every second, fourth, or eighth bit
+;; is set.
+(define_expand "@aarch64_pred_cmp<cmp_op><mode>_acle"
+ [(parallel
+ [(set (match_operand:VNx16BI 0 "register_operand")
+ (and:VNx16BI
+ (subreg:VNx16BI
+ (unspec:<VPRED>
+ [(match_operand:<VPRED> 1 "register_operand")
+ (match_operand:SI 2 "aarch64_sve_ptrue_flag")
+ (SVE_INT_CMP:<VPRED>
+ (match_operand:SVE_FULL_HSDI 3 "register_operand")
+ (match_operand:SVE_FULL_HSDI 4 "aarch64_sve_cmp_<sve_imm_con>_operand"))]
+ UNSPEC_PRED_Z)
+ 0)
+ (match_dup 5)))
+ (clobber (reg:CC_NZC CC_REGNUM))])]
+ "TARGET_SVE"
+ {
+ operands[5] = aarch64_ptrue_all (GET_MODE_UNIT_SIZE (<MODE>mode));
+ }
+)
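
The aarch64_ptrue_all constant generated here is the key to the whole _acle scheme: with one significant bit per element container, ANDing the bitcast comparison result with it is a no-op on genuine hardware results (the instruction already zeroes the in-between bits) but tells the RTL optimizers which bits can possibly be set. A standalone sketch of the constant and the invariant, assuming 16 predicate bits per 128-bit granule:

#include <cassert>
#include <cstdint>
#include <cstdio>

// Model of aarch64_ptrue_all: one set bit per elt_bytes-sized container.
static uint16_t ptrue_all (unsigned elt_bytes)
{
  uint16_t m = 0;
  for (unsigned b = 0; b < 16; b += elt_bytes)
    m |= (uint16_t) 1 << b;
  return m;
}

int main ()
{
  printf (".h %#06x  .s %#06x  .d %#06x\n",
          ptrue_all (2), ptrue_all (4), ptrue_all (8));
  // .h 0x5555  .s 0x1111  .d 0x0101

  // A plausible .h CMPEQ result only ever sets even bits, so the AND
  // in the pattern changes nothing at run time.
  uint16_t raw = 0x1405;
  assert ((raw & ptrue_all (2)) == raw);
}
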
+
+(define_insn "*aarch64_pred_cmp<cmp_op><mode>_acle"
+ [(set (match_operand:VNx16BI 0 "register_operand")
+ (and:VNx16BI
+ (subreg:VNx16BI
+ (unspec:<VPRED>
+ [(match_operand:<VPRED> 1 "register_operand")
+ (match_operand:SI 2 "aarch64_sve_ptrue_flag")
+ (SVE_INT_CMP:<VPRED>
+ (match_operand:SVE_FULL_HSDI 3 "register_operand")
+ (match_operand:SVE_FULL_HSDI 4 "aarch64_sve_cmp_<sve_imm_con>_operand"))]
+ UNSPEC_PRED_Z)
+ 0)
+ (match_operand:<VPRED> 5 "aarch64_ptrue_all_operand")))
+ (clobber (reg:CC_NZC CC_REGNUM))]
+ "TARGET_SVE"
+ {@ [ cons: =0 , 1 , 3 , 4 ; attrs: pred_clobber ]
+ [ &Upa , Upl, w , <sve_imm_con>; yes ] cmp<cmp_op>\t%0.<Vetype>, %1/z, %3.<Vetype>, #%4
+ [ ?Upl , 0 , w , <sve_imm_con>; yes ] ^
+ [ Upa , Upl, w , <sve_imm_con>; no ] ^
+ [ &Upa , Upl, w , w ; yes ] cmp<cmp_op>\t%0.<Vetype>, %1/z, %3.<Vetype>, %4.<Vetype>
+ [ ?Upl , 0 , w , w ; yes ] ^
+ [ Upa , Upl, w , w ; no ] ^
+ }
+)
+
;; Predicated integer comparisons in which both the flag and predicate
;; results are interesting.
(define_insn_and_rewrite "*cmp<cmp_op><mode>_cc"
@@ -8233,6 +8464,49 @@
}
)
+(define_insn_and_rewrite "*cmp<cmp_op><mode>_acle_cc"
+ [(set (reg:CC_NZC CC_REGNUM)
+ (unspec:CC_NZC
+ [(match_operand:VNx16BI 1 "register_operand")
+ (match_operand 4)
+ (match_operand:SI 5 "aarch64_sve_ptrue_flag")
+ (unspec:<VPRED>
+ [(match_operand 6)
+ (match_operand:SI 7 "aarch64_sve_ptrue_flag")
+ (SVE_INT_CMP:<VPRED>
+ (match_operand:SVE_FULL_HSDI 2 "register_operand")
+ (match_operand:SVE_FULL_HSDI 3 "aarch64_sve_cmp_<sve_imm_con>_operand"))]
+ UNSPEC_PRED_Z)]
+ UNSPEC_PTEST))
+ (set (match_operand:VNx16BI 0 "register_operand")
+ (and:VNx16BI
+ (subreg:VNx16BI
+ (unspec:<VPRED>
+ [(match_dup 6)
+ (match_dup 7)
+ (SVE_INT_CMP:<VPRED>
+ (match_dup 2)
+ (match_dup 3))]
+ UNSPEC_PRED_Z)
+ 0)
+ (match_operand:<VPRED> 8 "aarch64_ptrue_all_operand")))]
+ "TARGET_SVE
+ && aarch64_sve_same_pred_for_ptest_p (&operands[4], &operands[6])"
+ {@ [ cons: =0 , 1 , 2 , 3 ; attrs: pred_clobber ]
+ [ &Upa , Upl, w , <sve_imm_con>; yes ] cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #%3
+ [ ?Upl , 0 , w , <sve_imm_con>; yes ] ^
+ [ Upa , Upl, w , <sve_imm_con>; no ] ^
+ [ &Upa , Upl, w , w ; yes ] cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>
+ [ ?Upl , 0 , w , w ; yes ] ^
+ [ Upa , Upl, w , w ; no ] ^
+ }
+ "&& !rtx_equal_p (operands[4], operands[6])"
+ {
+ operands[6] = copy_rtx (operands[4]);
+ operands[7] = operands[5];
+ }
+)
+
;; Predicated integer comparisons in which only the flags result is
;; interesting.
(define_insn_and_rewrite "*cmp<cmp_op><mode>_ptest"
@@ -8298,14 +8572,52 @@
(clobber (reg:CC_NZC CC_REGNUM))])]
)
+(define_insn_and_split "*cmp<cmp_op><mode>_acle_and"
+ [(set (match_operand:VNx16BI 0 "register_operand" "=Upa, Upa")
+ (and:VNx16BI
+ (and:VNx16BI
+ (subreg:VNx16BI
+ (unspec:<VPRED>
+ [(match_operand 4)
+ (const_int SVE_KNOWN_PTRUE)
+ (SVE_INT_CMP:<VPRED>
+ (match_operand:SVE_FULL_HSDI 2 "register_operand" "w, w")
+ (match_operand:SVE_FULL_HSDI 3 "aarch64_sve_cmp_<sve_imm_con>_operand" "<sve_imm_con>, w"))]
+ UNSPEC_PRED_Z)
+ 0)
+ (match_operand:VNx16BI 1 "register_operand" "Upl, Upl"))
+ (match_operand:<VPRED> 5 "aarch64_ptrue_all_operand")))
+ (clobber (reg:CC_NZC CC_REGNUM))]
+ "TARGET_SVE"
+ "#"
+ "&& 1"
+ [(parallel
+ [(set (match_dup 0)
+ (and:VNx16BI
+ (subreg:VNx16BI
+ (unspec:<VPRED>
+ [(match_dup 1)
+ (const_int SVE_MAYBE_NOT_PTRUE)
+ (SVE_INT_CMP:<VPRED>
+ (match_dup 2)
+ (match_dup 3))]
+ UNSPEC_PRED_Z)
+ 0)
+ (match_dup 5)))
+ (clobber (reg:CC_NZC CC_REGNUM))])]
+ {
+ operands[1] = gen_lowpart (<VPRED>mode, operands[1]);
+ }
+)
+
;; Predicated integer wide comparisons.
(define_insn "@aarch64_pred_cmp<cmp_op><mode>_wide"
[(set (match_operand:<VPRED> 0 "register_operand")
(unspec:<VPRED>
- [(match_operand:VNx16BI 1 "register_operand")
+ [(match_operand:<VPRED> 1 "register_operand")
(match_operand:SI 2 "aarch64_sve_ptrue_flag")
(unspec:<VPRED>
- [(match_operand:SVE_FULL_BHSI 3 "register_operand")
+ [(match_operand:VNx16QI_ONLY 3 "register_operand")
(match_operand:VNx2DI 4 "register_operand")]
SVE_COND_INT_CMP_WIDE)]
UNSPEC_PRED_Z))
@@ -8318,16 +8630,61 @@
}
)
+(define_expand "@aarch64_pred_cmp<cmp_op><mode>_wide"
+ [(parallel
+ [(set (match_operand:VNx16BI 0 "register_operand")
+ (and:VNx16BI
+ (subreg:VNx16BI
+ (unspec:<VPRED>
+ [(match_operand:<VPRED> 1 "register_operand")
+ (match_operand:SI 2 "aarch64_sve_ptrue_flag")
+ (unspec:<VPRED>
+ [(match_operand:SVE_FULL_HSI 3 "register_operand")
+ (match_operand:VNx2DI 4 "register_operand")]
+ SVE_COND_INT_CMP_WIDE)]
+ UNSPEC_PRED_Z)
+ 0)
+ (match_dup 5)))
+ (clobber (reg:CC_NZC CC_REGNUM))])]
+ "TARGET_SVE"
+ {
+ operands[5] = aarch64_ptrue_all (GET_MODE_UNIT_SIZE (<MODE>mode));
+ }
+)
+
+(define_insn "*aarch64_pred_cmp<cmp_op><mode>_wide"
+ [(set (match_operand:VNx16BI 0 "register_operand")
+ (and:VNx16BI
+ (subreg:VNx16BI
+ (unspec:<VPRED>
+ [(match_operand:<VPRED> 1 "register_operand")
+ (match_operand:SI 2 "aarch64_sve_ptrue_flag")
+ (unspec:<VPRED>
+ [(match_operand:SVE_FULL_HSI 3 "register_operand")
+ (match_operand:VNx2DI 4 "register_operand")]
+ SVE_COND_INT_CMP_WIDE)]
+ UNSPEC_PRED_Z)
+ 0)
+ (match_operand:<VPRED> 5 "aarch64_ptrue_all_operand")))
+ (clobber (reg:CC_NZC CC_REGNUM))]
+ "TARGET_SVE"
+ {@ [ cons: =0, 1 , 2, 3, 4; attrs: pred_clobber ]
+ [ &Upa , Upl, , w, w; yes ] cmp<cmp_op>\t%0.<Vetype>, %1/z, %3.<Vetype>, %4.d
+ [ ?Upl , 0 , , w, w; yes ] ^
+ [ Upa , Upl, , w, w; no ] ^
+ }
+)
+
;; Predicated integer wide comparisons in which both the flag and
;; predicate results are interesting.
-(define_insn "*aarch64_pred_cmp<cmp_op><mode>_wide_cc"
+(define_insn_and_rewrite "*aarch64_pred_cmp<cmp_op><mode>_wide_cc"
[(set (reg:CC_NZC CC_REGNUM)
(unspec:CC_NZC
[(match_operand:VNx16BI 1 "register_operand")
(match_operand 4)
(match_operand:SI 5 "aarch64_sve_ptrue_flag")
(unspec:<VPRED>
- [(match_operand:VNx16BI 6 "register_operand")
+ [(match_operand:<VPRED> 6 "register_operand")
(match_operand:SI 7 "aarch64_sve_ptrue_flag")
(unspec:<VPRED>
[(match_operand:SVE_FULL_BHSI 2 "register_operand")
@@ -8351,18 +8708,65 @@
[ ?Upl , 0 , w, w, Upl; yes ] ^
[ Upa , Upl, w, w, Upl; no ] ^
}
+ "&& !rtx_equal_p (operands[4], operands[6])"
+ {
+ operands[6] = copy_rtx (operands[4]);
+ operands[7] = operands[5];
+ }
+)
+
+(define_insn_and_rewrite "*aarch64_pred_cmp<cmp_op><mode>_wide_cc"
+ [(set (reg:CC_NZC CC_REGNUM)
+ (unspec:CC_NZC
+ [(match_operand:VNx16BI 1 "register_operand")
+ (match_operand 4)
+ (match_operand:SI 5 "aarch64_sve_ptrue_flag")
+ (unspec:<VPRED>
+ [(match_operand:<VPRED> 6 "register_operand")
+ (match_operand:SI 7 "aarch64_sve_ptrue_flag")
+ (unspec:<VPRED>
+ [(match_operand:SVE_FULL_HSI 2 "register_operand")
+ (match_operand:VNx2DI 3 "register_operand")]
+ SVE_COND_INT_CMP_WIDE)]
+ UNSPEC_PRED_Z)]
+ UNSPEC_PTEST))
+ (set (match_operand:VNx16BI 0 "register_operand")
+ (and:VNx16BI
+ (subreg:VNx16BI
+ (unspec:<VPRED>
+ [(match_dup 6)
+ (match_dup 7)
+ (unspec:<VPRED>
+ [(match_dup 2)
+ (match_dup 3)]
+ SVE_COND_INT_CMP_WIDE)]
+ UNSPEC_PRED_Z)
+ 0)
+ (match_operand:<VPRED> 8 "aarch64_ptrue_all_operand")))]
+ "TARGET_SVE
+ && aarch64_sve_same_pred_for_ptest_p (&operands[4], &operands[6])"
+ {@ [ cons: =0, 1 , 2, 3, 6 ; attrs: pred_clobber ]
+ [ &Upa , Upl, w, w, Upl; yes ] cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.d
+ [ ?Upl , 0 , w, w, Upl; yes ] ^
+ [ Upa , Upl, w, w, Upl; no ] ^
+ }
+ "&& !rtx_equal_p (operands[4], operands[6])"
+ {
+ operands[6] = copy_rtx (operands[4]);
+ operands[7] = operands[5];
+ }
)
;; Predicated integer wide comparisons in which only the flags result
;; is interesting.
-(define_insn "*aarch64_pred_cmp<cmp_op><mode>_wide_ptest"
+(define_insn_and_rewrite "*aarch64_pred_cmp<cmp_op><mode>_wide_ptest"
[(set (reg:CC_NZC CC_REGNUM)
(unspec:CC_NZC
[(match_operand:VNx16BI 1 "register_operand")
(match_operand 4)
(match_operand:SI 5 "aarch64_sve_ptrue_flag")
(unspec:<VPRED>
- [(match_operand:VNx16BI 6 "register_operand")
+ [(match_operand:<VPRED> 6 "register_operand")
(match_operand:SI 7 "aarch64_sve_ptrue_flag")
(unspec:<VPRED>
[(match_operand:SVE_FULL_BHSI 2 "register_operand")
@@ -8378,6 +8782,11 @@
[ ?Upl , 0 , w, w, Upl; yes ] ^
[ Upa , Upl, w, w, Upl; no ] ^
}
+ "&& !rtx_equal_p (operands[4], operands[6])"
+ {
+ operands[6] = copy_rtx (operands[4]);
+ operands[7] = operands[5];
+ }
)
;; -------------------------------------------------------------------------
@@ -8415,6 +8824,58 @@
"while<cmp_op>\t%0.<PRED_ALL:Vetype>, %<w>1, %<w>2"
)
+;; Likewise, but yield a VNx16BI result regardless of the element width.
+;; The .b case is equivalent to the above.
+(define_expand "@aarch64_sve_while_<while_optab_cmp><GPI:mode><VNx16BI_ONLY:mode>_acle"
+ [(parallel
+ [(set (match_operand:VNx16BI_ONLY 0 "register_operand")
+ (unspec:VNx16BI_ONLY
+ [(const_int SVE_WHILE_B)
+ (match_operand:GPI 1 "aarch64_reg_or_zero")
+ (match_operand:GPI 2 "aarch64_reg_or_zero")]
+ SVE_WHILE))
+ (clobber (reg:CC_NZC CC_REGNUM))])]
+ "TARGET_SVE"
+)
+
+;; For wider elements, bitcast the predicate result to a VNx16BI and use
+;; an (and ...) to indicate that only every second, fourth, or eighth bit
+;; is set.
+(define_expand "@aarch64_sve_while_<while_optab_cmp><GPI:mode><PRED_HSD:mode>_acle"
+ [(parallel
+ [(set (match_operand:VNx16BI 0 "register_operand")
+ (and:VNx16BI
+ (subreg:VNx16BI
+ (unspec:PRED_HSD
+ [(const_int SVE_WHILE_B)
+ (match_operand:GPI 1 "aarch64_reg_or_zero")
+ (match_operand:GPI 2 "aarch64_reg_or_zero")]
+ SVE_WHILE)
+ 0)
+ (match_dup 3)))
+ (clobber (reg:CC_NZC CC_REGNUM))])]
+ "TARGET_SVE"
+ {
+ operands[3] = aarch64_ptrue_all (<data_bytes>);
+ }
+)
+
+(define_insn "*aarch64_sve_while_<while_optab_cmp><GPI:mode><PRED_HSD:mode>_acle"
+ [(set (match_operand:VNx16BI 0 "register_operand" "=Upa")
+ (and:VNx16BI
+ (subreg:VNx16BI
+ (unspec:PRED_HSD
+ [(const_int SVE_WHILE_B)
+ (match_operand:GPI 1 "aarch64_reg_or_zero" "rZ")
+ (match_operand:GPI 2 "aarch64_reg_or_zero" "rZ")]
+ SVE_WHILE)
+ 0)
+ (match_operand:PRED_HSD 3 "aarch64_ptrue_all_operand")))
+ (clobber (reg:CC_NZC CC_REGNUM))]
+ "TARGET_SVE"
+ "while<cmp_op>\t%0.<PRED_HSD:Vetype>, %<w>1, %<w>2"
+)
+
;; The WHILE instructions set the flags in the same way as a PTEST with
;; a PTRUE GP. Handle the case in which both results are useful. The GP
;; operands to the PTEST aren't needed, so we allow them to be anything.
@@ -8446,6 +8907,38 @@
}
)
+(define_insn_and_rewrite "*while_<while_optab_cmp><GPI:mode><PRED_HSD:mode>_acle_cc"
+ [(set (reg:CC_NZC CC_REGNUM)
+ (unspec:CC_NZC
+ [(match_operand 3)
+ (match_operand 4)
+ (const_int SVE_KNOWN_PTRUE)
+ (unspec:PRED_HSD
+ [(const_int SVE_WHILE_B)
+ (match_operand:GPI 1 "aarch64_reg_or_zero" "rZ")
+ (match_operand:GPI 2 "aarch64_reg_or_zero" "rZ")]
+ SVE_WHILE)]
+ UNSPEC_PTEST))
+ (set (match_operand:VNx16BI 0 "register_operand" "=Upa")
+ (and:VNx16BI
+ (subreg:VNx16BI
+ (unspec:PRED_HSD [(const_int SVE_WHILE_B)
+ (match_dup 1)
+ (match_dup 2)]
+ SVE_WHILE)
+ 0)
+ (match_operand:PRED_HSD 5 "aarch64_ptrue_all_operand")))]
+ "TARGET_SVE"
+ "while<cmp_op>\t%0.<PRED_HSD:Vetype>, %<w>1, %<w>2"
+ ;; Force the compiler to drop the unused predicate operand, so that we
+ ;; don't have an unnecessary PTRUE.
+ "&& (!CONSTANT_P (operands[3]) || !CONSTANT_P (operands[4]))"
+ {
+ operands[3] = CONSTM1_RTX (VNx16BImode);
+ operands[4] = CONSTM1_RTX (<PRED_HSD:MODE>mode);
+ }
+)
+
;; Same, but handle the case in which only the flags result is useful.
(define_insn_and_rewrite "@while_<while_optab_cmp><GPI:mode><PRED_ALL:mode>_ptest"
[(set (reg:CC_NZC CC_REGNUM)
@@ -8490,8 +8983,8 @@
(define_expand "vec_cmp<mode><vpred>"
[(set (match_operand:<VPRED> 0 "register_operand")
(match_operator:<VPRED> 1 "comparison_operator"
- [(match_operand:SVE_FULL_F 2 "register_operand")
- (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero")]))]
+ [(match_operand:SVE_F 2 "register_operand")
+ (match_operand:SVE_F 3 "aarch64_simd_reg_or_zero")]))]
"TARGET_SVE"
{
aarch64_expand_sve_vec_cmp_float (operands[0], GET_CODE (operands[1]),
@@ -8504,10 +8997,10 @@
(define_insn "@aarch64_pred_fcm<cmp_op><mode>"
[(set (match_operand:<VPRED> 0 "register_operand")
(unspec:<VPRED>
- [(match_operand:<VPRED> 1 "register_operand")
+ [(match_operand:<VPRED> 1 "aarch64_predicate_operand")
(match_operand:SI 2 "aarch64_sve_ptrue_flag")
- (match_operand:SVE_FULL_F 3 "register_operand")
- (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")]
+ (match_operand:SVE_F 3 "register_operand")
+ (match_operand:SVE_F 4 "aarch64_simd_reg_or_zero")]
SVE_COND_FP_CMP_I0))]
"TARGET_SVE"
{@ [ cons: =0 , 1 , 3 , 4 ]
@@ -8516,19 +9009,90 @@
}
)
+(define_expand "@aarch64_pred_fcm<cmp_op><mode>_acle"
+ [(set (match_operand:VNx16BI 0 "register_operand")
+ (and:VNx16BI
+ (subreg:VNx16BI
+ (unspec:<VPRED>
+ [(match_operand:<VPRED> 1 "aarch64_predicate_operand")
+ (match_operand:SI 2 "aarch64_sve_ptrue_flag")
+ (match_operand:SVE_F 3 "register_operand")
+ (match_operand:SVE_F 4 "aarch64_simd_reg_or_zero")]
+ SVE_COND_FP_CMP_I0)
+ 0)
+ (match_dup 5)))]
+ "TARGET_SVE"
+ {
+ operands[5] = aarch64_ptrue_all (GET_MODE_UNIT_SIZE (<MODE>mode));
+ }
+)
+
+(define_insn "*aarch64_pred_fcm<cmp_op><mode>_acle"
+ [(set (match_operand:VNx16BI 0 "register_operand")
+ (and:VNx16BI
+ (subreg:VNx16BI
+ (unspec:<VPRED>
+ [(match_operand:<VPRED> 1 "aarch64_predicate_operand")
+ (match_operand:SI 2 "aarch64_sve_ptrue_flag")
+ (match_operand:SVE_F 3 "register_operand")
+ (match_operand:SVE_F 4 "aarch64_simd_reg_or_zero")]
+ SVE_COND_FP_CMP_I0)
+ 0)
+ (match_operand:<VPRED> 5 "aarch64_ptrue_all_operand")))]
+ "TARGET_SVE"
+ {@ [ cons: =0 , 1 , 3 , 4 ]
+ [ Upa , Upl , w , Dz ] fcm<cmp_op>\t%0.<Vetype>, %1/z, %3.<Vetype>, #0.0
+ [ Upa , Upl , w , w ] fcm<cmp_op>\t%0.<Vetype>, %1/z, %3.<Vetype>, %4.<Vetype>
+ }
+)
+
;; Same for unordered comparisons.
(define_insn "@aarch64_pred_fcmuo<mode>"
[(set (match_operand:<VPRED> 0 "register_operand" "=Upa")
(unspec:<VPRED>
- [(match_operand:<VPRED> 1 "register_operand" "Upl")
+ [(match_operand:<VPRED> 1 "aarch64_predicate_operand" "Upl")
(match_operand:SI 2 "aarch64_sve_ptrue_flag")
- (match_operand:SVE_FULL_F 3 "register_operand" "w")
- (match_operand:SVE_FULL_F 4 "register_operand" "w")]
+ (match_operand:SVE_F 3 "register_operand" "w")
+ (match_operand:SVE_F 4 "register_operand" "w")]
UNSPEC_COND_FCMUO))]
"TARGET_SVE"
"fcmuo\t%0.<Vetype>, %1/z, %3.<Vetype>, %4.<Vetype>"
)
+(define_expand "@aarch64_pred_fcmuo<mode>_acle"
+ [(set (match_operand:VNx16BI 0 "register_operand")
+ (and:VNx16BI
+ (subreg:VNx16BI
+ (unspec:<VPRED>
+ [(match_operand:<VPRED> 1 "aarch64_predicate_operand")
+ (match_operand:SI 2 "aarch64_sve_ptrue_flag")
+ (match_operand:SVE_F 3 "register_operand")
+ (match_operand:SVE_F 4 "register_operand")]
+ UNSPEC_COND_FCMUO)
+ 0)
+ (match_dup 5)))]
+ "TARGET_SVE"
+ {
+ operands[5] = aarch64_ptrue_all (GET_MODE_UNIT_SIZE (<MODE>mode));
+ }
+)
+
+(define_insn "*aarch64_pred_fcmuo<mode>_acle"
+ [(set (match_operand:VNx16BI 0 "register_operand" "=Upa")
+ (and:VNx16BI
+ (subreg:VNx16BI
+ (unspec:<VPRED>
+ [(match_operand:<VPRED> 1 "aarch64_predicate_operand" "Upl")
+ (match_operand:SI 2 "aarch64_sve_ptrue_flag")
+ (match_operand:SVE_F 3 "register_operand" "w")
+ (match_operand:SVE_F 4 "register_operand" "w")]
+ UNSPEC_COND_FCMUO)
+ 0)
+ (match_operand:<VPRED> 5 "aarch64_ptrue_all_operand")))]
+ "TARGET_SVE"
+ "fcmuo\t%0.<Vetype>, %1/z, %3.<Vetype>, %4.<Vetype>"
+)
+
;; Floating-point comparisons predicated on a PTRUE, with the results ANDed
;; with another predicate P. This does not have the same trapping behavior
;; as predicating the comparison itself on P, but it's a legitimate fold,
@@ -8543,8 +9107,8 @@
(unspec:<VPRED>
[(match_operand:<VPRED> 1)
(const_int SVE_KNOWN_PTRUE)
- (match_operand:SVE_FULL_F 2 "register_operand" "w, w")
- (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero" "Dz, w")]
+ (match_operand:SVE_F 2 "register_operand" "w, w")
+ (match_operand:SVE_F 3 "aarch64_simd_reg_or_zero" "Dz, w")]
SVE_COND_FP_CMP_I0)
(match_operand:<VPRED> 4 "register_operand" "Upl, Upl")))]
"TARGET_SVE"
@@ -8566,8 +9130,8 @@
(unspec:<VPRED>
[(match_operand:<VPRED> 1)
(const_int SVE_KNOWN_PTRUE)
- (match_operand:SVE_FULL_F 2 "register_operand" "w")
- (match_operand:SVE_FULL_F 3 "register_operand" "w")]
+ (match_operand:SVE_F 2 "register_operand" "w")
+ (match_operand:SVE_F 3 "register_operand" "w")]
UNSPEC_COND_FCMUO)
(match_operand:<VPRED> 4 "register_operand" "Upl")))]
"TARGET_SVE"
@@ -8593,8 +9157,8 @@
(unspec:<VPRED>
[(match_operand:<VPRED> 1)
(const_int SVE_KNOWN_PTRUE)
- (match_operand:SVE_FULL_F 2 "register_operand" "w")
- (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero" "wDz")]
+ (match_operand:SVE_F 2 "register_operand" "w")
+ (match_operand:SVE_F 3 "aarch64_simd_reg_or_zero" "wDz")]
SVE_COND_FP_CMP_I0))
(match_operand:<VPRED> 4 "register_operand" "Upa"))
(match_dup:<VPRED> 1)))
@@ -8630,8 +9194,8 @@
(unspec:<VPRED>
[(match_operand:<VPRED> 1)
(const_int SVE_KNOWN_PTRUE)
- (match_operand:SVE_FULL_F 2 "register_operand" "w")
- (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero" "wDz")]
+ (match_operand:SVE_F 2 "register_operand" "w")
+ (match_operand:SVE_F 3 "aarch64_simd_reg_or_zero" "wDz")]
SVE_COND_FP_CMP_I0))
(not:<VPRED>
(match_operand:<VPRED> 4 "register_operand" "Upa")))
@@ -8661,6 +9225,7 @@
}
)
+;; Same for unordered comparisons.
(define_insn_and_split "*fcmuo<mode>_bic_combine"
[(set (match_operand:<VPRED> 0 "register_operand" "=Upa")
(and:<VPRED>
@@ -8669,8 +9234,8 @@
(unspec:<VPRED>
[(match_operand:<VPRED> 1)
(const_int SVE_KNOWN_PTRUE)
- (match_operand:SVE_FULL_F 2 "register_operand" "w")
- (match_operand:SVE_FULL_F 3 "register_operand" "w")]
+ (match_operand:SVE_F 2 "register_operand" "w")
+ (match_operand:SVE_F 3 "register_operand" "w")]
UNSPEC_COND_FCMUO))
(match_operand:<VPRED> 4 "register_operand" "Upa"))
(match_dup:<VPRED> 1)))
@@ -8696,7 +9261,6 @@
}
)
-;; Same for unordered comparisons.
(define_insn_and_split "*fcmuo<mode>_nor_combine"
[(set (match_operand:<VPRED> 0 "register_operand" "=Upa")
(and:<VPRED>
@@ -8705,8 +9269,8 @@
(unspec:<VPRED>
[(match_operand:<VPRED> 1)
(const_int SVE_KNOWN_PTRUE)
- (match_operand:SVE_FULL_F 2 "register_operand" "w")
- (match_operand:SVE_FULL_F 3 "register_operand" "w")]
+ (match_operand:SVE_F 2 "register_operand" "w")
+ (match_operand:SVE_F 3 "register_operand" "w")]
UNSPEC_COND_FCMUO))
(not:<VPRED>
(match_operand:<VPRED> 4 "register_operand" "Upa")))
@@ -8747,23 +9311,30 @@
;; -------------------------------------------------------------------------
;; Predicated floating-point absolute comparisons.
-(define_expand "@aarch64_pred_fac<cmp_op><mode>"
- [(set (match_operand:<VPRED> 0 "register_operand")
- (unspec:<VPRED>
- [(match_operand:<VPRED> 1 "register_operand")
- (match_operand:SI 2 "aarch64_sve_ptrue_flag")
- (unspec:SVE_FULL_F
- [(match_dup 1)
- (match_dup 2)
- (match_operand:SVE_FULL_F 3 "register_operand")]
- UNSPEC_COND_FABS)
- (unspec:SVE_FULL_F
- [(match_dup 1)
- (match_dup 2)
- (match_operand:SVE_FULL_F 4 "register_operand")]
- UNSPEC_COND_FABS)]
- SVE_COND_FP_ABS_CMP))]
+(define_expand "@aarch64_pred_fac<cmp_op><mode>_acle"
+ [(set (match_operand:VNx16BI 0 "register_operand")
+ (and:VNx16BI
+ (subreg:VNx16BI
+ (unspec:<VPRED>
+ [(match_operand:<VPRED> 1 "register_operand")
+ (match_operand:SI 2 "aarch64_sve_ptrue_flag")
+ (unspec:SVE_FULL_F
+ [(match_dup 1)
+ (match_dup 2)
+ (match_operand:SVE_FULL_F 3 "register_operand")]
+ UNSPEC_COND_FABS)
+ (unspec:SVE_FULL_F
+ [(match_dup 1)
+ (match_dup 2)
+ (match_operand:SVE_FULL_F 4 "register_operand")]
+ UNSPEC_COND_FABS)]
+ SVE_COND_FP_ABS_CMP)
+ 0)
+ (match_dup 5)))]
"TARGET_SVE"
+ {
+ operands[5] = aarch64_ptrue_all (GET_MODE_UNIT_SIZE (<MODE>mode));
+ }
)
(define_insn_and_rewrite "*aarch64_pred_fac<cmp_op><mode>_relaxed"
@@ -8812,6 +9383,30 @@
"fac<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
)
+(define_insn "*aarch64_pred_fac<cmp_op><mode>_strict_acle"
+ [(set (match_operand:VNx16BI 0 "register_operand" "=Upa")
+ (and:VNx16BI
+ (subreg:VNx16BI
+ (unspec:<VPRED>
+ [(match_operand:<VPRED> 1 "register_operand" "Upl")
+ (match_operand:SI 4 "aarch64_sve_ptrue_flag")
+ (unspec:SVE_FULL_F
+ [(match_dup 1)
+ (match_operand:SI 5 "aarch64_sve_gp_strictness")
+ (match_operand:SVE_FULL_F 2 "register_operand" "w")]
+ UNSPEC_COND_FABS)
+ (unspec:SVE_FULL_F
+ [(match_dup 1)
+ (match_operand:SI 6 "aarch64_sve_gp_strictness")
+ (match_operand:SVE_FULL_F 3 "register_operand" "w")]
+ UNSPEC_COND_FABS)]
+ SVE_COND_FP_ABS_CMP)
+ 0)
+ (match_operand:<VPRED> 7 "aarch64_ptrue_all_operand")))]
+ "TARGET_SVE"
+ "fac<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
+)
+
;; -------------------------------------------------------------------------
;; ---- [PRED] Select
;; -------------------------------------------------------------------------
@@ -9260,7 +9855,30 @@
(unspec:PRED_ALL [(match_operand:PRED_ALL 1 "register_operand" "Upa")]
UNSPEC_REV))]
"TARGET_SVE"
- "rev\t%0.<Vetype>, %1.<Vetype>")
+ "rev\t%0.<Vetype>, %1.<Vetype>"
+)
+
+(define_expand "@aarch64_sve_rev<mode>_acle"
+ [(set (match_operand:VNx16BI 0 "register_operand")
+ (unspec:VNx16BI
+ [(match_operand:VNx16BI 1 "register_operand")
+ (match_dup:PRED_ALL 2)]
+ UNSPEC_REV_PRED))]
+ "TARGET_SVE"
+ {
+ operands[2] = CONST0_RTX (<MODE>mode);
+ }
+)
+
+(define_insn "*aarch64_sve_rev<mode>_acle"
+ [(set (match_operand:VNx16BI 0 "register_operand" "=Upa")
+ (unspec:VNx16BI
+ [(match_operand:VNx16BI 1 "register_operand" "Upa")
+ (match_operand:PRED_ALL 2 "aarch64_simd_imm_zero")]
+ UNSPEC_REV_PRED))]
+ "TARGET_SVE"
+ "rev\t%0.<Vetype>, %1.<Vetype>"
+)
;; -------------------------------------------------------------------------
;; ---- [PRED] Special-purpose binary permutes
@@ -9285,18 +9903,39 @@
"<perm_insn>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
)
-;; Special purpose permute used by the predicate generation instructions.
-;; Unlike the normal permute patterns, these instructions operate on VNx16BI
-;; regardless of the element size, so that all input and output bits are
-;; well-defined. Operand 3 then indicates the size of the permute.
-(define_insn "@aarch64_sve_trn1_conv<mode>"
+;; Special-purpose permutes used by the ACLE intrinsics and predicate
+;; generation instructions. Unlike the normal permute patterns, these
+;; instructions operate on VNx16BI regardless of the element size, so that
+;; all input and output bits are well-defined. Operand 3 then indicates
+;; the size of the permute.
+;;
+;; To make generation easier, this pattern embeds the permute type as the
+;; fourth operand to the unspec. On the one hand, this avoids overloading
+;; unspecs like UNSPEC_ZIP1 to represent two different operations. On the
+;; other hand, it avoids having a separate unspec for each variant, and
+;; having to map from one kind of unspec to the other.
+(define_expand "@aarch64_sve_<perm_insn><mode>_acle"
+ [(set (match_operand:VNx16BI 0 "register_operand")
+ (unspec:VNx16BI [(match_operand:VNx16BI 1 "register_operand")
+ (match_operand:VNx16BI 2 "register_operand")
+ (match_dup:PRED_ALL 3)
+ (const_int PERMUTE)]
+ UNSPEC_PERMUTE_PRED))]
+ "TARGET_SVE"
+ {
+ operands[3] = CONST0_RTX (<MODE>mode);
+ }
+)
+
+(define_insn "*aarch64_sve_<perm_insn><mode>_acle"
[(set (match_operand:VNx16BI 0 "register_operand" "=Upa")
(unspec:VNx16BI [(match_operand:VNx16BI 1 "register_operand" "Upa")
(match_operand:VNx16BI 2 "register_operand" "Upa")
- (match_operand:PRED_ALL 3 "aarch64_simd_imm_zero")]
- UNSPEC_TRN1_CONV))]
+ (match_operand:PRED_ALL 3 "aarch64_simd_imm_zero")
+ (const_int PERMUTE)]
+ UNSPEC_PERMUTE_PRED))]
"TARGET_SVE"
- "trn1\t%0.<PRED_ALL:Vetype>, %1.<PRED_ALL:Vetype>, %2.<PRED_ALL:Vetype>"
+ "<perm_insn>\t%0.<PRED_ALL:Vetype>, %1.<PRED_ALL:Vetype>, %2.<PRED_ALL:Vetype>"
)
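+
+;; For example (an illustrative sketch): the expander above is reached
+;; via gen_aarch64_sve_acle (UNSPEC_TRN1, VNx8BImode, ...), giving RTL
+;; of the form
+;;
+;;   (set (reg:VNx16BI p0)
+;;        (unspec:VNx16BI [(reg:VNx16BI p1)
+;;                         (reg:VNx16BI p2)
+;;                         (const_vector:VNx8BI [0 ...])
+;;                         (const_int UNSPEC_TRN1)]
+;;          UNSPEC_PERMUTE_PRED))
+;;
+;; which the insn matches and emits as "trn1 p0.h, p1.h, p2.h".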
;; =========================================================================
@@ -9379,18 +10018,37 @@
;; - FCVTZU
;; -------------------------------------------------------------------------
-;; Unpredicated conversion of floats to integers of the same size (HF to HI,
-;; SF to SI or DF to DI).
-(define_expand "<optab><mode><v_int_equiv>2"
- [(set (match_operand:<V_INT_EQUIV> 0 "register_operand")
- (unspec:<V_INT_EQUIV>
+;; Unpredicated conversion of floats to integers of the same size or wider,
+;; excluding conversions from DF (see below).
+(define_expand "<optab><SVE_HSF:mode><SVE_HSDI:mode>2"
+ [(set (match_operand:SVE_HSDI 0 "register_operand")
+ (unspec:SVE_HSDI
+ [(match_dup 2)
+ (match_dup 3)
+ (match_operand:SVE_HSF 1 "register_operand")]
+ SVE_COND_FCVTI))]
+ "TARGET_SVE
+ && (~(<SVE_HSDI:self_mask> | <SVE_HSDI:narrower_mask>) & <SVE_HSF:self_mask>) == 0"
+ {
+ operands[2] = aarch64_sve_fp_pred (<SVE_HSDI:MODE>mode, &operands[3]);
+ }
+)
+
+;; SI <- DF can't use SI <- trunc (DI <- DF) without -ffast-math, so this
+;; truncating variant of FCVTZ{S,U} is useful for auto-vectorization.
+;;
+;; DF is the only source mode to which the mask used above doesn't apply,
+;; so we define a separate pattern for it here.
+(define_expand "<optab><VNx2DF_ONLY:mode><SVE_2SDI:mode>2"
+ [(set (match_operand:SVE_2SDI 0 "register_operand")
+ (unspec:SVE_2SDI
[(match_dup 2)
(const_int SVE_RELAXED_GP)
- (match_operand:SVE_FULL_F 1 "register_operand")]
+ (match_operand:VNx2DF_ONLY 1 "register_operand")]
SVE_COND_FCVTI))]
"TARGET_SVE"
{
- operands[2] = aarch64_ptrue_reg (<VPRED>mode);
+ operands[2] = aarch64_ptrue_reg (VNx2BImode);
}
)
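+
+;; A minimal example (illustrative only):
+;;
+;;   void f (int *restrict x, double *restrict y, int n)
+;;   {
+;;     for (int i = 0; i < n; ++i)
+;;       x[i] = (int) y[i];
+;;   }
+;;
+;; can now use the VNx2SI <- VNx2DF form of this expander, giving a
+;; single "fcvtzs z0.s, p0/m, z1.d" rather than a conversion to DI
+;; followed by a separate truncation.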
@@ -9409,18 +10067,37 @@
}
)
-;; Predicated narrowing float-to-integer conversion.
-(define_insn "@aarch64_sve_<optab>_trunc<VNx2DF_ONLY:mode><VNx4SI_ONLY:mode>"
- [(set (match_operand:VNx4SI_ONLY 0 "register_operand")
- (unspec:VNx4SI_ONLY
+;; As above, for pairs used by the auto-vectorizer only.
+(define_insn "*aarch64_sve_<optab>_nontrunc<SVE_PARTIAL_F:mode><SVE_HSDI:mode>"
+ [(set (match_operand:SVE_HSDI 0 "register_operand")
+ (unspec:SVE_HSDI
+ [(match_operand:<SVE_HSDI:VPRED> 1 "aarch64_predicate_operand")
+ (match_operand:SI 3 "aarch64_sve_gp_strictness")
+ (match_operand:SVE_PARTIAL_F 2 "register_operand")]
+ SVE_COND_FCVTI))]
+ "TARGET_SVE
+ && (~(<SVE_HSDI:self_mask> | <SVE_HSDI:narrower_mask>) & <SVE_PARTIAL_F:self_mask>) == 0"
+ {@ [ cons: =0 , 1 , 2 ; attrs: movprfx ]
+ [ w , Upl , 0 ; * ] fcvtz<su>\t%0.<SVE_HSDI:Vetype>, %1/m, %2.<SVE_PARTIAL_F:Vetype>
+ [ ?&w , Upl , w ; yes ] movprfx\t%0, %2\;fcvtz<su>\t%0.<SVE_HSDI:Vetype>, %1/m, %2.<SVE_PARTIAL_F:Vetype>
+ }
+)
+
+;; Predicated narrowing float-to-integer conversion. The VNx2DF->VNx4SI
+;; variant is provided for the ACLE, where the zeroed odd-indexed lanes are
+;; significant. The VNx2DF->VNx2SI variant is provided for auto-vectorization,
+;; where the upper 32 bits of each container are ignored.
+(define_insn "@aarch64_sve_<optab>_trunc<VNx2DF_ONLY:mode><SVE_SI:mode>"
+ [(set (match_operand:SVE_SI 0 "register_operand")
+ (unspec:SVE_SI
[(match_operand:VNx2BI 1 "register_operand")
(match_operand:SI 3 "aarch64_sve_gp_strictness")
(match_operand:VNx2DF_ONLY 2 "register_operand")]
SVE_COND_FCVTI))]
"TARGET_SVE"
{@ [ cons: =0 , 1 , 2 ; attrs: movprfx ]
- [ w , Upl , 0 ; * ] fcvtz<su>\t%0.<VNx4SI_ONLY:Vetype>, %1/m, %2.<VNx2DF_ONLY:Vetype>
- [ ?&w , Upl , w ; yes ] movprfx\t%0, %2\;fcvtz<su>\t%0.<VNx4SI_ONLY:Vetype>, %1/m, %2.<VNx2DF_ONLY:Vetype>
+ [ w , Upl , 0 ; * ] fcvtz<su>\t%0.<SVE_SI:Vetype>, %1/m, %2.<VNx2DF_ONLY:Vetype>
+ [ ?&w , Upl , w ; yes ] movprfx\t%0, %2\;fcvtz<su>\t%0.<SVE_SI:Vetype>, %1/m, %2.<VNx2DF_ONLY:Vetype>
}
)
@@ -9468,6 +10145,31 @@
}
)
+;; As above, for pairs that are used by the auto-vectorizer only.
+(define_insn_and_rewrite "*cond_<optab>_nontrunc<SVE_PARTIAL_F:mode><SVE_HSDI:mode>_relaxed"
+ [(set (match_operand:SVE_HSDI 0 "register_operand")
+ (unspec:SVE_HSDI
+ [(match_operand:<SVE_HSDI:VPRED> 1 "register_operand")
+ (unspec:SVE_HSDI
+ [(match_operand 4)
+ (const_int SVE_RELAXED_GP)
+ (match_operand:SVE_PARTIAL_F 2 "register_operand")]
+ SVE_COND_FCVTI)
+ (match_operand:SVE_HSDI 3 "aarch64_simd_reg_or_zero")]
+ UNSPEC_SEL))]
+ "TARGET_SVE
+ && (~(<SVE_HSDI:self_mask> | <SVE_HSDI:narrower_mask>) & <SVE_PARTIAL_F:self_mask>) == 0"
+ {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
+ [ &w , Upl , w , 0 ; * ] fcvtz<su>\t%0.<SVE_HSDI:Vetype>, %1/m, %2.<SVE_PARTIAL_F:Vetype>
+ [ &w , Upl , w , Dz ; yes ] movprfx\t%0.<SVE_HSDI:Vetype>, %1/z, %2.<SVE_HSDI:Vetype>\;fcvtz<su>\t%0.<SVE_HSDI:Vetype>, %1/m, %2.<SVE_PARTIAL_F:Vetype>
+ [ ?&w , Upl , w , w ; yes ] movprfx\t%0, %3\;fcvtz<su>\t%0.<SVE_HSDI:Vetype>, %1/m, %2.<SVE_PARTIAL_F:Vetype>
+ }
+ "&& !rtx_equal_p (operands[1], operands[4])"
+ {
+ operands[4] = copy_rtx (operands[1]);
+ }
+)
+
(define_insn "*cond_<optab>_nontrunc<SVE_FULL_F:mode><SVE_FULL_HSDI:mode>_strict"
[(set (match_operand:SVE_FULL_HSDI 0 "register_operand")
(unspec:SVE_FULL_HSDI
@@ -9521,6 +10223,29 @@
}
)
+(define_insn_and_rewrite "*cond_<optab>_trunc<VNx2DF_ONLY:mode><VNx2SI_ONLY:mode>_relaxed"
+ [(set (match_operand:VNx2SI_ONLY 0 "register_operand")
+ (unspec:VNx2SI_ONLY
+ [(match_operand:VNx2BI 1 "register_operand")
+ (unspec:VNx2SI_ONLY
+ [(match_operand 4)
+ (const_int SVE_RELAXED_GP)
+ (match_operand:VNx2DF_ONLY 2 "register_operand")]
+ SVE_COND_FCVTI)
+ (match_operand:VNx2SI_ONLY 3 "aarch64_simd_reg_or_zero")]
+ UNSPEC_SEL))]
+ "TARGET_SVE"
+ {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
+ [ &w , Upl , w , 0 ; * ] fcvtz<su>\t%0.<VNx2SI_ONLY:Vetype>, %1/m, %2.<VNx2DF_ONLY:Vetype>
+ [ &w , Upl , w , Dz ; yes ] movprfx\t%0.<VNx2DF_ONLY:Vetype>, %1/z, %2.<VNx2DF_ONLY:Vetype>\;fcvtz<su>\t%0.<VNx2SI_ONLY:Vetype>, %1/m, %2.<VNx2DF_ONLY:Vetype>
+ [ ?&w , Upl , w , w ; yes ] movprfx\t%0, %3\;fcvtz<su>\t%0.<VNx2SI_ONLY:Vetype>, %1/m, %2.<VNx2DF_ONLY:Vetype>
+ }
+ "&& !rtx_equal_p (operands[1], operands[4])"
+ {
+ operands[4] = copy_rtx (operands[1]);
+ }
+)
+
;; -------------------------------------------------------------------------
;; ---- [INT<-FP] Packs
;; -------------------------------------------------------------------------
@@ -9565,18 +10290,19 @@
;; - UCVTF
;; -------------------------------------------------------------------------
-;; Unpredicated conversion of integers to floats of the same size
-;; (HI to HF, SI to SF or DI to DF).
-(define_expand "<optab><v_int_equiv><mode>2"
- [(set (match_operand:SVE_FULL_F 0 "register_operand")
- (unspec:SVE_FULL_F
+;; Unpredicated conversion of integers to floats of the same size or
+;; narrower.
+(define_expand "<optab><SVE_HSDI:mode><SVE_F:mode>2"
+ [(set (match_operand:SVE_F 0 "register_operand")
+ (unspec:SVE_F
[(match_dup 2)
- (const_int SVE_RELAXED_GP)
- (match_operand:<V_INT_EQUIV> 1 "register_operand")]
+ (match_dup 3)
+ (match_operand:SVE_HSDI 1 "register_operand")]
SVE_COND_ICVTF))]
- "TARGET_SVE"
+ "TARGET_SVE
+ && (~(<SVE_HSDI:self_mask> | <SVE_HSDI:narrower_mask>) & <SVE_F:self_mask>) == 0"
{
- operands[2] = aarch64_ptrue_reg (<VPRED>mode);
+ operands[2] = aarch64_sve_fp_pred (<SVE_HSDI:MODE>mode, &operands[3]);
}
)
@@ -9596,6 +10322,22 @@
}
)
+;; As above, for pairs that are used by the auto-vectorizer only.
+(define_insn "*aarch64_sve_<optab>_nonextend<SVE_HSDI:mode><SVE_PARTIAL_F:mode>"
+ [(set (match_operand:SVE_PARTIAL_F 0 "register_operand")
+ (unspec:SVE_PARTIAL_F
+ [(match_operand:<SVE_HSDI:VPRED> 1 "aarch64_predicate_operand")
+ (match_operand:SI 3 "aarch64_sve_gp_strictness")
+ (match_operand:SVE_HSDI 2 "register_operand")]
+ SVE_COND_ICVTF))]
+ "TARGET_SVE
+ && (~(<SVE_HSDI:self_mask> | <SVE_HSDI:narrower_mask>) & <SVE_PARTIAL_F:self_mask>) == 0"
+ {@ [ cons: =0 , 1 , 2 ; attrs: movprfx ]
+ [ w , Upl , 0 ; * ] <su>cvtf\t%0.<SVE_PARTIAL_F:Vetype>, %1/m, %2.<SVE_HSDI:Vetype>
+ [ ?&w , Upl , w ; yes ] movprfx\t%0, %2\;<su>cvtf\t%0.<SVE_PARTIAL_F:Vetype>, %1/m, %2.<SVE_HSDI:Vetype>
+ }
+)
+
;; Predicated widening integer-to-float conversion.
(define_insn "@aarch64_sve_<optab>_extend<VNx4SI_ONLY:mode><VNx2DF_ONLY:mode>"
[(set (match_operand:VNx2DF_ONLY 0 "register_operand")
@@ -9655,6 +10397,31 @@
}
)
+;; As above, for pairs that are used by the auto-vectorizer only.
+(define_insn_and_rewrite "*cond_<optab>_nonextend<SVE_HSDI:mode><SVE_PARTIAL_F:mode>_relaxed"
+ [(set (match_operand:SVE_PARTIAL_F 0 "register_operand")
+ (unspec:SVE_PARTIAL_F
+ [(match_operand:<SVE_HSDI:VPRED> 1 "register_operand")
+ (unspec:SVE_PARTIAL_F
+ [(match_operand 4)
+ (const_int SVE_RELAXED_GP)
+ (match_operand:SVE_HSDI 2 "register_operand")]
+ SVE_COND_ICVTF)
+ (match_operand:SVE_PARTIAL_F 3 "aarch64_simd_reg_or_zero")]
+ UNSPEC_SEL))]
+ "TARGET_SVE
+ && (~(<SVE_HSDI:self_mask> | <SVE_HSDI:narrower_mask>) & <SVE_PARTIAL_F:self_mask>) == 0"
+ {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
+ [ &w , Upl , w , 0 ; * ] <su>cvtf\t%0.<SVE_PARTIAL_F:Vetype>, %1/m, %2.<SVE_HSDI:Vetype>
+ [ &w , Upl , w , Dz ; yes ] movprfx\t%0.<SVE_HSDI:Vetype>, %1/z, %2.<SVE_HSDI:Vetype>\;<su>cvtf\t%0.<SVE_PARTIAL_F:Vetype>, %1/m, %2.<SVE_HSDI:Vetype>
+ [ ?&w , Upl , w , w ; yes ] movprfx\t%0, %3\;<su>cvtf\t%0.<SVE_PARTIAL_F:Vetype>, %1/m, %2.<SVE_HSDI:Vetype>
+ }
+ "&& !rtx_equal_p (operands[1], operands[4])"
+ {
+ operands[4] = copy_rtx (operands[1]);
+ }
+)
+
(define_insn "*cond_<optab>_nonextend<SVE_FULL_HSDI:mode><SVE_FULL_F:mode>_strict"
[(set (match_operand:SVE_FULL_F 0 "register_operand")
(unspec:SVE_FULL_F
@@ -9779,6 +10546,27 @@
}
)
+;; -------------------------------------------------------------------------
+;; ---- [FP<-FP] Truncating conversions
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - FCVT
+;; -------------------------------------------------------------------------
+
+;; Unpredicated float-to-float truncation.
+(define_expand "trunc<SVE_SDF:mode><SVE_PARTIAL_HSF:mode>2"
+ [(set (match_operand:SVE_PARTIAL_HSF 0 "register_operand")
+ (unspec:SVE_PARTIAL_HSF
+ [(match_dup 2)
+ (match_dup 3)
+ (match_operand:SVE_SDF 1 "register_operand")]
+ SVE_COND_FCVT))]
+ "TARGET_SVE && (~<SVE_SDF:narrower_mask> & <SVE_PARTIAL_HSF:self_mask>) == 0"
+ {
+ operands[2] = aarch64_sve_fp_pred (<SVE_SDF:MODE>mode, &operands[3]);
+ }
+)
+
;; Predicated float-to-float truncation.
(define_insn "@aarch64_sve_<optab>_trunc<SVE_FULL_SDF:mode><SVE_FULL_HSF:mode>"
[(set (match_operand:SVE_FULL_HSF 0 "register_operand")
@@ -9794,6 +10582,21 @@
}
)
+;; As above, for pairs that are used by the auto-vectorizer only.
+(define_insn "*aarch64_sve_<optab>_trunc<SVE_SDF:mode><SVE_PARTIAL_HSF:mode>"
+ [(set (match_operand:SVE_PARTIAL_HSF 0 "register_operand")
+ (unspec:SVE_PARTIAL_HSF
+ [(match_operand:<SVE_SDF:VPRED> 1 "aarch64_predicate_operand")
+ (match_operand:SI 3 "aarch64_sve_gp_strictness")
+ (match_operand:SVE_SDF 2 "register_operand")]
+ SVE_COND_FCVT))]
+ "TARGET_SVE && (~<SVE_SDF:narrower_mask> & <SVE_PARTIAL_HSF:self_mask>) == 0"
+ {@ [ cons: =0 , 1 , 2 ; attrs: movprfx ]
+ [ w , Upl , 0 ; * ] fcvt\t%0.<SVE_PARTIAL_HSF:Vetype>, %1/m, %2.<SVE_SDF:Vetype>
+ [ ?&w , Upl , w ; yes ] movprfx\t%0, %2\;fcvt\t%0.<SVE_PARTIAL_HSF:Vetype>, %1/m, %2.<SVE_SDF:Vetype>
+ }
+)
+
;; Predicated float-to-float truncation with merging.
(define_expand "@cond_<optab>_trunc<SVE_FULL_SDF:mode><SVE_FULL_HSF:mode>"
[(set (match_operand:SVE_FULL_HSF 0 "register_operand")
@@ -9828,6 +10631,30 @@
}
)
+;; As above, for pairs that are used by the auto-vectorizer only.
+(define_insn_and_rewrite "*cond_<optab>_trunc<SVE_SDF:mode><SVE_PARTIAL_HSF:mode>"
+ [(set (match_operand:SVE_PARTIAL_HSF 0 "register_operand")
+ (unspec:SVE_PARTIAL_HSF
+ [(match_operand:<SVE_SDF:VPRED> 1 "register_operand")
+ (unspec:SVE_PARTIAL_HSF
+ [(match_operand 4)
+ (const_int SVE_RELAXED_GP)
+ (match_operand:SVE_SDF 2 "register_operand")]
+ SVE_COND_FCVT)
+ (match_operand:SVE_PARTIAL_HSF 3 "aarch64_simd_reg_or_zero")]
+ UNSPEC_SEL))]
+ "TARGET_SVE && (~<SVE_SDF:narrower_mask> & <SVE_PARTIAL_HSF:self_mask>) == 0"
+ {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
+ [ w , Upl , w , 0 ; * ] fcvt\t%0.<SVE_PARTIAL_HSF:Vetype>, %1/m, %2.<SVE_SDF:Vetype>
+ [ ?&w , Upl , w , Dz ; yes ] movprfx\t%0.<SVE_SDF:Vetype>, %1/z, %2.<SVE_SDF:Vetype>\;fcvt\t%0.<SVE_PARTIAL_HSF:Vetype>, %1/m, %2.<SVE_SDF:Vetype>
+ [ ?&w , Upl , w , w ; yes ] movprfx\t%0, %3\;fcvt\t%0.<SVE_PARTIAL_HSF:Vetype>, %1/m, %2.<SVE_SDF:Vetype>
+ }
+ "&& !rtx_equal_p (operands[1], operands[4])"
+ {
+ operands[4] = copy_rtx (operands[1]);
+ }
+)
+
;; -------------------------------------------------------------------------
;; ---- [FP<-FP] Packs (bfloat16)
;; -------------------------------------------------------------------------
@@ -9936,6 +10763,27 @@
}
)
+;; -------------------------------------------------------------------------
+;; ---- [FP<-FP] Extending conversions
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - FCVT
+;; -------------------------------------------------------------------------
+
+;; Unpredicated float-to-float extension.
+(define_expand "extend<SVE_PARTIAL_HSF:mode><SVE_SDF:mode>2"
+ [(set (match_operand:SVE_SDF 0 "register_operand")
+ (unspec:SVE_SDF
+ [(match_dup 2)
+ (match_dup 3)
+ (match_operand:SVE_PARTIAL_HSF 1 "register_operand")]
+ SVE_COND_FCVT))]
+ "TARGET_SVE && (~<SVE_SDF:narrower_mask> & <SVE_PARTIAL_HSF:self_mask>) == 0"
+ {
+ operands[2] = aarch64_sve_fp_pred (<SVE_SDF:MODE>mode, &operands[3]);
+ }
+)
+
;; Predicated float-to-float extension.
(define_insn "@aarch64_sve_<optab>_nontrunc<SVE_FULL_HSF:mode><SVE_FULL_SDF:mode>"
[(set (match_operand:SVE_FULL_SDF 0 "register_operand")
@@ -9951,6 +10799,21 @@
}
)
+;; As above, for pairs that are used by the auto-vectorizer only.
+(define_insn "*aarch64_sve_<optab>_nontrunc<SVE_PARTIAL_HSF:mode><SVE_SDF:mode>"
+ [(set (match_operand:SVE_SDF 0 "register_operand")
+ (unspec:SVE_SDF
+ [(match_operand:<SVE_SDF:VPRED> 1 "aarch64_predicate_operand")
+ (match_operand:SI 3 "aarch64_sve_gp_strictness")
+ (match_operand:SVE_PARTIAL_HSF 2 "register_operand")]
+ SVE_COND_FCVT))]
+ "TARGET_SVE && (~<SVE_SDF:narrower_mask> & <SVE_PARTIAL_HSF:self_mask>) == 0"
+ {@ [ cons: =0 , 1 , 2 ; attrs: movprfx ]
+ [ w , Upl , 0 ; * ] fcvt\t%0.<SVE_SDF:Vetype>, %1/m, %2.<SVE_PARTIAL_HSF:Vetype>
+ [ ?&w , Upl , w ; yes ] movprfx\t%0, %2\;fcvt\t%0.<SVE_SDF:Vetype>, %1/m, %2.<SVE_PARTIAL_HSF:Vetype>
+ }
+)
+
;; Predicated float-to-float extension with merging.
(define_expand "@cond_<optab>_nontrunc<SVE_FULL_HSF:mode><SVE_FULL_SDF:mode>"
[(set (match_operand:SVE_FULL_SDF 0 "register_operand")
@@ -9985,6 +10848,30 @@
}
)
+;; As above, for pairs that are used by the auto-vectorizer only.
+(define_insn_and_rewrite "*cond_<optab>_nontrunc<SVE_PARTIAL_HSF:mode><SVE_SDF:mode>_relaxed"
+ [(set (match_operand:SVE_SDF 0 "register_operand")
+ (unspec:SVE_SDF
+ [(match_operand:<SVE_SDF:VPRED> 1 "register_operand")
+ (unspec:SVE_SDF
+ [(match_operand 4)
+ (const_int SVE_RELAXED_GP)
+ (match_operand:SVE_PARTIAL_HSF 2 "register_operand")]
+ SVE_COND_FCVT)
+ (match_operand:SVE_SDF 3 "aarch64_simd_reg_or_zero")]
+ UNSPEC_SEL))]
+ "TARGET_SVE && (~<SVE_SDF:narrower_mask> & <SVE_PARTIAL_HSF:self_mask>) == 0"
+ {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
+ [ w , Upl , w , 0 ; * ] fcvt\t%0.<SVE_SDF:Vetype>, %1/m, %2.<SVE_PARTIAL_HSF:Vetype>
+ [ ?&w , Upl , w , Dz ; yes ] movprfx\t%0.<SVE_SDF:Vetype>, %1/z, %2.<SVE_SDF:Vetype>\;fcvt\t%0.<SVE_SDF:Vetype>, %1/m, %2.<SVE_PARTIAL_HSF:Vetype>
+ [ ?&w , Upl , w , w ; yes ] movprfx\t%0, %3\;fcvt\t%0.<SVE_SDF:Vetype>, %1/m, %2.<SVE_PARTIAL_HSF:Vetype>
+ }
+ "&& !rtx_equal_p (operands[1], operands[4])"
+ {
+ operands[4] = copy_rtx (operands[1]);
+ }
+)
+
;; -------------------------------------------------------------------------
;; ---- [PRED<-PRED] Packs
;; -------------------------------------------------------------------------
@@ -10037,6 +10924,34 @@
"punpk<perm_hilo>\t%0.h, %1.b"
)
+(define_expand "@aarch64_sve_punpk<perm_hilo>_acle"
+ [(set (match_operand:VNx16BI 0 "register_operand")
+ (and:VNx16BI
+ (subreg:VNx16BI
+ (unspec:VNx8BI
+ [(match_operand:VNx16BI 1 "register_operand")]
+ UNPACK_UNSIGNED)
+ 0)
+ (match_dup 2)))]
+ "TARGET_SVE"
+ {
+ operands[2] = aarch64_ptrue_all (2);
+ }
+)
+
+(define_insn "*aarch64_sve_punpk<perm_hilo>_acle"
+ [(set (match_operand:VNx16BI 0 "register_operand" "=Upa")
+ (and:VNx16BI
+ (subreg:VNx16BI
+ (unspec:VNx8BI
+ [(match_operand:VNx16BI 1 "register_operand" "Upa")]
+ UNPACK_UNSIGNED)
+ 0)
+ (match_operand:VNx8BI 2 "aarch64_ptrue_all_operand")))]
+ "TARGET_SVE"
+ "punpk<perm_hilo>\t%0.h, %1.b"
+)
+
;; =========================================================================
;; == Vector partitioning
;; =========================================================================
@@ -10261,14 +11176,49 @@
;; -------------------------------------------------------------------------
(define_insn "@aarch64_sve_<sve_pred_op><mode>"
- [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
- (unspec:PRED_ALL
- [(match_operand:PRED_ALL 1 "register_operand" "Upa")
+ [(set (match_operand:VNx16BI_ONLY 0 "register_operand" "=Upa")
+ (unspec:VNx16BI_ONLY
+ [(match_operand:VNx16BI_ONLY 1 "register_operand" "Upa")
(match_operand:SI 2 "aarch64_sve_ptrue_flag")
- (match_operand:PRED_ALL 3 "register_operand" "0")]
+ (match_operand:VNx16BI_ONLY 3 "register_operand" "0")]
SVE_PITER))
(clobber (reg:CC_NZC CC_REGNUM))]
- "TARGET_SVE && <max_elem_bits> >= <elem_bits>"
+ "TARGET_SVE"
+ "<sve_pred_op>\t%0.<Vetype>, %1, %0.<Vetype>"
+)
+
+(define_expand "@aarch64_sve_<sve_pred_op><mode>"
+ [(parallel
+ [(set (match_operand:VNx16BI 0 "register_operand")
+ (and:VNx16BI
+ (subreg:VNx16BI
+ (unspec:PRED_HSD
+ [(match_operand:PRED_HSD 1 "register_operand")
+ (match_operand:SI 2 "aarch64_sve_ptrue_flag")
+ (match_operand:PRED_HSD 3 "register_operand")]
+ PNEXT_ONLY)
+ 0)
+ (match_dup 4)))
+ (clobber (reg:CC_NZC CC_REGNUM))])]
+ "TARGET_SVE"
+ {
+ operands[4] = aarch64_ptrue_all (<data_bytes>);
+ }
+)
+
+(define_insn "*aarch64_sve_<sve_pred_op><mode>"
+ [(set (match_operand:VNx16BI 0 "register_operand" "=Upa")
+ (and:VNx16BI
+ (subreg:VNx16BI
+ (unspec:PRED_HSD
+ [(match_operand:PRED_HSD 1 "register_operand" "Upa")
+ (match_operand:SI 2 "aarch64_sve_ptrue_flag")
+ (match_operand:PRED_HSD 3 "register_operand" "0")]
+ PNEXT_ONLY)
+ 0)
+ (match_operand:PRED_HSD 4 "aarch64_ptrue_all_operand")))
+ (clobber (reg:CC_NZC CC_REGNUM))]
+ "TARGET_SVE"
"<sve_pred_op>\t%0.<Vetype>, %1, %0.<Vetype>"
)
@@ -10302,6 +11252,38 @@
}
)
+(define_insn_and_rewrite "*aarch64_sve_<sve_pred_op><mode>_cc"
+ [(set (reg:CC_NZC CC_REGNUM)
+ (unspec:CC_NZC
+ [(match_operand:VNx16BI 1 "register_operand" "Upa")
+ (match_operand 2)
+ (match_operand:SI 3 "aarch64_sve_ptrue_flag")
+ (unspec:PRED_HSD
+ [(match_operand 4)
+ (match_operand:SI 5 "aarch64_sve_ptrue_flag")
+ (match_operand:PRED_HSD 6 "register_operand" "0")]
+ PNEXT_ONLY)]
+ UNSPEC_PTEST))
+ (set (match_operand:VNx16BI 0 "register_operand" "=Upa")
+ (and:VNx16BI
+ (subreg:VNx16BI
+ (unspec:PRED_HSD
+ [(match_dup 4)
+ (match_dup 5)
+ (match_dup 6)]
+ PNEXT_ONLY)
+ 0)
+ (match_operand:PRED_HSD 7 "aarch64_ptrue_all_operand")))]
+ "TARGET_SVE
+ && aarch64_sve_same_pred_for_ptest_p (&operands[2], &operands[4])"
+ "<sve_pred_op>\t%0.<Vetype>, %1, %0.<Vetype>"
+ "&& !rtx_equal_p (operands[2], operands[4])"
+ {
+ operands[4] = operands[2];
+ operands[5] = operands[3];
+ }
+)
+
;; Same, but with only the flags result being interesting.
(define_insn_and_rewrite "*aarch64_sve_<sve_pred_op><mode>_ptest"
[(set (reg:CC_NZC CC_REGNUM)
@@ -11163,16 +12145,12 @@
(define_insn "@aarch64_sve_set_neonq_<mode>"
[(set (match_operand:SVE_FULL 0 "register_operand" "=w")
- (unspec:SVE_FULL
- [(match_operand:SVE_FULL 1 "register_operand" "w")
- (match_operand:<V128> 2 "register_operand" "w")
- (match_operand:<VPRED> 3 "register_operand" "Upl")]
- UNSPEC_SET_NEONQ))]
+ (unspec:SVE_FULL
+ [(match_operand:SVE_FULL 1 "register_operand" "w")
+ (match_operand:<V128> 2 "register_operand" "w")
+ (match_operand:<VPRED> 3 "register_operand" "Upl")]
+ UNSPEC_SET_NEONQ))]
"TARGET_SVE
&& BYTES_BIG_ENDIAN"
- {
- operands[2] = lowpart_subreg (<MODE>mode, operands[2],
- GET_MODE (operands[2]));
- return "sel\t%0.<Vetype>, %3, %2.<Vetype>, %1.<Vetype>";
- }
+ "sel\t%0.<Vetype>, %3, %Z2.<Vetype>, %1.<Vetype>"
)
diff --git a/gcc/config/aarch64/aarch64-sve2.md b/gcc/config/aarch64/aarch64-sve2.md
index 3e08e09..a3cbbce 100644
--- a/gcc/config/aarch64/aarch64-sve2.md
+++ b/gcc/config/aarch64/aarch64-sve2.md
@@ -334,12 +334,21 @@
;; - LD1Q (SVE2p1)
;; -------------------------------------------------------------------------
-;; Model this as operating on the largest valid element size, which is DI.
-;; This avoids having to define move patterns & more for VNx1TI, which would
-;; be difficult without a non-gather form of LD1Q.
-(define_insn "aarch64_gather_ld1q"
- [(set (match_operand:VNx2DI 0 "register_operand")
- (unspec:VNx2DI
+;; For little-endian targets, it would be enough to use a single pattern,
+;; with a subreg to bitcast the result to whatever mode is needed.
+;; However, on big-endian targets, the bitcast would need to be an
+;; aarch64_sve_reinterpret instruction. That would interact badly
+;; with the "&" and "?" constraints in this pattern: if the result
+;; of the reinterpret needs to be in the same register as the index,
+;; the RA would tend to prefer to allocate a separate register for the
+;; intermediate (uncast) result, even if the reinterpret prefers tying.
+;;
+;; The index is logically VNx1DI rather than VNx2DI, but introducing
+;; and using VNx1DI would just create more bitcasting. The ACLE intrinsic
+;; uses svuint64_t, which corresponds to VNx2DI.
+(define_insn "@aarch64_gather_ld1q<mode>"
+ [(set (match_operand:SVE_FULL 0 "register_operand")
+ (unspec:SVE_FULL
[(match_operand:VNx2BI 1 "register_operand")
(match_operand:DI 2 "aarch64_reg_or_zero")
(match_operand:VNx2DI 3 "register_operand")
@@ -784,8 +793,8 @@
(match_operand:SVE_FULL_I 2 "register_operand"))
(match_operand:SVE_FULL_I 3 "register_operand")))]
"TARGET_SVE2p1_OR_SME"
- {@ [cons: =0, 1, 2, 3; attrs: movprfx]
- [ w, %0, w, w; * ] <su>clamp\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>
+ {@ [cons: =0, %1, 2, 3; attrs: movprfx]
+ [ w, 0, w, w; * ] <su>clamp\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>
[ ?&w, w, w, w; yes ] movprfx\t%0, %1\;<su>clamp\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>
}
)
@@ -804,8 +813,8 @@
(match_operand:SVE_FULL_I 3 "register_operand"))]
UNSPEC_PRED_X))]
"TARGET_SVE2p1_OR_SME"
- {@ [cons: =0, 1, 2, 3; attrs: movprfx]
- [ w, %0, w, w; * ] #
+ {@ [cons: =0, %1, 2, 3; attrs: movprfx]
+ [ w, 0, w, w; * ] #
[ ?&w, w, w, w; yes ] #
}
"&& true"
@@ -1337,12 +1346,12 @@
;; Predicated B16B16 binary operations.
(define_insn "@aarch64_pred_<optab><mode>"
- [(set (match_operand:VNx8BF_ONLY 0 "register_operand")
- (unspec:VNx8BF_ONLY
- [(match_operand:<VPRED> 1 "register_operand")
+ [(set (match_operand:SVE_BF 0 "register_operand")
+ (unspec:SVE_BF
+ [(match_operand:<VPRED> 1 "aarch64_predicate_operand")
(match_operand:SI 4 "aarch64_sve_gp_strictness")
- (match_operand:VNx8BF_ONLY 2 "register_operand")
- (match_operand:VNx8BF_ONLY 3 "register_operand")]
+ (match_operand:SVE_BF 2 "register_operand")
+ (match_operand:SVE_BF 3 "register_operand")]
SVE_COND_FP_BINARY_OPTAB))]
"TARGET_SSVE_B16B16 && <supports_bf16>"
{@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx , is_rev ]
@@ -1373,8 +1382,8 @@
(match_operand:SVE_CLAMP_F 3 "register_operand")]
UNSPEC_FMINNM))]
""
- {@ [cons: =0, 1, 2, 3; attrs: movprfx]
- [ w, %0, w, w; * ] <b>fclamp\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>
+ {@ [cons: =0, %1, 2, 3; attrs: movprfx]
+ [ w, 0, w, w; * ] <b>fclamp\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>
[ ?&w, w, w, w; yes ] movprfx\t%0, %1\;<b>fclamp\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>
}
)
@@ -1393,8 +1402,8 @@
(match_operand:SVE_CLAMP_F 3 "register_operand")]
UNSPEC_COND_FMINNM))]
""
- {@ [cons: =0, 1, 2, 3; attrs: movprfx]
- [ w, %0, w, w; * ] #
+ {@ [cons: =0, %1, 2, 3; attrs: movprfx]
+ [ w, 0, w, w; * ] #
[ ?&w, w, w, w; yes ] #
}
"&& true"
@@ -1626,9 +1635,9 @@
(match_operand:SVE_FULL_I 2 "register_operand")))]
UNSPEC_PRED_X))]
"TARGET_SVE2"
- {@ [ cons: =0 , 1 , 2 ; attrs: movprfx ]
- [ w , %0 , w ; * ] nbsl\t%0.d, %0.d, %2.d, %0.d
- [ ?&w , w , w ; yes ] movprfx\t%0, %1\;nbsl\t%0.d, %0.d, %2.d, %0.d
+ {@ [ cons: =0 , %1 , 2 ; attrs: movprfx ]
+ [ w , 0 , w ; * ] nbsl\t%0.d, %0.d, %2.d, %0.d
+ [ ?&w , w , w ; yes ] movprfx\t%0, %1\;nbsl\t%0.d, %0.d, %2.d, %1.d
}
"&& !CONSTANT_P (operands[3])"
{
@@ -1636,6 +1645,20 @@
}
)
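+;; Use NBSL for unpredicated vector NOR, including Advanced SIMD modes.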
+(define_insn "*aarch64_sve2_unpred_nor<mode>"
+ [(set (match_operand:VDQ_I 0 "register_operand")
+ (and:VDQ_I
+ (not:VDQ_I
+ (match_operand:VDQ_I 1 "register_operand"))
+ (not:VDQ_I
+ (match_operand:VDQ_I 2 "register_operand"))))]
+ "TARGET_SVE2"
+ {@ [ cons: =0 , %1 , 2 ; attrs: movprfx ]
+ [ w , 0 , w ; * ] nbsl\t%Z0.d, %Z0.d, %Z2.d, %Z0.d
+ [ ?&w , w , w ; yes ] movprfx\t%Z0, %Z1\;nbsl\t%Z0.d, %Z0.d, %Z2.d, %Z1.d
+ }
+)
+
;; Use NBSL for vector NAND.
(define_insn_and_rewrite "*aarch64_sve2_nand<mode>"
[(set (match_operand:SVE_FULL_I 0 "register_operand")
@@ -1648,8 +1671,8 @@
(match_operand:SVE_FULL_I 2 "register_operand")))]
UNSPEC_PRED_X))]
"TARGET_SVE2"
- {@ [ cons: =0 , 1 , 2 ; attrs: movprfx ]
- [ w , %0 , w ; * ] nbsl\t%0.d, %0.d, %2.d, %2.d
+ {@ [ cons: =0 , %1 , 2 ; attrs: movprfx ]
+ [ w , 0 , w ; * ] nbsl\t%0.d, %0.d, %2.d, %2.d
[ ?&w , w , w ; yes ] movprfx\t%0, %1\;nbsl\t%0.d, %0.d, %2.d, %2.d
}
"&& !CONSTANT_P (operands[3])"
@@ -1658,6 +1681,21 @@
}
)
+;; Same as above but unpredicated and including Advanced SIMD modes.
+(define_insn "*aarch64_sve2_nand_unpred<mode>"
+ [(set (match_operand:VDQ_I 0 "register_operand")
+ (ior:VDQ_I
+ (not:VDQ_I
+ (match_operand:VDQ_I 1 "register_operand"))
+ (not:VDQ_I
+ (match_operand:VDQ_I 2 "register_operand"))))]
+ "TARGET_SVE2"
+ {@ [ cons: =0 , %1 , 2 ; attrs: movprfx ]
+ [ w , 0 , w ; * ] nbsl\t%Z0.d, %Z0.d, %Z2.d, %Z2.d
+ [ ?&w , w , w ; yes ] movprfx\t%Z0, %Z1\;nbsl\t%Z0.d, %Z0.d, %Z2.d, %Z2.d
+ }
+)
+
;; Unpredicated bitwise select.
;; (op3 ? bsl_mov : bsl_dup) == (((bsl_mov ^ bsl_dup) & op3) ^ bsl_dup)
(define_expand "@aarch64_sve2_bsl<mode>"
@@ -1918,6 +1956,40 @@
}
)
+;; Vector EON (~(x ^ y)) using BSL2N.
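+;; (BSL2N zdn, zdn, zm, zk computes (zdn & zk) | (~zm & ~zk), so with
+;; zdn = zm = x and zk = y it yields (x & y) | (~x & ~y) = ~(x ^ y).)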
+(define_insn_and_rewrite "*aarch64_sve2_bsl2n_eon<mode>"
+ [(set (match_operand:SVE_FULL_I 0 "register_operand")
+ (unspec:SVE_FULL_I
+ [(match_operand 3)
+ (not:SVE_FULL_I
+ (xor:SVE_FULL_I
+ (match_operand:SVE_FULL_I 1 "register_operand")
+ (match_operand:SVE_FULL_I 2 "register_operand")))]
+ UNSPEC_PRED_X))]
+ "TARGET_SVE2"
+ {@ [ cons: =0, 1, 2 ; attrs: movprfx ]
+ [ w , 0, w ; * ] bsl2n\t%0.d, %0.d, %0.d, %2.d
+ [ ?&w, w, w ; yes ] movprfx\t%0, %1\;bsl2n\t%0.d, %0.d, %1.d, %2.d
+ }
+ "&& !CONSTANT_P (operands[3])"
+ {
+ operands[3] = CONSTM1_RTX (<VPRED>mode);
+ }
+)
+
+(define_insn "*aarch64_sve2_eon_bsl2n_unpred<mode>"
+ [(set (match_operand:VDQ_I 0 "register_operand")
+ (not:VDQ_I
+ (xor:VDQ_I
+ (match_operand:VDQ_I 1 "register_operand")
+ (match_operand:VDQ_I 2 "register_operand"))))]
+ "TARGET_SVE2"
+ {@ [ cons: =0, 1, 2 ; attrs: movprfx ]
+ [ w , 0, w ; * ] bsl2n\t%Z0.d, %Z0.d, %Z0.d, %Z2.d
+ [ ?&w, w, w ; yes ] movprfx\t%Z0, %Z1\;bsl2n\t%Z0.d, %Z0.d, %Z1.d, %Z2.d
+ }
+)
+
;; -------------------------------------------------------------------------
;; ---- [INT] Shift-and-accumulate operations
;; -------------------------------------------------------------------------
@@ -1932,40 +2004,27 @@
(define_expand "@aarch64_sve_add_<sve_int_op><mode>"
[(set (match_operand:SVE_FULL_I 0 "register_operand")
(plus:SVE_FULL_I
- (unspec:SVE_FULL_I
- [(match_dup 4)
- (SHIFTRT:SVE_FULL_I
- (match_operand:SVE_FULL_I 2 "register_operand")
- (match_operand:SVE_FULL_I 3 "aarch64_simd_rshift_imm"))]
- UNSPEC_PRED_X)
- (match_operand:SVE_FULL_I 1 "register_operand")))]
+ (SHIFTRT:SVE_FULL_I
+ (match_operand:SVE_FULL_I 2 "register_operand")
+ (match_operand:SVE_FULL_I 3 "aarch64_simd_rshift_imm"))
+ (match_operand:SVE_FULL_I 1 "register_operand")))]
"TARGET_SVE2"
- {
- operands[4] = CONSTM1_RTX (<VPRED>mode);
- }
)
;; Pattern-match SSRA and USRA as a predicated operation whose predicate
;; isn't needed.
-(define_insn_and_rewrite "*aarch64_sve2_sra<mode>"
+(define_insn "*aarch64_sve2_sra<mode>"
[(set (match_operand:SVE_FULL_I 0 "register_operand")
(plus:SVE_FULL_I
- (unspec:SVE_FULL_I
- [(match_operand 4)
- (SHIFTRT:SVE_FULL_I
- (match_operand:SVE_FULL_I 2 "register_operand")
- (match_operand:SVE_FULL_I 3 "aarch64_simd_rshift_imm"))]
- UNSPEC_PRED_X)
+ (SHIFTRT:SVE_FULL_I
+ (match_operand:SVE_FULL_I 2 "register_operand")
+ (match_operand:SVE_FULL_I 3 "aarch64_simd_rshift_imm"))
(match_operand:SVE_FULL_I 1 "register_operand")))]
"TARGET_SVE2"
{@ [ cons: =0 , 1 , 2 ; attrs: movprfx ]
[ w , 0 , w ; * ] <sra_op>sra\t%0.<Vetype>, %2.<Vetype>, #%3
[ ?&w , w , w ; yes ] movprfx\t%0, %1\;<sra_op>sra\t%0.<Vetype>, %2.<Vetype>, #%3
}
- "&& !CONSTANT_P (operands[4])"
- {
- operands[4] = CONSTM1_RTX (<VPRED>mode);
- }
)
;; SRSRA and URSRA.
@@ -2152,14 +2211,14 @@
;; - FDOT (2-way, indexed) (FP8DOT2)
;; -------------------------------------------------------------------------
(define_insn "@aarch64_sve_dot<mode>"
- [(set (match_operand:SVE_FULL_HSF 0 "register_operand")
- (unspec:SVE_FULL_HSF
- [(match_operand:SVE_FULL_HSF 1 "register_operand")
+ [(set (match_operand:SVE_FULL_HSF_FP8_FDOT 0 "register_operand")
+ (unspec:SVE_FULL_HSF_FP8_FDOT
+ [(match_operand:SVE_FULL_HSF_FP8_FDOT 1 "register_operand")
(match_operand:VNx16QI 2 "register_operand")
(match_operand:VNx16QI 3 "register_operand")
(reg:DI FPM_REGNUM)]
UNSPEC_DOT_FP8))]
- "TARGET_SSVE_FP8DOT4 && !(<MODE>mode == VNx8HFmode && !TARGET_SSVE_FP8DOT2)"
+ ""
{@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
[ w , 0 , w , w ; * ] fdot\t%0.<Vetype>, %2.b, %3.b
[ ?&w , w , w , w ; yes ] movprfx\t%0, %1\;fdot\t%0.<Vetype>, %2.b, %3.b
@@ -2167,15 +2226,15 @@
)
(define_insn "@aarch64_sve_dot_lane<mode>"
- [(set (match_operand:SVE_FULL_HSF 0 "register_operand")
- (unspec:SVE_FULL_HSF
- [(match_operand:SVE_FULL_HSF 1 "register_operand")
+ [(set (match_operand:SVE_FULL_HSF_FP8_FDOT 0 "register_operand")
+ (unspec:SVE_FULL_HSF_FP8_FDOT
+ [(match_operand:SVE_FULL_HSF_FP8_FDOT 1 "register_operand")
(match_operand:VNx16QI 2 "register_operand")
(match_operand:VNx16QI 3 "register_operand")
(match_operand:SI 4 "const_int_operand")
(reg:DI FPM_REGNUM)]
UNSPEC_DOT_LANE_FP8))]
- "TARGET_SSVE_FP8DOT4 && !(<MODE>mode == VNx8HFmode && !TARGET_SSVE_FP8DOT2)"
+ ""
{@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
[ w , 0 , w , y ; * ] fdot\t%0.<Vetype>, %2.b, %3.b[%4]
[ ?&w , w , w , y ; yes ] movprfx\t%0, %1\;fdot\t%0.<Vetype>, %2.b, %3.b[%4]
@@ -2715,17 +2774,14 @@
;; Optimize ((a + b) >> n) where n is half the bitsize of the vector
(define_insn "*bitmask_shift_plus<mode>"
[(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w")
- (unspec:SVE_FULL_HSDI
- [(match_operand:<VPRED> 1)
- (lshiftrt:SVE_FULL_HSDI
- (plus:SVE_FULL_HSDI
- (match_operand:SVE_FULL_HSDI 2 "register_operand" "w")
- (match_operand:SVE_FULL_HSDI 3 "register_operand" "w"))
- (match_operand:SVE_FULL_HSDI 4
- "aarch64_simd_shift_imm_vec_exact_top" ""))]
- UNSPEC_PRED_X))]
+ (lshiftrt:SVE_FULL_HSDI
+ (plus:SVE_FULL_HSDI
+ (match_operand:SVE_FULL_HSDI 1 "register_operand" "w")
+ (match_operand:SVE_FULL_HSDI 2 "register_operand" "w"))
+ (match_operand:SVE_FULL_HSDI 3
+ "aarch64_simd_shift_imm_vec_exact_top" "")))]
"TARGET_SVE2"
- "addhnb\t%0.<Ventype>, %2.<Vetype>, %3.<Vetype>"
+ "addhnb\t%0.<Ventype>, %1.<Vetype>, %2.<Vetype>"
)
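+
+;; For example (illustrative): for .h inputs with a shift of 8, this
+;; emits "addhnb z0.b, z1.h, z2.h", leaving ((a + b) >> 8) in each
+;; 16-bit container (the narrowed sum in the low byte, zero above).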
;; -------------------------------------------------------------------------
@@ -2951,8 +3007,8 @@
UNSPEC_COND_FABS)]
SVE_COND_SMAXMIN))]
"TARGET_FAMINMAX && TARGET_SVE2_OR_SME2"
- {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
- [ w , Upl , %0 , w ; * ] <faminmax_cond_uns_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
+ {@ [ cons: =0 , 1 , %2 , 3 ; attrs: movprfx ]
+ [ w , Upl , 0 , w ; * ] <faminmax_cond_uns_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
[ ?&w , Upl , w , w ; yes ] movprfx\t%0, %2\;<faminmax_cond_uns_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
}
"&& (!rtx_equal_p (operands[1], operands[5])
@@ -4012,8 +4068,8 @@
[(match_operand:<VPRED> 1 "register_operand")
(match_operand:SI 2 "aarch64_sve_ptrue_flag")
(unspec:<VPRED>
- [(match_operand:SVE_FULL_BHI 3 "register_operand")
- (match_operand:SVE_FULL_BHI 4 "register_operand")]
+ [(match_operand:VNx16QI_ONLY 3 "register_operand")
+ (match_operand:VNx16QI_ONLY 4 "register_operand")]
SVE2_MATCH)]
UNSPEC_PRED_Z))
(clobber (reg:CC_NZC CC_REGNUM))]
@@ -4025,6 +4081,51 @@
}
)
+(define_expand "@aarch64_pred_<sve_int_op><mode>"
+ [(parallel
+ [(set (match_operand:VNx16BI 0 "register_operand")
+ (and:VNx16BI
+ (subreg:VNx16BI
+ (unspec:<VPRED>
+ [(match_operand:<VPRED> 1 "register_operand")
+ (match_operand:SI 2 "aarch64_sve_ptrue_flag")
+ (unspec:<VPRED>
+ [(match_operand:VNx8HI_ONLY 3 "register_operand")
+ (match_operand:VNx8HI_ONLY 4 "register_operand")]
+ SVE2_MATCH)]
+ UNSPEC_PRED_Z)
+ 0)
+ (match_dup 5)))
+ (clobber (reg:CC_NZC CC_REGNUM))])]
+ "TARGET_SVE2 && TARGET_NON_STREAMING"
+ {
+ operands[5] = aarch64_ptrue_all (GET_MODE_UNIT_SIZE (<MODE>mode));
+ }
+)
+
+(define_insn "*aarch64_pred_<sve_int_op><mode>"
+ [(set (match_operand:VNx16BI 0 "register_operand")
+ (and:VNx16BI
+ (subreg:VNx16BI
+ (unspec:<VPRED>
+ [(match_operand:<VPRED> 1 "register_operand")
+ (match_operand:SI 2 "aarch64_sve_ptrue_flag")
+ (unspec:<VPRED>
+ [(match_operand:VNx8HI_ONLY 3 "register_operand")
+ (match_operand:VNx8HI_ONLY 4 "register_operand")]
+ SVE2_MATCH)]
+ UNSPEC_PRED_Z)
+ 0)
+ (match_operand:<VPRED> 5 "aarch64_ptrue_all_operand")))
+ (clobber (reg:CC_NZC CC_REGNUM))]
+ "TARGET_SVE2 && TARGET_NON_STREAMING"
+ {@ [ cons: =0, 1 , 3, 4; attrs: pred_clobber ]
+ [ &Upa , Upl, w, w; yes ] <sve_int_op>\t%0.<Vetype>, %1/z, %3.<Vetype>, %4.<Vetype>
+ [ ?Upl , 0 , w, w; yes ] ^
+ [ Upa , Upl, w, w; no ] ^
+ }
+)
+
;; Predicated string matching in which both the flag and predicate results
;; are interesting.
(define_insn_and_rewrite "*aarch64_pred_<sve_int_op><mode>_cc"
@@ -4062,6 +4163,45 @@
}
)
+(define_insn_and_rewrite "*aarch64_pred_<sve_int_op><mode>_cc"
+ [(set (reg:CC_NZC CC_REGNUM)
+ (unspec:CC_NZC
+ [(match_operand:VNx16BI 1 "register_operand" "Upl")
+ (match_operand 4)
+ (match_operand:SI 5 "aarch64_sve_ptrue_flag")
+ (unspec:<VPRED>
+ [(match_operand 6)
+ (match_operand:SI 7 "aarch64_sve_ptrue_flag")
+ (unspec:<VPRED>
+ [(match_operand:VNx8HI_ONLY 2 "register_operand" "w")
+ (match_operand:VNx8HI_ONLY 3 "register_operand" "w")]
+ SVE2_MATCH)]
+ UNSPEC_PRED_Z)]
+ UNSPEC_PTEST))
+ (set (match_operand:VNx16BI 0 "register_operand" "=Upa")
+ (and:VNx16BI
+ (subreg:VNx16BI
+ (unspec:<VPRED>
+ [(match_dup 6)
+ (match_dup 7)
+ (unspec:<VPRED>
+ [(match_dup 2)
+ (match_dup 3)]
+ SVE2_MATCH)]
+ UNSPEC_PRED_Z)
+ 0)
+ (match_operand:<VPRED> 8 "aarch64_ptrue_all_operand")))]
+ "TARGET_SVE2
+ && TARGET_NON_STREAMING
+ && aarch64_sve_same_pred_for_ptest_p (&operands[4], &operands[6])"
+ "<sve_int_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
+ "&& !rtx_equal_p (operands[4], operands[6])"
+ {
+ operands[6] = copy_rtx (operands[4]);
+ operands[7] = operands[5];
+ }
+)
+
;; Predicated string matching in which only the flags result is interesting.
(define_insn_and_rewrite "*aarch64_pred_<sve_int_op><mode>_ptest"
[(set (reg:CC_NZC CC_REGNUM)
diff --git a/gcc/config/aarch64/aarch64-sys-regs.def b/gcc/config/aarch64/aarch64-sys-regs.def
index 39e6c5c..d7ef6da 100644
--- a/gcc/config/aarch64/aarch64-sys-regs.def
+++ b/gcc/config/aarch64/aarch64-sys-regs.def
@@ -572,12 +572,12 @@
SYSREG ("mdrar_el1", CPENC (2,0,1,0,0), F_REG_READ, AARCH64_NO_FEATURES)
SYSREG ("mdscr_el1", CPENC (2,0,0,2,2), 0, AARCH64_NO_FEATURES)
SYSREG ("mdselr_el1", CPENC (2,0,0,4,2), F_ARCHEXT, AARCH64_FEATURE (DEBUGv8p9))
- SYSREG ("mecid_a0_el2", CPENC (3,4,10,8,1), 0, AARCH64_NO_FEATURES)
- SYSREG ("mecid_a1_el2", CPENC (3,4,10,8,3), 0, AARCH64_NO_FEATURES)
- SYSREG ("mecid_p0_el2", CPENC (3,4,10,8,0), 0, AARCH64_NO_FEATURES)
- SYSREG ("mecid_p1_el2", CPENC (3,4,10,8,2), 0, AARCH64_NO_FEATURES)
- SYSREG ("mecid_rl_a_el3", CPENC (3,6,10,10,1), 0, AARCH64_NO_FEATURES)
- SYSREG ("mecidr_el2", CPENC (3,4,10,8,7), F_REG_READ, AARCH64_NO_FEATURES)
+ SYSREG ("mecid_a0_el2", CPENC (3,4,10,8,1), F_ARCHEXT, AARCH64_FEATURE (V8_7A))
+ SYSREG ("mecid_a1_el2", CPENC (3,4,10,8,3), F_ARCHEXT, AARCH64_FEATURE (V8_7A))
+ SYSREG ("mecid_p0_el2", CPENC (3,4,10,8,0), F_ARCHEXT, AARCH64_FEATURE (V8_7A))
+ SYSREG ("mecid_p1_el2", CPENC (3,4,10,8,2), F_ARCHEXT, AARCH64_FEATURE (V8_7A))
+ SYSREG ("mecid_rl_a_el3", CPENC (3,6,10,10,1), F_ARCHEXT, AARCH64_FEATURE (V8_7A))
+ SYSREG ("mecidr_el2", CPENC (3,4,10,8,7), F_REG_READ|F_ARCHEXT, AARCH64_FEATURE (V8_7A))
SYSREG ("mfar_el3", CPENC (3,6,6,0,5), 0, AARCH64_NO_FEATURES)
SYSREG ("midr_el1", CPENC (3,0,0,0,0), F_REG_READ, AARCH64_NO_FEATURES)
SYSREG ("mpam0_el1", CPENC (3,0,10,5,1), 0, AARCH64_NO_FEATURES)
@@ -1145,8 +1145,8 @@
SYSREG ("vbar_el2", CPENC (3,4,12,0,0), 0, AARCH64_NO_FEATURES)
SYSREG ("vbar_el3", CPENC (3,6,12,0,0), 0, AARCH64_NO_FEATURES)
SYSREG ("vdisr_el2", CPENC (3,4,12,1,1), F_ARCHEXT, AARCH64_FEATURE (RAS))
- SYSREG ("vmecid_a_el2", CPENC (3,4,10,9,1), 0, AARCH64_NO_FEATURES)
- SYSREG ("vmecid_p_el2", CPENC (3,4,10,9,0), 0, AARCH64_NO_FEATURES)
+ SYSREG ("vmecid_a_el2", CPENC (3,4,10,9,1), F_ARCHEXT, AARCH64_FEATURE (V8_7A))
+ SYSREG ("vmecid_p_el2", CPENC (3,4,10,9,0), F_ARCHEXT, AARCH64_FEATURE (V8_7A))
SYSREG ("vmpidr_el2", CPENC (3,4,0,0,5), 0, AARCH64_NO_FEATURES)
SYSREG ("vncr_el2", CPENC (3,4,2,2,0), F_ARCHEXT, AARCH64_FEATURE (V8_4A))
SYSREG ("vpidr_el2", CPENC (3,4,0,0,0), 0, AARCH64_NO_FEATURES)
diff --git a/gcc/config/aarch64/aarch64-tune.md b/gcc/config/aarch64/aarch64-tune.md
index 982074c..40ff147 100644
--- a/gcc/config/aarch64/aarch64-tune.md
+++ b/gcc/config/aarch64/aarch64-tune.md
@@ -1,5 +1,5 @@
;; -*- buffer-read-only: t -*-
;; Generated automatically by gentune.sh from aarch64-cores.def
(define_attr "tune"
- "cortexa34,cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88,thunderxt88p1,octeontx,octeontxt81,octeontxt83,thunderxt81,thunderxt83,ampere1,ampere1a,ampere1b,emag,xgene1,falkor,qdf24xx,exynosm1,phecda,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,cortexa76,cortexa76ae,cortexa77,cortexa78,cortexa78ae,cortexa78c,cortexa65,cortexa65ae,cortexx1,cortexx1c,neoversen1,ares,neoversee1,octeontx2,octeontx2t98,octeontx2t96,octeontx2t93,octeontx2f95,octeontx2f95n,octeontx2f95mm,a64fx,fujitsu_monaka,tsv110,thunderx3t110,neoversev1,zeus,neoverse512tvb,saphira,oryon1,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55,cortexa76cortexa55,cortexr82,cortexr82ae,applea12,applem1_0,applem1_1,applem1_2,applem1_3,applem2_0,applem2_1,applem2_2,applem2_3,applem3_0,cortexa510,cortexa520,cortexa520ae,cortexa710,cortexa715,cortexa720,cortexa720ae,cortexa725,cortexx2,cortexx3,cortexx4,cortexx925,neoversen2,cobalt100,neoversen3,neoversev2,grace,neoversev3,neoversev3ae,demeter,olympus,generic,generic_armv8_a,generic_armv9_a"
+ "cortexa34,cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88,thunderxt88p1,octeontx,octeontxt81,octeontxt83,thunderxt81,thunderxt83,ampere1,ampere1a,ampere1b,emag,xgene1,falkor,qdf24xx,exynosm1,phecda,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,cortexa76,cortexa76ae,cortexa77,cortexa78,cortexa78ae,cortexa78c,cortexa65,cortexa65ae,cortexx1,cortexx1c,neoversen1,ares,neoversee1,octeontx2,octeontx2t98,octeontx2t96,octeontx2t93,octeontx2f95,octeontx2f95n,octeontx2f95mm,a64fx,fujitsu_monaka,tsv110,thunderx3t110,neoversev1,zeus,neoverse512tvb,saphira,oryon1,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55,cortexa76cortexa55,cortexr82,cortexr82ae,applea12,applem1_0,applem1_1,applem1_2,applem1_3,applem2_0,applem2_1,applem2_2,applem2_3,applem3_0,cortexa510,cortexa520,cortexa520ae,cortexa710,cortexa715,cortexa720,cortexa720ae,cortexa725,cortexx2,cortexx3,cortexx4,cortexx925,neoversen2,cobalt100,neoversen3,neoversev2,grace,neoversev3,neoversev3ae,demeter,olympus,gb10,generic,generic_armv8_a,generic_armv9_a"
(const (symbol_ref "((enum attr_tune) aarch64_tune)")))
diff --git a/gcc/config/aarch64/aarch64-tuning-flags.def b/gcc/config/aarch64/aarch64-tuning-flags.def
index f2c916e..dd91324 100644
--- a/gcc/config/aarch64/aarch64-tuning-flags.def
+++ b/gcc/config/aarch64/aarch64-tuning-flags.def
@@ -44,6 +44,8 @@ AARCH64_EXTRA_TUNING_OPTION ("avoid_cross_loop_fma", AVOID_CROSS_LOOP_FMA)
AARCH64_EXTRA_TUNING_OPTION ("fully_pipelined_fma", FULLY_PIPELINED_FMA)
+AARCH64_EXTRA_TUNING_OPTION ("avoid_ldapur", AVOID_LDAPUR)
+
/* Enable if the target prefers to use a fresh register for predicate outputs
rather than re-use an input predicate register. */
AARCH64_EXTRA_TUNING_OPTION ("avoid_pred_rmw", AVOID_PRED_RMW)
diff --git a/gcc/config/aarch64/aarch64-vxworks.h b/gcc/config/aarch64/aarch64-vxworks.h
index 41adada..7b4da93 100644
--- a/gcc/config/aarch64/aarch64-vxworks.h
+++ b/gcc/config/aarch64/aarch64-vxworks.h
@@ -66,9 +66,8 @@ along with GCC; see the file COPYING3. If not see
#define VXWORKS_PERSONALITY "llvm"
/* VxWorks uses R18 as a TCB pointer. We must pick something else as
- the static chain and R18 needs to be claimed "fixed". Until we
- arrange to override the common parts of the port family to
- acknowledge the latter, configure --with-specs="-ffixed-r18". */
+   the static chain, and R18 needs to be claimed "fixed" (TARGET_OS_USES_R18
+   does that in aarch64_conditional_register_usage).  */
#undef STATIC_CHAIN_REGNUM
#define STATIC_CHAIN_REGNUM 9
-
+#define TARGET_OS_USES_R18
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 433ec97..ef9c165 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -83,6 +83,7 @@
#include "rtlanal.h"
#include "tree-dfa.h"
#include "asan.h"
+#include "aarch64-elf-metadata.h"
#include "aarch64-feature-deps.h"
#include "config/arm/aarch-common.h"
#include "config/arm/aarch-common-protos.h"
@@ -108,6 +109,10 @@
and 1 MOVI/DUP (same size as a call). */
#define MAX_SET_SIZE(speed) (speed ? 256 : 96)
+#ifndef HAVE_AS_AEABI_BUILD_ATTRIBUTES
+#define HAVE_AS_AEABI_BUILD_ATTRIBUTES 0
+#endif
+
/* Flags that describe how a function shares certain architectural state
with its callers.
@@ -351,7 +356,8 @@ static int aarch64_address_cost (rtx, machine_mode, addr_space_t, bool);
static bool aarch64_builtin_support_vector_misalignment (machine_mode mode,
const_tree type,
int misalignment,
- bool is_packed);
+ bool is_packed,
+ bool is_gather_scatter);
static machine_mode aarch64_simd_container_mode (scalar_mode, poly_int64);
static bool aarch64_print_address_internal (FILE*, machine_mode, rtx,
aarch64_addr_query_type);
@@ -424,6 +430,7 @@ static const struct aarch64_flag_desc aarch64_tuning_flags[] =
#include "tuning_models/neoversev2.h"
#include "tuning_models/neoversev3.h"
#include "tuning_models/neoversev3ae.h"
+#include "tuning_models/olympus.h"
#include "tuning_models/a64fx.h"
#include "tuning_models/fujitsu_monaka.h"
@@ -954,6 +961,44 @@ svpattern_token (enum aarch64_svpattern pattern)
gcc_unreachable ();
}
+/* Return true if RHS is an operand suitable for a CB<cc> (immediate)
+ instruction. OP_CODE determines the type of the comparison. */
+bool
+aarch64_cb_rhs (rtx_code op_code, rtx rhs)
+{
+ if (!CONST_INT_P (rhs))
+ return REG_P (rhs);
+
+ HOST_WIDE_INT rhs_val = INTVAL (rhs);
+
+ switch (op_code)
+ {
+ case EQ:
+ case NE:
+ case LT:
+ case LTU:
+ case GE:
+ case GEU:
+ /* EQ/NE range is 0 .. 63.
+ LT/LTU range is 0 .. 63.
+ GE/GEU range is 1 .. 64 => GT x - 1, but also supports 0 via XZR.
+ So the intersection is 0 .. 63. */
+ return IN_RANGE (rhs_val, 0, 63);
+
+ case GT:
+ case GTU:
+ case LE:
+ case LEU:
+ /* GT/GTU range is 0 .. 63
+ LE/LEU range is -1 .. 62 => LT x + 1.
+ So the intersection is 0 .. 62. */
+ return IN_RANGE (rhs_val, 0, 62);
+
+ default:
+ return false;
+ }
+}
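+
+/* For example (illustrative): aarch64_cb_rhs (EQ, GEN_INT (63)) is true,
+   while aarch64_cb_rhs (EQ, GEN_INT (64)) is false, since the usable
+   immediate range for EQ is 0..63.  */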
+
/* Return the location of a piece that is known to be passed or returned
in registers. FIRST_ZR is the first unused vector argument register
and FIRST_PR is the first unused predicate argument register. */
@@ -2842,10 +2887,47 @@ aarch64_gen_compare_reg_maybe_ze (RTX_CODE code, rtx x, rtx y,
return aarch64_gen_compare_reg (code, x, y);
}
+/* Split IMM into two 12-bit halves, producing an EQ/NE comparison vs X.
+ TMP may be a scratch. This optimizes a sequence from
+ mov x0, #imm1
+ movk x0, #imm2, lsl 16 -- x0 contains CST
+ cmp x1, x0
+ into the shorter:
+ sub tmp, x1, #(CST & 0xfff000)
+ subs tmp, tmp, #(CST & 0x000fff)
+*/
+rtx
+aarch64_gen_compare_split_imm24 (rtx x, rtx imm, rtx tmp)
+{
+ HOST_WIDE_INT lo_imm = UINTVAL (imm) & 0xfff;
+ HOST_WIDE_INT hi_imm = UINTVAL (imm) & 0xfff000;
+  machine_mode mode = GET_MODE (x);
+
+ if (GET_CODE (tmp) == SCRATCH)
+ tmp = gen_reg_rtx (mode);
+
+ emit_insn (gen_add3_insn (tmp, x, GEN_INT (-hi_imm)));
+ /* TODO: We don't need the gpr result of the second insn. */
+ switch (mode)
+ {
+    case E_SImode:
+      tmp = gen_addsi3_compare0 (tmp, tmp, GEN_INT (-lo_imm));
+      break;
+    case E_DImode:
+      tmp = gen_adddi3_compare0 (tmp, tmp, GEN_INT (-lo_imm));
+      break;
+    default:
+      gcc_unreachable ();
+ }
+ emit_insn (tmp);
+
+ return gen_rtx_REG (CC_NZmode, CC_REGNUM);
+}
+
/* Generate conditional branch to LABEL, comparing X to 0 using CODE.
Return the jump instruction. */
-static rtx
+rtx
aarch64_gen_compare_zero_and_branch (rtx_code code, rtx x,
rtx_code_label *label)
{
@@ -2879,10 +2961,10 @@ aarch64_gen_test_and_branch (rtx_code code, rtx x, int bitnum,
emit_insn (gen_aarch64_and3nr_compare0 (mode, x, mask));
rtx cc_reg = gen_rtx_REG (CC_NZVmode, CC_REGNUM);
rtx x = gen_rtx_fmt_ee (code, CC_NZVmode, cc_reg, const0_rtx);
- return gen_condjump (x, cc_reg, label);
+ return gen_aarch64_bcond (x, cc_reg, label);
}
- return gen_aarch64_tb (code, mode, mode,
- x, gen_int_mode (bitnum, mode), label);
+ return gen_aarch64_tbz (code, mode, mode,
+ x, gen_int_mode (bitnum, mode), label);
}
/* Consider the operation:
@@ -3201,8 +3283,7 @@ aarch64_load_symref_appropriately (rtx dest, rtx imm,
aarch64_emit_call_insn (gen_tlsgd_small_si (result, imm));
else
aarch64_emit_call_insn (gen_tlsgd_small_di (result, imm));
- insns = get_insns ();
- end_sequence ();
+ insns = end_sequence ();
RTL_CONST_CALL_P (insns) = 1;
emit_libcall_block (insns, tmp_reg, result, imm);
@@ -3667,6 +3748,14 @@ aarch64_partial_ptrue_length (rtx_vector_builder &builder,
if (builder.nelts_per_pattern () == 3)
return 0;
+ /* It is conservatively correct to drop the element size to a lower value,
+ and we must do so if the predicate consists of a leading "foreground"
+ sequence that is smaller than the element size. Without this,
+ we would test only one bit and so treat everything as either an
+ all-true or an all-false predicate. */
+ if (builder.nelts_per_pattern () == 2)
+ elt_size = MIN (elt_size, builder.npatterns ());
+
/* Skip over leading set bits. */
unsigned int nelts = builder.encoded_nelts ();
unsigned int i = 0;
@@ -3698,6 +3787,24 @@ aarch64_partial_ptrue_length (rtx_vector_builder &builder,
return vl;
}
+/* Return:
+
+ * -1 if all bits of PRED are set
+ * N if PRED has N leading set bits followed by all clear bits
+ * 0 if PRED does not have any of these forms. */
+
+int
+aarch64_partial_ptrue_length (rtx pred)
+{
+ rtx_vector_builder builder;
+ if (!aarch64_get_sve_pred_bits (builder, pred))
+ return 0;
+
+ auto elt_size = vector_element_size (GET_MODE_BITSIZE (GET_MODE (pred)),
+ GET_MODE_NUNITS (GET_MODE (pred)));
+ return aarch64_partial_ptrue_length (builder, elt_size);
+}
+
/* See if there is an svpattern that encodes an SVE predicate of mode
PRED_MODE in which the first VL bits are set and the rest are clear.
Return the pattern if so, otherwise return AARCH64_NUM_SVPATTERNS.
@@ -3830,18 +3937,91 @@ aarch64_sve_same_pred_for_ptest_p (rtx *pred1, rtx *pred2)
return (ptrue1_p && ptrue2_p) || rtx_equal_p (pred1[0], pred2[0]);
}
+
+/* Generate a predicate to control partial SVE mode DATA_MODE as if it
+ were fully packed, enabling the defined elements only. */
+rtx
+aarch64_sve_packed_pred (machine_mode data_mode)
+{
+ unsigned int container_bytes
+ = aarch64_sve_container_bits (data_mode) / BITS_PER_UNIT;
+  /* Enable only the low bit of each container.  */
+ rtx ptrue = force_reg (VNx16BImode, aarch64_ptrue_all (container_bytes));
+ /* Predicate at the element size. */
+ machine_mode pmode
+ = aarch64_sve_pred_mode (GET_MODE_UNIT_SIZE (data_mode)).require ();
+ return gen_lowpart (pmode, ptrue);
+}
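+
+/* For example (illustrative): for VNx2SI, each 32-bit element lives in
+   a 64-bit container, so the PTRUE enables one bit in every eight and
+   the result is viewed as VNx4BI, activating every other .S element.  */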
+
+/* Generate a predicate and strictness value to govern a floating-point
+ operation with SVE mode DATA_MODE.
+
+   If DATA_MODE is a partial vector mode, this pair prevents the
+   operation from interpreting undefined elements, unless their
+   trapping behavior does not need to be suppressed.  */
+rtx
+aarch64_sve_fp_pred (machine_mode data_mode, rtx *strictness)
+{
+ unsigned int vec_flags = aarch64_classify_vector_mode (data_mode);
+ if (flag_trapping_math && (vec_flags & VEC_PARTIAL))
+ {
+ if (strictness)
+ *strictness = gen_int_mode (SVE_STRICT_GP, SImode);
+ return aarch64_sve_packed_pred (data_mode);
+ }
+ if (strictness)
+ *strictness = gen_int_mode (SVE_RELAXED_GP, SImode);
+ /* Use the VPRED mode. */
+ return aarch64_ptrue_reg (aarch64_sve_pred_mode (data_mode));
+}
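+
+/* For example (illustrative): for VNx2SF with -ftrapping-math (the
+   default), this returns the packed predicate and SVE_STRICT_GP, so
+   that undefined container halves cannot raise spurious exceptions;
+   without trapping math it returns a VNx2BI ptrue and SVE_RELAXED_GP.  */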
+
+/* PRED is a predicate that governs an operation on DATA_MODE. If DATA_MODE
+ is a partial vector mode, and if exceptions must be suppressed for its
+ undefined elements, convert PRED from a container-level predicate to
+ an element-level predicate and ensure that the undefined elements
+ are inactive. Make no changes otherwise.
+
+ Return the resultant predicate. */
+rtx
+aarch64_sve_emit_masked_fp_pred (machine_mode data_mode, rtx pred)
+{
+ unsigned int vec_flags = aarch64_classify_vector_mode (data_mode);
+ if (flag_trapping_math && (vec_flags & VEC_PARTIAL))
+ {
+ /* Generate an element-level mask. */
+ rtx mask = aarch64_sve_packed_pred (data_mode);
+ machine_mode pmode = GET_MODE (mask);
+
+ /* Apply the existing predicate. */
+ rtx dst = gen_reg_rtx (pmode);
+ emit_insn (gen_and3 (pmode, dst, mask,
+ gen_lowpart (pmode, pred)));
+ return dst;
+ }
+
+ return pred;
+}
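+
+/* For example (illustrative): for VNx2SF governed by a VNx2BI
+   predicate, this returns a VNx4BI predicate equal to the input ANDed
+   with the packed predicate, so that the undefined high half of each
+   64-bit container stays inactive.  */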
+
/* Emit a comparison CMP between OP0 and OP1, both of which have mode
DATA_MODE, and return the result in a predicate of mode PRED_MODE.
- Use TARGET as the target register if nonnull and convenient. */
+ Use TARGET as the target register if nonnull and convenient.
+
+ PRED_MODE can be either VNx16BI or the natural predicate mode for
+ DATA_MODE. */
static rtx
aarch64_sve_emit_int_cmp (rtx target, machine_mode pred_mode, rtx_code cmp,
machine_mode data_mode, rtx op1, rtx op2)
{
- insn_code icode = code_for_aarch64_pred_cmp (cmp, data_mode);
+ auto src_pred_mode = aarch64_sve_pred_mode (data_mode);
+ insn_code icode;
+ if (known_eq (GET_MODE_NUNITS (pred_mode), GET_MODE_NUNITS (data_mode)))
+ icode = code_for_aarch64_pred_cmp (cmp, data_mode);
+ else
+ icode = code_for_aarch64_pred_cmp_acle (cmp, data_mode);
expand_operand ops[5];
create_output_operand (&ops[0], target, pred_mode);
- create_input_operand (&ops[1], CONSTM1_RTX (pred_mode), pred_mode);
+ create_input_operand (&ops[1], CONSTM1_RTX (src_pred_mode), src_pred_mode);
create_integer_operand (&ops[2], SVE_KNOWN_PTRUE);
create_input_operand (&ops[3], op1, data_mode);
create_input_operand (&ops[4], op2, data_mode);
@@ -3849,15 +4029,14 @@ aarch64_sve_emit_int_cmp (rtx target, machine_mode pred_mode, rtx_code cmp,
return ops[0].value;
}
-/* Use a comparison to convert integer vector SRC into MODE, which is
- the corresponding SVE predicate mode. Use TARGET for the result
- if it's nonnull and convenient. */
+/* Use a comparison to convert integer vector SRC into VNx16BI.
+ Use TARGET for the result if it's nonnull and convenient. */
rtx
-aarch64_convert_sve_data_to_pred (rtx target, machine_mode mode, rtx src)
+aarch64_convert_sve_data_to_pred (rtx target, rtx src)
{
machine_mode src_mode = GET_MODE (src);
- return aarch64_sve_emit_int_cmp (target, mode, NE, src_mode,
+ return aarch64_sve_emit_int_cmp (target, VNx16BImode, NE, src_mode,
src, CONST0_RTX (src_mode));
}
@@ -5939,9 +6118,9 @@ aarch64_sve_move_pred_via_while (rtx target, machine_mode mode,
unsigned int vl)
{
rtx limit = force_reg (DImode, gen_int_mode (vl, DImode));
- target = aarch64_target_reg (target, mode);
- emit_insn (gen_while (UNSPEC_WHILELO, DImode, mode,
- target, const0_rtx, limit));
+ target = aarch64_target_reg (target, VNx16BImode);
+ emit_insn (gen_aarch64_sve_while_acle (UNSPEC_WHILELO, DImode, mode,
+ target, const0_rtx, limit));
return target;
}
@@ -6087,8 +6266,7 @@ aarch64_expand_sve_const_pred_trn (rtx target, rtx_vector_builder &builder,
operands but permutes them as though they had mode MODE. */
machine_mode mode = aarch64_sve_pred_mode (permute_size).require ();
target = aarch64_target_reg (target, GET_MODE (a));
- rtx type_reg = CONST0_RTX (mode);
- emit_insn (gen_aarch64_sve_trn1_conv (mode, target, a, b, type_reg));
+ emit_insn (gen_aarch64_sve_acle (UNSPEC_TRN1, mode, target, a, b));
return target;
}
@@ -6170,8 +6348,7 @@ aarch64_expand_sve_const_pred (rtx target, rtx_vector_builder &builder)
for (unsigned int i = 0; i < builder.encoded_nelts (); ++i)
int_builder.quick_push (INTVAL (builder.elt (i))
? constm1_rtx : const0_rtx);
- return aarch64_convert_sve_data_to_pred (target, VNx16BImode,
- int_builder.build ());
+ return aarch64_convert_sve_data_to_pred (target, int_builder.build ());
}
/* Set DEST to immediate IMM. */
@@ -6410,19 +6587,51 @@ aarch64_stack_protect_canary_mem (machine_mode mode, rtx decl_rtl,
return gen_rtx_MEM (mode, force_reg (Pmode, addr));
}
-/* Emit an SVE predicated move from SRC to DEST. PRED is a predicate
- that is known to contain PTRUE. */
+/* Emit a load/store from a subreg of SRC to a subreg of DEST.
+ The subregs have mode NEW_MODE. Use only for reg<->mem moves. */
+void
+aarch64_emit_load_store_through_mode (rtx dest, rtx src, machine_mode new_mode)
+{
+ gcc_assert ((MEM_P (dest) && register_operand (src, VOIDmode))
+ || (MEM_P (src) && register_operand (dest, VOIDmode)));
+ auto mode = GET_MODE (dest);
+ auto int_mode = aarch64_sve_int_mode (mode);
+ if (MEM_P (src))
+ {
+ rtx tmp = force_reg (new_mode, adjust_address (src, new_mode, 0));
+ tmp = force_lowpart_subreg (int_mode, tmp, new_mode);
+ emit_move_insn (dest, force_lowpart_subreg (mode, tmp, int_mode));
+ }
+ else
+ {
+ src = force_lowpart_subreg (int_mode, src, mode);
+ emit_move_insn (adjust_address (dest, new_mode, 0),
+ force_lowpart_subreg (new_mode, src, int_mode));
+ }
+}
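+
+/* For example (illustrative): on a 128-bit little-endian SVE target,
+   moving a VNx4SI value from memory through V16QImode lets a plain
+   "ldr q0, [x0]" replace a predicated LD1W.  */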
+
+/* PRED is a predicate that is known to contain PTRUE.
+   For 128-bit VLS loads/stores, emit LDR/STR.  Otherwise, emit an
+   SVE predicated move from SRC to DEST.  */
void
aarch64_emit_sve_pred_move (rtx dest, rtx pred, rtx src)
{
- expand_operand ops[3];
machine_mode mode = GET_MODE (dest);
- create_output_operand (&ops[0], dest, mode);
- create_input_operand (&ops[1], pred, GET_MODE(pred));
- create_input_operand (&ops[2], src, mode);
- temporary_volatile_ok v (true);
- expand_insn (code_for_aarch64_pred_mov (mode), 3, ops);
+ if ((MEM_P (dest) || MEM_P (src))
+ && known_eq (GET_MODE_SIZE (mode), 16)
+ && aarch64_classify_vector_mode (mode) == VEC_SVE_DATA
+ && !BYTES_BIG_ENDIAN)
+ aarch64_emit_load_store_through_mode (dest, src, V16QImode);
+ else
+ {
+ expand_operand ops[3];
+ create_output_operand (&ops[0], dest, mode);
+ create_input_operand (&ops[1], pred, GET_MODE(pred));
+ create_input_operand (&ops[2], src, mode);
+ temporary_volatile_ok v (true);
+ expand_insn (code_for_aarch64_pred_mov (mode), 3, ops);
+ }
}
/* Expand a pre-RA SVE data move from SRC to DEST in which at least one
@@ -6591,6 +6800,27 @@ aarch64_split_sve_subreg_move (rtx dest, rtx ptrue, rtx src)
dest, ptrue, src));
}
+/* Set predicate register DEST such that every element has the scalar
+ boolean value in SRC, with any nonzero source counting as "true".
+ MODE is a MODE_VECTOR_BOOL that determines the element size;
+ DEST can have this mode or VNx16BImode. In the latter case,
+ the upper bits of each element are defined to be zero, as for
+ the .H, .S, and .D forms of PTRUE. */
+
+void
+aarch64_emit_sve_pred_vec_duplicate (machine_mode mode, rtx dest, rtx src)
+{
+ rtx tmp = gen_reg_rtx (DImode);
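+  /* Shifting the boolean's low bit into bit 63 makes TMP either 0 or
+     1ULL << 63; WHILELO (0, TMP) then produces an all-false or
+     all-true predicate respectively.  */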
+ emit_insn (gen_ashldi3 (tmp, gen_lowpart (DImode, src),
+ gen_int_mode (63, DImode)));
+ if (GET_MODE (dest) == VNx16BImode)
+ emit_insn (gen_aarch64_sve_while_acle (UNSPEC_WHILELO, DImode, mode,
+ dest, const0_rtx, tmp));
+ else
+ emit_insn (gen_while (UNSPEC_WHILELO, DImode, mode,
+ dest, const0_rtx, tmp));
+}
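+
+/* A sketch of the emitted sequence for VNx4BImode, assuming x0 holds SRC:
+       lsl     x1, x0, #63         // nonzero iff the low bit of SRC is set
+       whilelo p0.s, xzr, x1       // all-true iff 0 < x1 (unsigned)
+   When the shifted value is zero, WHILELO yields an all-false predicate,
+   since no unsigned index is less than zero.  */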
+
static bool
aarch64_function_ok_for_sibcall (tree, tree exp)
{
@@ -8699,6 +8929,13 @@ aarch_bti_j_insn_p (rtx_insn *insn)
return GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == UNSPECV_BTI_J;
}
+/* Return TRUE if Pointer Authentication for the return address is enabled. */
+bool
+aarch64_pacret_enabled (void)
+{
+ return (aarch_ra_sign_scope != AARCH_FUNCTION_NONE);
+}
+
/* Return TRUE if Guarded Control Stack is enabled. */
bool
aarch64_gcs_enabled (void)
@@ -9417,13 +9654,16 @@ aarch64_emit_stack_tie (rtx reg)
}
/* Allocate POLY_SIZE bytes of stack space using TEMP1 and TEMP2 as scratch
- registers. If POLY_SIZE is not large enough to require a probe this function
- will only adjust the stack. When allocating the stack space
- FRAME_RELATED_P is then used to indicate if the allocation is frame related.
- FINAL_ADJUSTMENT_P indicates whether we are allocating the area below
- the saved registers. If we are then we ensure that any allocation
- larger than the ABI defined buffer needs a probe so that the
- invariant of having a 1KB buffer is maintained.
+ registers, given that the stack pointer is currently BYTES_BELOW_SP bytes
+ above the bottom of the static frame.
+
+ If POLY_SIZE is not large enough to require a probe this function will only
+ adjust the stack. When allocating the stack space FRAME_RELATED_P is then
+ used to indicate if the allocation is frame related. FINAL_ADJUSTMENT_P
+ indicates whether we are allocating the area below the saved registers.
+ If we are, we ensure that any allocation larger than the ABI-defined
+ buffer gets a probe, so that the invariant of having a 1KB buffer is
+ maintained.
We emit barriers after each stack adjustment to prevent optimizations from
breaking the invariant that we never drop the stack more than a page. This
@@ -9440,6 +9680,7 @@ aarch64_emit_stack_tie (rtx reg)
static void
aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2,
poly_int64 poly_size,
+ poly_int64 bytes_below_sp,
aarch64_isa_mode force_isa_mode,
bool frame_related_p,
bool final_adjustment_p)
@@ -9503,8 +9744,8 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2,
poly_size, temp1, temp2, force_isa_mode,
false, true);
- rtx_insn *insn = get_last_insn ();
-
+ auto initial_cfa_offset = frame.frame_size - bytes_below_sp;
+ auto final_cfa_offset = initial_cfa_offset + poly_size;
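+ /* For example, with frame_size == 128 and bytes_below_sp == 96, the
+ CFA is SP + 32 before the allocation and SP + 32 + poly_size after
+ it. */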
if (frame_related_p)
{
/* This is done to provide unwinding information for the stack
@@ -9514,28 +9755,31 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2,
The tie will expand to nothing but the optimizers will not touch
the instruction. */
rtx stack_ptr_copy = gen_rtx_REG (Pmode, STACK_CLASH_SVE_CFA_REGNUM);
- emit_move_insn (stack_ptr_copy, stack_pointer_rtx);
+ auto *insn = emit_move_insn (stack_ptr_copy, stack_pointer_rtx);
aarch64_emit_stack_tie (stack_ptr_copy);
/* We want the CFA independent of the stack pointer for the
duration of the loop. */
- add_reg_note (insn, REG_CFA_DEF_CFA, stack_ptr_copy);
+ add_reg_note (insn, REG_CFA_DEF_CFA,
+ plus_constant (Pmode, stack_ptr_copy,
+ initial_cfa_offset));
RTX_FRAME_RELATED_P (insn) = 1;
}
rtx probe_const = gen_int_mode (min_probe_threshold, Pmode);
rtx guard_const = gen_int_mode (guard_size, Pmode);
- insn = emit_insn (gen_probe_sve_stack_clash (Pmode, stack_pointer_rtx,
- stack_pointer_rtx, temp1,
- probe_const, guard_const));
+ auto *insn
+ = emit_insn (gen_probe_sve_stack_clash (Pmode, stack_pointer_rtx,
+ stack_pointer_rtx, temp1,
+ probe_const, guard_const));
/* Now reset the CFA register if needed. */
if (frame_related_p)
{
add_reg_note (insn, REG_CFA_DEF_CFA,
- gen_rtx_PLUS (Pmode, stack_pointer_rtx,
- gen_int_mode (poly_size, Pmode)));
+ plus_constant (Pmode, stack_pointer_rtx,
+ final_cfa_offset));
RTX_FRAME_RELATED_P (insn) = 1;
}
@@ -9581,12 +9825,13 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2,
We can determine which allocation we are doing by looking at
the value of FRAME_RELATED_P since the final allocations are not
frame related. */
+ auto cfa_offset = frame.frame_size - (bytes_below_sp - rounded_size);
if (frame_related_p)
{
/* We want the CFA independent of the stack pointer for the
duration of the loop. */
add_reg_note (insn, REG_CFA_DEF_CFA,
- plus_constant (Pmode, temp1, rounded_size));
+ plus_constant (Pmode, temp1, cfa_offset));
RTX_FRAME_RELATED_P (insn) = 1;
}
@@ -9608,7 +9853,7 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2,
if (frame_related_p)
{
add_reg_note (insn, REG_CFA_DEF_CFA,
- plus_constant (Pmode, stack_pointer_rtx, rounded_size));
+ plus_constant (Pmode, stack_pointer_rtx, cfa_offset));
RTX_FRAME_RELATED_P (insn) = 1;
}
@@ -9916,17 +10161,22 @@ aarch64_expand_prologue (void)
code below does not handle it for -fstack-clash-protection. */
gcc_assert (known_eq (initial_adjust, 0) || callee_adjust == 0);
+ /* The offset of the current SP from the bottom of the static frame. */
+ poly_int64 bytes_below_sp = frame_size;
+
/* Will only probe if the initial adjustment is larger than the guard
less the amount of the guard reserved for use by the caller's
outgoing args. */
aarch64_allocate_and_probe_stack_space (tmp0_rtx, tmp1_rtx, initial_adjust,
- force_isa_mode, true, false);
+ bytes_below_sp, force_isa_mode,
+ true, false);
+ bytes_below_sp -= initial_adjust;
if (callee_adjust != 0)
- aarch64_push_regs (reg1, reg2, callee_adjust);
-
- /* The offset of the current SP from the bottom of the static frame. */
- poly_int64 bytes_below_sp = frame_size - initial_adjust - callee_adjust;
+ {
+ aarch64_push_regs (reg1, reg2, callee_adjust);
+ bytes_below_sp -= callee_adjust;
+ }
if (emit_frame_chain)
{
@@ -9994,7 +10244,7 @@ aarch64_expand_prologue (void)
|| known_eq (frame.reg_offset[VG_REGNUM], bytes_below_sp));
aarch64_allocate_and_probe_stack_space (tmp1_rtx, tmp0_rtx,
sve_callee_adjust,
- force_isa_mode,
+ bytes_below_sp, force_isa_mode,
!frame_pointer_needed, false);
bytes_below_sp -= sve_callee_adjust;
}
@@ -10005,10 +10255,11 @@ aarch64_expand_prologue (void)
/* We may need to probe the final adjustment if it is larger than the guard
that is assumed by the callee. */
- gcc_assert (known_eq (bytes_below_sp, final_adjust));
aarch64_allocate_and_probe_stack_space (tmp1_rtx, tmp0_rtx, final_adjust,
- force_isa_mode,
+ bytes_below_sp, force_isa_mode,
!frame_pointer_needed, true);
+ bytes_below_sp -= final_adjust;
+ gcc_assert (known_eq (bytes_below_sp, 0));
if (emit_frame_chain && maybe_ne (final_adjust, 0))
aarch64_emit_stack_tie (hard_frame_pointer_rtx);
@@ -14171,42 +14422,58 @@ aarch64_if_then_else_costs (rtx op0, rtx op1, rtx op2, int *cost, bool speed)
if (GET_CODE (op1) == PC || GET_CODE (op2) == PC)
{
/* Conditional branch. */
- if (GET_MODE_CLASS (GET_MODE (inner)) == MODE_CC)
+ enum machine_mode cmpmode = GET_MODE (inner);
+ if (GET_MODE_CLASS (cmpmode) == MODE_CC)
return true;
- else
+
+ if (comparator == const0_rtx)
{
- if (cmpcode == NE || cmpcode == EQ)
+ switch (cmpcode)
{
- if (comparator == const0_rtx)
+ case NE:
+ case EQ:
+ if (cmpmode != SImode && cmpmode != DImode)
+ break;
+ if (GET_CODE (inner) == ZERO_EXTRACT)
{
- /* TBZ/TBNZ/CBZ/CBNZ. */
- if (GET_CODE (inner) == ZERO_EXTRACT)
- /* TBZ/TBNZ. */
- *cost += rtx_cost (XEXP (inner, 0), VOIDmode,
- ZERO_EXTRACT, 0, speed);
- else
- /* CBZ/CBNZ. */
- *cost += rtx_cost (inner, VOIDmode, cmpcode, 0, speed);
-
+ /* TBZ/TBNZ. */
+ *cost += rtx_cost (XEXP (inner, 0), VOIDmode,
+ ZERO_EXTRACT, 0, speed);
return true;
}
- if (register_operand (inner, VOIDmode)
- && aarch64_imm24 (comparator, VOIDmode))
- {
- /* SUB and SUBS. */
- *cost += COSTS_N_INSNS (2);
- if (speed)
- *cost += extra_cost->alu.arith * 2;
- return true;
- }
- }
- else if (cmpcode == LT || cmpcode == GE)
- {
- /* TBZ/TBNZ. */
- if (comparator == const0_rtx)
- return true;
+ /* FALLTHRU */
+
+ case LT:
+ case GE:
+ /* CBZ/CBNZ/TBZ/TBNZ. */
+ *cost += rtx_cost (inner, cmpmode, cmpcode, 0, speed);
+ return true;
+
+ default:
+ break;
}
}
+
+ if ((cmpcode == NE || cmpcode == EQ)
+ && (cmpmode == SImode || cmpmode == DImode)
+ && aarch64_split_imm24 (comparator, cmpmode))
+ {
+ /* SUB and SUBS. */
+ *cost += rtx_cost (inner, cmpmode, cmpcode, 0, speed);
+ *cost += COSTS_N_INSNS (2);
+ if (speed)
+ *cost += extra_cost->alu.arith * 2;
+ return true;
+ }
+
+ if (TARGET_CMPBR)
+ {
+ *cost += rtx_cost (inner, cmpmode, cmpcode, 0, speed);
+ if ((cmpmode != SImode && cmpmode != DImode)
+ || !aarch64_cb_rhs (cmpcode, comparator))
+ *cost += rtx_cost (comparator, cmpmode, cmpcode, 1, speed);
+ return true;
+ }
}
else if (GET_MODE_CLASS (GET_MODE (inner)) == MODE_CC)
{
@@ -14507,6 +14774,13 @@ aarch64_rtx_costs (rtx x, machine_mode mode, int outer ATTRIBUTE_UNUSED,
we don't need to consider that here. */
if (x == const0_rtx)
*cost = 0;
+ /* If the outer code is a COMPARE that the middle-end uses directly
+ and the constant fits within what the cmp instruction allows,
+ cost it as a single insn. */
+ else if (outer == COMPARE
+ && (aarch64_uimm12_shift (INTVAL (x))
+ || aarch64_uimm12_shift (-UINTVAL (x))))
+ *cost = COSTS_N_INSNS (1);
else
{
/* To an approximation, building any other constant is
@@ -15693,11 +15967,14 @@ cost_plus:
break;
case CONST_VECTOR:
{
- /* Load using MOVI/MVNI. */
- if (aarch64_simd_valid_mov_imm (x))
- *cost = extra_cost->vect.movi;
- else /* Load using constant pool. */
- *cost = extra_cost->ldst.load;
+ if (speed)
+ {
+ /* Load using MOVI/MVNI. */
+ if (aarch64_simd_valid_mov_imm (x))
+ *cost += extra_cost->vect.movi;
+ else /* Load using constant pool. */
+ *cost += extra_cost->ldst.load;
+ }
break;
}
case VEC_CONCAT:
@@ -15706,7 +15983,8 @@ cost_plus:
break;
case VEC_DUPLICATE:
/* Load using a DUP. */
- *cost = extra_cost->vect.dup;
+ if (speed)
+ *cost += extra_cost->vect.dup;
return false;
case VEC_SELECT:
{
@@ -15714,13 +15992,16 @@ cost_plus:
*cost = rtx_cost (op0, GET_MODE (op0), VEC_SELECT, 0, speed);
/* cost subreg of 0 as free, otherwise as DUP */
- rtx op1 = XEXP (x, 1);
- if (vec_series_lowpart_p (mode, GET_MODE (op1), op1))
- ;
- else if (vec_series_highpart_p (mode, GET_MODE (op1), op1))
- *cost = extra_cost->vect.dup;
- else
- *cost = extra_cost->vect.extract;
+ if (speed)
+ {
+ rtx op1 = XEXP (x, 1);
+ if (vec_series_lowpart_p (mode, GET_MODE (op1), op1))
+ ;
+ else if (vec_series_highpart_p (mode, GET_MODE (op1), op1))
+ *cost += extra_cost->vect.dup;
+ else
+ *cost += extra_cost->vect.extract;
+ }
return true;
}
default:
@@ -16776,6 +17057,14 @@ private:
or vector loop. There is one entry for each tuning option of
interest. */
auto_vec<aarch64_vec_op_count, 2> m_ops;
+
+ /* When doing inner-loop vectorization, the constraints on the data-refs in
+ the outer loop could limit the inner-loop references; i.e. the outer loop
+ can force the inner loop to do a load and splat, which results in the loop
+ being entirely scalar as all lanes work on a duplicate. Currently we don't
+ support unrolling the inner loop independently of the outer loop during
+ outer-loop vectorization, which tends to lead to pipeline bubbles. */
+ bool m_loop_fully_scalar_dup = false;
};
aarch64_vector_costs::aarch64_vector_costs (vec_info *vinfo,
@@ -16996,8 +17285,8 @@ aarch64_ld234_st234_vectors (vect_cost_for_stmt kind, stmt_vec_info stmt_info,
&& STMT_VINFO_DATA_REF (stmt_info))
{
stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
- if (stmt_info
- && vect_mem_access_type (stmt_info, node) == VMAT_LOAD_STORE_LANES)
+ if (node
+ && SLP_TREE_MEMORY_ACCESS_TYPE (node) == VMAT_LOAD_STORE_LANES)
return DR_GROUP_SIZE (stmt_info);
}
return 0;
@@ -17097,13 +17386,14 @@ aarch64_multiply_add_p (vec_info *vinfo, stmt_vec_info stmt_info,
static bool
aarch64_bool_compound_p (vec_info *vinfo, stmt_vec_info stmt_info,
- unsigned int vec_flags)
+ slp_tree node, unsigned int vec_flags)
{
gassign *assign = dyn_cast<gassign *> (stmt_info->stmt);
if (!assign
+ || !node
|| gimple_assign_rhs_code (assign) != BIT_AND_EXPR
- || !STMT_VINFO_VECTYPE (stmt_info)
- || !VECTOR_BOOLEAN_TYPE_P (STMT_VINFO_VECTYPE (stmt_info)))
+ || !SLP_TREE_VECTYPE (node)
+ || !VECTOR_BOOLEAN_TYPE_P (SLP_TREE_VECTYPE (node)))
return false;
for (int i = 1; i < 3; ++i)
@@ -17138,10 +17428,11 @@ aarch64_bool_compound_p (vec_info *vinfo, stmt_vec_info stmt_info,
instructions. */
static unsigned int
aarch64_sve_in_loop_reduction_latency (vec_info *vinfo,
+ slp_tree node,
stmt_vec_info stmt_info,
const sve_vec_cost *sve_costs)
{
- switch (vect_reduc_type (vinfo, stmt_info))
+ switch (vect_reduc_type (vinfo, node))
{
case EXTRACT_LAST_REDUCTION:
return sve_costs->clast_cost;
@@ -17181,7 +17472,9 @@ aarch64_sve_in_loop_reduction_latency (vec_info *vinfo,
- If VEC_FLAGS & VEC_ANY_SVE, return the loop carry latency of the
SVE implementation. */
static unsigned int
-aarch64_in_loop_reduction_latency (vec_info *vinfo, stmt_vec_info stmt_info,
+aarch64_in_loop_reduction_latency (vec_info *vinfo,
+ slp_tree node,
+ stmt_vec_info stmt_info,
unsigned int vec_flags)
{
const cpu_vector_cost *vec_costs = aarch64_tune_params.vec_costs;
@@ -17194,7 +17487,8 @@ aarch64_in_loop_reduction_latency (vec_info *vinfo, stmt_vec_info stmt_info,
if (sve_costs)
{
unsigned int latency
- = aarch64_sve_in_loop_reduction_latency (vinfo, stmt_info, sve_costs);
+ = aarch64_sve_in_loop_reduction_latency (vinfo, node,
+ stmt_info, sve_costs);
if (latency)
return latency;
}
@@ -17268,8 +17562,9 @@ aarch64_detect_vector_stmt_subtype (vec_info *vinfo, vect_cost_for_stmt kind,
for each element. We therefore need to divide the full-instruction
cost by the number of elements in the vector. */
if (kind == scalar_load
+ && node
&& sve_costs
- && vect_mem_access_type (stmt_info, node) == VMAT_GATHER_SCATTER)
+ && mat_gather_scatter_p (SLP_TREE_MEMORY_ACCESS_TYPE (node)))
{
unsigned int nunits = vect_nunits_for_cost (vectype);
/* Test for VNx2 modes, which have 64-bit containers. */
@@ -17281,8 +17576,9 @@ aarch64_detect_vector_stmt_subtype (vec_info *vinfo, vect_cost_for_stmt kind,
/* Detect cases in which a scalar_store is really storing one element
in a scatter operation. */
if (kind == scalar_store
+ && node
&& sve_costs
- && vect_mem_access_type (stmt_info, node) == VMAT_GATHER_SCATTER)
+ && mat_gather_scatter_p (SLP_TREE_MEMORY_ACCESS_TYPE (node)))
return sve_costs->scatter_store_elt_cost;
/* Detect cases in which vec_to_scalar represents an in-loop reduction. */
@@ -17291,7 +17587,8 @@ aarch64_detect_vector_stmt_subtype (vec_info *vinfo, vect_cost_for_stmt kind,
&& sve_costs)
{
unsigned int latency
- = aarch64_sve_in_loop_reduction_latency (vinfo, stmt_info, sve_costs);
+ = aarch64_sve_in_loop_reduction_latency (vinfo, node,
+ stmt_info, sve_costs);
if (latency)
return latency;
}
@@ -17440,7 +17737,7 @@ aarch64_adjust_stmt_cost (vec_info *vinfo, vect_cost_for_stmt kind,
/* For vector boolean ANDs with a compare operand we just need
one insn. */
- if (aarch64_bool_compound_p (vinfo, stmt_info, vec_flags))
+ if (aarch64_bool_compound_p (vinfo, stmt_info, node, vec_flags))
return 0;
}
@@ -17473,13 +17770,12 @@ aarch64_adjust_stmt_cost (vec_info *vinfo, vect_cost_for_stmt kind,
with the single accumulator being read and written multiple times. */
static bool
-aarch64_force_single_cycle (vec_info *vinfo, stmt_vec_info stmt_info)
+aarch64_force_single_cycle (vec_info *vinfo, slp_tree node)
{
- if (!STMT_VINFO_REDUC_DEF (stmt_info))
+ auto reduc_info = info_for_reduction (as_a <loop_vec_info> (vinfo), node);
+ if (!reduc_info)
return false;
-
- auto reduc_info = info_for_reduction (vinfo, stmt_info);
- return STMT_VINFO_FORCE_SINGLE_CYCLE (reduc_info);
+ return VECT_REDUC_INFO_FORCE_SINGLE_CYCLE (reduc_info);
}
/* COUNT, KIND and STMT_INFO are the same as for vector_costs::add_stmt_cost
@@ -17503,8 +17799,10 @@ aarch64_vector_costs::count_ops (unsigned int count, vect_cost_for_stmt kind,
&& vect_is_reduction (stmt_info))
{
unsigned int base
- = aarch64_in_loop_reduction_latency (m_vinfo, stmt_info, m_vec_flags);
- if (aarch64_force_single_cycle (m_vinfo, stmt_info))
+ = aarch64_in_loop_reduction_latency (m_vinfo, node,
+ stmt_info, m_vec_flags);
+ if (m_costing_for_scalar
+ || aarch64_force_single_cycle (m_vinfo, node))
/* ??? Ideally we'd use a tree to reduce the copies down to 1 vector,
and then accumulate that, but at the moment the loop-carried
dependency includes all copies. */
@@ -17521,7 +17819,7 @@ aarch64_vector_costs::count_ops (unsigned int count, vect_cost_for_stmt kind,
/* Assume that bool AND with compare operands will become a single
operation. */
- if (aarch64_bool_compound_p (m_vinfo, stmt_info, m_vec_flags))
+ if (aarch64_bool_compound_p (m_vinfo, stmt_info, node, m_vec_flags))
return;
}
@@ -17538,7 +17836,7 @@ aarch64_vector_costs::count_ops (unsigned int count, vect_cost_for_stmt kind,
if (stmt_info
&& kind == vec_to_scalar
&& (m_vec_flags & VEC_ADVSIMD)
- && vect_mem_access_type (stmt_info, node) == VMAT_GATHER_SCATTER)
+ && mat_gather_scatter_p (SLP_TREE_MEMORY_ACCESS_TYPE (node)))
{
auto dr = STMT_VINFO_DATA_REF (stmt_info);
tree dr_ref = DR_REF (dr);
@@ -17551,7 +17849,7 @@ aarch64_vector_costs::count_ops (unsigned int count, vect_cost_for_stmt kind,
{
if (gimple_vuse (SSA_NAME_DEF_STMT (offset)))
{
- if (STMT_VINFO_TYPE (stmt_info) == load_vec_info_type)
+ if (SLP_TREE_TYPE (node) == load_vec_info_type)
ops->loads += count - 1;
else
/* Stores want to count both the index to array and data to
@@ -17617,7 +17915,7 @@ aarch64_vector_costs::count_ops (unsigned int count, vect_cost_for_stmt kind,
have only accounted for one. */
if (stmt_info
&& (kind == vector_stmt || kind == vec_to_scalar)
- && vect_reduc_type (m_vinfo, stmt_info) == COND_REDUCTION)
+ && vect_reduc_type (m_vinfo, node) == COND_REDUCTION)
ops->general_ops += count;
/* Count the predicate operations needed by an SVE comparison. */
@@ -17653,7 +17951,7 @@ aarch64_vector_costs::count_ops (unsigned int count, vect_cost_for_stmt kind,
if (stmt_info
&& sve_issue
&& (kind == scalar_load || kind == scalar_store)
- && vect_mem_access_type (stmt_info, node) == VMAT_GATHER_SCATTER)
+ && mat_gather_scatter_p (SLP_TREE_MEMORY_ACCESS_TYPE (node)))
{
unsigned int pairs = CEIL (count, 2);
ops->pred_ops += sve_issue->gather_scatter_pair_pred_ops * pairs;
@@ -17762,6 +18060,17 @@ aarch64_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind,
tree vectype, int misalign,
vect_cost_model_location where)
{
+ /* When costing for scalars, vectype will be NULL, so look up the type via
+ stmt_info's statement. */
+ if (m_costing_for_scalar && stmt_info)
+ {
+ gcc_assert (!vectype);
+ /* This won't work for e.g. gconds or other statements without an lhs,
+ but those only work on GPRs anyway and this is the best we can do. */
+ if (tree lhs = gimple_get_lhs (STMT_VINFO_STMT (stmt_info)))
+ vectype = TREE_TYPE (lhs);
+ }
+
fractional_cost stmt_cost
= aarch64_builtin_vectorization_cost (kind, vectype, misalign);
@@ -17771,12 +18080,34 @@ aarch64_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind,
/* Do one-time initialization based on the vinfo. */
loop_vec_info loop_vinfo = dyn_cast<loop_vec_info> (m_vinfo);
- if (!m_analyzed_vinfo)
+ if (!m_analyzed_vinfo && !m_costing_for_scalar)
{
if (loop_vinfo)
analyze_loop_vinfo (loop_vinfo);
m_analyzed_vinfo = true;
+ if (in_inner_loop_p)
+ m_loop_fully_scalar_dup = true;
+ }
+
+ /* Detect whether the loop is working on fully duplicated lanes. This would
+ only be possible with inner loop vectorization since otherwise we wouldn't
+ try to vectorize. */
+ if (in_inner_loop_p
+ && node
+ && m_loop_fully_scalar_dup
+ && SLP_TREE_LANES (node) == 1
+ && !SLP_TREE_CHILDREN (node).exists ())
+ {
+ /* Check if load is a duplicate. */
+ if (gimple_vuse (stmt_info->stmt)
+ && SLP_TREE_MEMORY_ACCESS_TYPE (node) == VMAT_INVARIANT)
+ ;
+ else if (SLP_TREE_DEF_TYPE (node) == vect_constant_def
+ || SLP_TREE_DEF_TYPE (node) == vect_external_def)
+ ;
+ else
+ m_loop_fully_scalar_dup = false;
}
/* Apply the heuristic described above m_stp_sequence_cost. */
@@ -17808,8 +18139,10 @@ aarch64_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind,
/* Check if we've seen an SVE gather/scatter operation and which size. */
if (kind == scalar_load
+ && node
+ && vectype
&& aarch64_sve_mode_p (TYPE_MODE (vectype))
- && vect_mem_access_type (stmt_info, node) == VMAT_GATHER_SCATTER)
+ && mat_gather_scatter_p (SLP_TREE_MEMORY_ACCESS_TYPE (node)))
{
const sve_vec_cost *sve_costs = aarch64_tune_params.vec_costs->sve;
if (sve_costs)
@@ -18141,8 +18474,19 @@ adjust_body_cost (loop_vec_info loop_vinfo,
if (m_vec_flags & VEC_ANY_SVE)
threshold = CEIL (threshold, aarch64_estimated_sve_vq ());
- if (m_num_vector_iterations >= 1
- && m_num_vector_iterations < threshold)
+ /* Increase the cost of the vector code if it looks like the vector code has
+ limited throughput due to outer-loop vectorization. */
+ if (m_loop_fully_scalar_dup)
+ {
+ body_cost *= estimated_vf;
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "Increasing body cost to %d because vector code has"
+ " low throughput of per iteration due to splats\n",
+ body_cost);
+ }
+ else if (m_num_vector_iterations >= 1
+ && m_num_vector_iterations < threshold)
{
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location,
@@ -18632,6 +18976,8 @@ aarch64_adjust_generic_arch_tuning (struct tune_params &current_tune)
if (TARGET_SVE2)
current_tune.extra_tuning_flags
&= ~AARCH64_EXTRA_TUNE_CSE_SVE_VL_CONSTANTS;
+ if (!AARCH64_HAVE_ISA (V8_8A))
+ current_tune.extra_tuning_flags |= AARCH64_EXTRA_TUNE_AVOID_LDAPUR;
}
static void
@@ -18696,7 +19042,10 @@ aarch64_override_options_internal (struct gcc_options *opts)
/* Make a copy of the tuning parameters attached to the core, which
we may later overwrite. */
aarch64_tune_params = *(tune->tune);
- if (tune->tune == &generic_tunings)
+
+ if (tune->tune == &generic_tunings
+ || tune->tune == &generic_armv8_a_tunings
+ || tune->tune == &generic_armv9_a_tunings)
aarch64_adjust_generic_arch_tuning (aarch64_tune_params);
if (opts->x_aarch64_override_tune_string)
@@ -18748,9 +19097,16 @@ aarch64_override_options_internal (struct gcc_options *opts)
aarch64_stack_protector_guard_offset = offs;
}
- if ((flag_sanitize & SANITIZE_SHADOW_CALL_STACK)
- && !fixed_regs[R18_REGNUM])
- error ("%<-fsanitize=shadow-call-stack%> requires %<-ffixed-x18%>");
+ if ((flag_sanitize & SANITIZE_SHADOW_CALL_STACK))
+ {
+ if (!fixed_regs[R18_REGNUM])
+ error ("%<-fsanitize=shadow-call-stack%> requires %<-ffixed-x18%>");
+#ifdef TARGET_OS_USES_R18
+ else
+ sorry ("%<-fsanitize=shadow-call-stack%> conflicts with the use of"
+ " register x18 by the target operating system");
+#endif
+ }
aarch64_feature_flags isa_flags = aarch64_get_isa_flags (opts);
if ((isa_flags & (AARCH64_FL_SM_ON | AARCH64_FL_ZA_ON))
@@ -18901,6 +19257,20 @@ aarch64_override_options_internal (struct gcc_options *opts)
if (TARGET_SME && !TARGET_SVE2)
sorry ("no support for %qs without %qs", "sme", "sve2");
+ /* Make scalar code look expensive so that we always pick vectorization:
+ raise the scalar cost multiplier to 10000%. */
+ if (opts->x_flag_aarch64_max_vectorization)
+ SET_OPTION_IF_UNSET (opts, &global_options_set,
+ param_vect_scalar_cost_multiplier, 10000);
+
+ /* Synchronize -mautovec-preference with the aarch64-autovec-preference
+ param, using whichever one is not the default. If both are set, the
+ param takes precedence, since the flag's value is only copied across
+ while the param is still unset. */
+ if (opts->x_autovec_preference != AARCH64_AUTOVEC_DEFAULT)
+ SET_OPTION_IF_UNSET (opts, &global_options_set,
+ aarch64_autovec_preference,
+ opts->x_autovec_preference);
+
aarch64_override_options_after_change_1 (opts);
}
@@ -19651,6 +20021,8 @@ static const struct aarch64_attribute_info aarch64_attributes[] =
OPT_msign_return_address_ },
{ "outline-atomics", aarch64_attr_bool, true, NULL,
OPT_moutline_atomics},
+ { "max-vectorization", aarch64_attr_bool, false, NULL,
+ OPT_mmax_vectorization},
{ NULL, aarch64_attr_custom, false, NULL, OPT____ }
};
@@ -19769,8 +20141,9 @@ aarch64_process_one_target_attr (char *arg_str)
if (valid)
{
set_option (&global_options, NULL, p_attr->opt_num, value,
- NULL, DK_UNSPECIFIED, input_location,
- global_dc);
+ NULL,
+ static_cast<int> (diagnostics::kind::unspecified),
+ input_location, global_dc);
}
else
{
@@ -20282,6 +20655,8 @@ aarch64_compare_version_priority (tree decl1, tree decl2)
unsigned long _size; // Size of the struct, so it can grow.
unsigned long _hwcap;
unsigned long _hwcap2;
+ unsigned long _hwcap3;
+ unsigned long _hwcap4;
}
*/
@@ -20298,14 +20673,24 @@ build_ifunc_arg_type ()
tree field3 = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
get_identifier ("_hwcap2"),
long_unsigned_type_node);
+ tree field4 = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
+ get_identifier ("_hwcap3"),
+ long_unsigned_type_node);
+ tree field5 = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
+ get_identifier ("_hwcap4"),
+ long_unsigned_type_node);
DECL_FIELD_CONTEXT (field1) = ifunc_arg_type;
DECL_FIELD_CONTEXT (field2) = ifunc_arg_type;
DECL_FIELD_CONTEXT (field3) = ifunc_arg_type;
+ DECL_FIELD_CONTEXT (field4) = ifunc_arg_type;
+ DECL_FIELD_CONTEXT (field5) = ifunc_arg_type;
TYPE_FIELDS (ifunc_arg_type) = field1;
DECL_CHAIN (field1) = field2;
DECL_CHAIN (field2) = field3;
+ DECL_CHAIN (field3) = field4;
+ DECL_CHAIN (field4) = field5;
layout_type (ifunc_arg_type);
@@ -20777,7 +21162,6 @@ aarch64_get_function_versions_dispatcher (void *decl)
struct cgraph_node *node = NULL;
struct cgraph_node *default_node = NULL;
struct cgraph_function_version_info *node_v = NULL;
- struct cgraph_function_version_info *first_v = NULL;
tree dispatch_decl = NULL;
@@ -20794,37 +21178,16 @@ aarch64_get_function_versions_dispatcher (void *decl)
if (node_v->dispatcher_resolver != NULL)
return node_v->dispatcher_resolver;
- /* Find the default version and make it the first node. */
- first_v = node_v;
- /* Go to the beginning of the chain. */
- while (first_v->prev != NULL)
- first_v = first_v->prev;
- default_version_info = first_v;
- while (default_version_info != NULL)
- {
- if (get_feature_mask_for_version
- (default_version_info->this_node->decl) == 0ULL)
- break;
- default_version_info = default_version_info->next;
- }
+ /* The default node is always the beginning of the chain. */
+ default_version_info = node_v;
+ while (default_version_info->prev)
+ default_version_info = default_version_info->prev;
+ default_node = default_version_info->this_node;
/* If there is no default node, just return NULL. */
- if (default_version_info == NULL)
+ if (!is_function_default_version (default_node->decl))
return NULL;
- /* Make default info the first node. */
- if (first_v != default_version_info)
- {
- default_version_info->prev->next = default_version_info->next;
- if (default_version_info->next)
- default_version_info->next->prev = default_version_info->prev;
- first_v->prev = default_version_info;
- default_version_info->next = first_v;
- default_version_info->prev = NULL;
- }
-
- default_node = default_version_info->this_node;
-
if (targetm.has_ifunc_p ())
{
struct cgraph_function_version_info *it_v = NULL;
@@ -21968,6 +22331,14 @@ aarch64_conditional_register_usage (void)
fixed_regs[SPECULATION_SCRATCH_REGNUM] = 1;
call_used_regs[SPECULATION_SCRATCH_REGNUM] = 1;
}
+
+#ifdef TARGET_OS_USES_R18
+ /* R18 is the STATIC_CHAIN_REGNUM on most aarch64 ports, but VxWorks
+ uses it as the TCB, so aarch64-vxworks.h overrides
+ STATIC_CHAIN_REGNUM, and here we mark R18 as fixed. */
+ fixed_regs[R18_REGNUM] = 1;
+ call_used_regs[R18_REGNUM] = 1;
+#endif
}
/* Implement TARGET_MEMBER_TYPE_FORCES_BLK. */
@@ -22904,6 +23275,58 @@ aarch64_sve_index_immediate_p (rtx base_or_step)
&& IN_RANGE (INTVAL (base_or_step), -16, 15));
}
+/* Return true if SERIES is a constant vector that can be loaded using
+ an immediate SVE INDEX, considering both SVE and Advanced SIMD modes.
+ When returning true, store the base in *BASE_OUT and the step
+ in *STEP_OUT. */
+
+static bool
+aarch64_sve_index_series_p (rtx series, rtx *base_out, rtx *step_out)
+{
+ rtx base, step;
+ if (!const_vec_series_p (series, &base, &step)
+ || !CONST_INT_P (base)
+ || !CONST_INT_P (step))
+ return false;
+
+ auto mode = GET_MODE (series);
+ auto elt_mode = as_a<scalar_int_mode> (GET_MODE_INNER (mode));
+ unsigned int vec_flags = aarch64_classify_vector_mode (mode);
+ if (BYTES_BIG_ENDIAN && (vec_flags & VEC_ADVSIMD))
+ {
+ /* On big-endian targets, architectural lane 0 holds the last element
+ for Advanced SIMD and the first element for SVE; see the comment at
+ the head of aarch64-sve.md for details. This means that, from an SVE
+ point of view, an Advanced SIMD series goes from the last element to
+ the first. */
+ auto i = GET_MODE_NUNITS (mode).to_constant () - 1;
+ base = gen_int_mode (UINTVAL (base) + i * UINTVAL (step), elt_mode);
+ step = gen_int_mode (-UINTVAL (step), elt_mode);
+ }
+
+ if (!aarch64_sve_index_immediate_p (base)
+ || !aarch64_sve_index_immediate_p (step))
+ return false;
+
+ /* If the mode spans multiple registers, check that each subseries is
+ in range. */
+ unsigned int nvectors = aarch64_ldn_stn_vectors (mode);
+ if (nvectors != 1)
+ {
+ unsigned int nunits;
+ if (!GET_MODE_NUNITS (mode).is_constant (&nunits))
+ return false;
+ nunits /= nvectors;
+ for (unsigned int i = 1; i < nvectors; ++i)
+ if (!IN_RANGE (INTVAL (base) + i * nunits * INTVAL (step), -16, 15))
+ return false;
+ }
+
+ *base_out = base;
+ *step_out = step;
+ return true;
+}
+
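+/* For example, { 0, 1, 2, 3, ... } in VNx4SImode has base 0 and step 1,
+   both within the INDEX immediate range of [-16, 15], so it can be
+   loaded with something like "index z0.s, #0, #1".  A step of 16 would
+   fail the check and need the general expansion instead.  */
+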
/* Return true if X is a valid immediate for the SVE ADD and SUB instructions
when applied to mode MODE. Negate X first if NEGATE_P is true. */
@@ -23352,13 +23775,8 @@ aarch64_simd_valid_imm (rtx op, simd_immediate_info *info,
n_elts = CONST_VECTOR_NPATTERNS (op);
else if (which == AARCH64_CHECK_MOV
&& TARGET_SVE
- && const_vec_series_p (op, &base, &step))
+ && aarch64_sve_index_series_p (op, &base, &step))
{
- gcc_assert (GET_MODE_CLASS (mode) == MODE_VECTOR_INT);
- if (!aarch64_sve_index_immediate_p (base)
- || !aarch64_sve_index_immediate_p (step))
- return false;
-
if (info)
{
/* Get the corresponding container mode. E.g. an INDEX on V2SI
@@ -23470,6 +23888,8 @@ aarch64_simd_valid_imm (rtx op, simd_immediate_info *info,
long int as_long_ints[2];
as_long_ints[0] = ival & 0xFFFFFFFF;
as_long_ints[1] = (ival >> 32) & 0xFFFFFFFF;
+ if (imode == DImode && FLOAT_WORDS_BIG_ENDIAN)
+ std::swap (as_long_ints[0], as_long_ints[1]);
REAL_VALUE_TYPE r;
real_from_target (&r, as_long_ints, fmode);
@@ -23495,6 +23915,39 @@ aarch64_simd_valid_imm (rtx op, simd_immediate_info *info,
return false;
}
+/* Try to optimize the expansion of a maskload or maskstore with
+ the operands in OPERANDS, given that the vector being loaded or
+ stored has mode MODE. Return true on success or false if the normal
+ expansion should be used. */
+
+bool
+aarch64_expand_maskloadstore (rtx *operands, machine_mode mode)
+{
+ /* If the predicate in operands[2] is a patterned SVE PTRUE predicate
+ with patterns VL1, VL2, VL4, VL8, or VL16 and at most the bottom
+ 128 bits are loaded/stored, emit an ASIMD load/store. */
+ int vl = aarch64_partial_ptrue_length (operands[2]);
+ int width = vl * GET_MODE_UNIT_BITSIZE (mode);
+ if (width <= 128
+ && pow2p_hwi (vl)
+ && (vl == 1
+ || (!BYTES_BIG_ENDIAN
+ && aarch64_classify_vector_mode (mode) == VEC_SVE_DATA)))
+ {
+ machine_mode new_mode;
+ if (known_eq (width, 128))
+ new_mode = V16QImode;
+ else if (known_eq (width, 64))
+ new_mode = V8QImode;
+ else
+ new_mode = int_mode_for_size (width, 0).require ();
+ aarch64_emit_load_store_through_mode (operands[0], operands[1],
+ new_mode);
+ return true;
+ }
+ return false;
+}
+
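+/* For example, an SVE maskload of VNx4SImode whose predicate is a VL4
+   PTRUE loads exactly the low 128 bits, so on little-endian targets it
+   can be emitted as a plain "ldr q0, [x0]" through V16QImode.  */
+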
/* Return true if OP is a valid SIMD move immediate for SVE or AdvSIMD. */
bool
aarch64_simd_valid_mov_imm (rtx op)
@@ -23516,6 +23969,36 @@ aarch64_simd_valid_and_imm (rtx op)
return aarch64_simd_valid_imm (op, NULL, AARCH64_CHECK_AND);
}
+/* Return true if OP is a valid SIMD AND immediate that allows the AND to be
+ optimized into an FMOV. If ELT_BITSIZE is nonnull, use it to return the
+ number of bits to move. */
+bool
+aarch64_simd_valid_and_imm_fmov (rtx op, unsigned int *elt_bitsize)
+{
+ machine_mode mode = GET_MODE (op);
+ gcc_assert (!aarch64_sve_mode_p (mode));
+
+ auto_vec<target_unit, 16> buffer;
+ unsigned int n_bytes = GET_MODE_SIZE (mode).to_constant ();
+ buffer.reserve (n_bytes);
+
+ bool ok = native_encode_rtx (mode, op, buffer, 0, n_bytes);
+ gcc_assert (ok);
+
+ auto mask = native_decode_int (buffer, 0, n_bytes, n_bytes * BITS_PER_UNIT);
+ int set_bit = wi::exact_log2 (mask + 1);
+ if ((set_bit == 16 && TARGET_SIMD_F16INST)
+ || set_bit == 32
+ || set_bit == 64)
+ {
+ if (elt_bitsize)
+ *elt_bitsize = set_bit;
+ return true;
+ }
+
+ return false;
+}
+
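+/* For example, a V4SImode AND with { -1, 0, 0, 0 } keeps only the low
+   32 bits of the 128-bit register, which is what "fmov s0, s1" does
+   (FMOV zeroes the upper bits).  The decoded mask is 0xffffffff, so
+   mask + 1 is 1 << 32 and SET_BIT is 32.  */
+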
/* Return true if OP is a valid SIMD xor immediate for SVE. */
bool
aarch64_simd_valid_xor_imm (rtx op)
@@ -23551,6 +24034,19 @@ aarch64_simd_shift_imm_p (rtx x, machine_mode mode, bool left)
return IN_RANGE (INTVAL (x), 1, bit_width);
}
+
+/* Check whether X can control SVE mode MODE. */
+bool
+aarch64_sve_valid_pred_p (rtx x, machine_mode mode)
+{
+ machine_mode pred_mode = GET_MODE (x);
+ if (!aarch64_sve_pred_mode_p (pred_mode))
+ return false;
+
+ return known_ge (GET_MODE_NUNITS (pred_mode),
+ GET_MODE_NUNITS (mode));
+}
+
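+/* For example, a VNx16BImode predicate can control VNx4SImode, since it
+   has at least as many elements (16 vs. 4 per 128 bits), but a
+   VNx2BImode predicate cannot.  */
+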
/* Return the bitmask CONST_INT to select the bits required by a zero extract
operation of width WIDTH at bit position POS. */
@@ -23809,6 +24305,16 @@ aarch64_strided_registers_p (rtx *operands, unsigned int num_operands,
return true;
}
+/* Return the base-2 logarithm of the bitwise inverse of OP masked to the
+ lowest NELTS bits, if that masked value is a power of 2. Otherwise
+ return -1. */
+
+int
+aarch64_exact_log2_inverse (unsigned int nelts, rtx op)
+{
+ return exact_log2 ((~INTVAL (op))
+ & ((HOST_WIDE_INT_1U << nelts) - 1));
+}
+
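+/* For example, with NELTS == 4 and OP == 0b1011, the masked inverse is
+   0b0100, so the function returns 2; with OP == 0b1001 the inverse is
+   0b0110, which is not a power of 2, so it returns -1.  */
+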
/* Bounds-check lanes. Ensure OPERAND lies between LOW (inclusive) and
HIGH (exclusive). */
void
@@ -24096,10 +24602,14 @@ aarch64_simd_vector_alignment_reachable (const_tree type, bool is_packed)
static bool
aarch64_builtin_support_vector_misalignment (machine_mode mode,
const_tree type, int misalignment,
- bool is_packed)
+ bool is_packed,
+ bool is_gather_scatter)
{
if (TARGET_SIMD && STRICT_ALIGNMENT)
{
+ if (is_gather_scatter)
+ return true;
+
/* Return if movmisalign pattern is not supported for this mode. */
if (optab_handler (movmisalign_optab, mode) == CODE_FOR_nothing)
return false;
@@ -24109,7 +24619,8 @@ aarch64_builtin_support_vector_misalignment (machine_mode mode,
return false;
}
return default_builtin_support_vector_misalignment (mode, type, misalignment,
- is_packed);
+ is_packed,
+ is_gather_scatter);
}
/* If VALS is a vector constant that can be loaded into a register
@@ -24514,6 +25025,28 @@ seq_cost_ignoring_scalar_moves (const rtx_insn *seq, bool speed)
return cost;
}
+/* *VECTOR has an Advanced SIMD structure mode and *INDEX is a constant index
+ into it. Narrow *VECTOR and *INDEX so that they reference a single vector
+ of mode SUBVEC_MODE. IS_DEST is true if *VECTOR is a destination operand,
+ false if it is a source operand. */
+
+void
+aarch64_decompose_vec_struct_index (machine_mode subvec_mode,
+ rtx *vector, rtx *index, bool is_dest)
+{
+ auto elts_per_vector = GET_MODE_NUNITS (subvec_mode).to_constant ();
+ auto subvec = UINTVAL (*index) / elts_per_vector;
+ auto subelt = UINTVAL (*index) % elts_per_vector;
+ auto subvec_byte = subvec * GET_MODE_SIZE (subvec_mode);
+ if (is_dest)
+ *vector = simplify_gen_subreg (subvec_mode, *vector, GET_MODE (*vector),
+ subvec_byte);
+ else
+ *vector = force_subreg (subvec_mode, *vector, GET_MODE (*vector),
+ subvec_byte);
+ *index = gen_int_mode (subelt, SImode);
+}
+
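+/* For example, indexing element 5 of an OImode pair of V4SImode vectors
+   narrows to subvector 1 (byte offset 16) and element 1 within it,
+   since 5 / 4 == 1 and 5 % 4 == 1.  */
+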
/* Expand a vector initialization sequence, such that TARGET is
initialized to contain VALS. */
@@ -24547,12 +25080,18 @@ aarch64_expand_vector_init (rtx target, rtx vals)
rtx tmp_reg = gen_reg_rtx (GET_MODE (new_vals));
aarch64_expand_vector_init (tmp_reg, new_vals);
halves[i] = gen_rtx_SUBREG (mode, tmp_reg, 0);
- rtx_insn *rec_seq = get_insns ();
- end_sequence ();
+ rtx_insn *rec_seq = end_sequence ();
costs[i] = seq_cost_ignoring_scalar_moves (rec_seq, !optimize_size);
emit_insn (rec_seq);
}
+ /* The two halves should (by induction) be individually endian-correct.
+ However, in the memory layout provided by VALS, the nth element of
+ HALVES[0] comes immediately before the nth element of HALVES[1].
+ This means that, on big-endian targets, the nth element of HALVES[0]
+ is more significant than the nth element of HALVES[1]. */
+ if (BYTES_BIG_ENDIAN)
+ std::swap (halves[0], halves[1]);
rtvec v = gen_rtvec (2, halves[0], halves[1]);
rtx_insn *zip1_insn
= emit_set_insn (target, gen_rtx_UNSPEC (mode, v, UNSPEC_ZIP1));
@@ -24560,8 +25099,7 @@ aarch64_expand_vector_init (rtx target, rtx vals)
= (!optimize_size) ? std::max (costs[0], costs[1]) : costs[0] + costs[1];
seq_total_cost += insn_cost (zip1_insn, !optimize_size);
- rtx_insn *seq = get_insns ();
- end_sequence ();
+ rtx_insn *seq = end_sequence ();
start_sequence ();
aarch64_expand_vector_init_fallback (target, vals);
@@ -25014,20 +25552,41 @@ aarch64_asm_preferred_eh_data_format (int code ATTRIBUTE_UNUSED, int global)
return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
}
+/* Return true if function declaration FNDECL needs to be marked as
+ having a variant PCS. */
+
+static bool
+aarch64_is_variant_pcs (tree fndecl)
+{
+ /* Check for ABIs that preserve more registers than usual. */
+ arm_pcs pcs = (arm_pcs) fndecl_abi (fndecl).id ();
+ if (pcs == ARM_PCS_SIMD || pcs == ARM_PCS_SVE)
+ return true;
+
+ /* Check for ABIs that allow PSTATE.SM to be 1 on entry. */
+ tree fntype = TREE_TYPE (fndecl);
+ if (aarch64_fntype_pstate_sm (fntype) != AARCH64_ISA_MODE_SM_OFF)
+ return true;
+
+ /* Check for ABIs that require PSTATE.ZA to be 1 on entry, either because
+ of ZA or ZT0. */
+ if (aarch64_fntype_pstate_za (fntype) != 0)
+ return true;
+
+ return false;
+}
+
/* Output .variant_pcs for aarch64_vector_pcs function symbols. */
static void
aarch64_asm_output_variant_pcs (FILE *stream, const tree decl, const char* name)
{
- if (TREE_CODE (decl) == FUNCTION_DECL)
+ if (TREE_CODE (decl) == FUNCTION_DECL
+ && aarch64_is_variant_pcs (decl))
{
- arm_pcs pcs = (arm_pcs) fndecl_abi (decl).id ();
- if (pcs == ARM_PCS_SIMD || pcs == ARM_PCS_SVE)
- {
- fprintf (stream, "\t.variant_pcs\t");
- assemble_name (stream, name);
- fprintf (stream, "\n");
- }
+ fprintf (stream, "\t.variant_pcs\t");
+ assemble_name (stream, name);
+ fprintf (stream, "\n");
}
}
@@ -25191,7 +25750,6 @@ aarch64_start_file (void)
}
/* Emit load exclusive. */
-
static void
aarch64_emit_load_exclusive (machine_mode mode, rtx rval,
rtx mem, rtx model_rtx)
@@ -25642,6 +26200,26 @@ aarch64_float_const_representable_p (rtx x)
return aarch64_real_float_const_representable_p (r);
}
+/* Return the string with the FMOV instruction that is equivalent to an AND
+ instruction with the SIMD immediate CONST_VECTOR. */
+char*
+aarch64_output_fmov (rtx const_vector)
+{
+ bool is_valid;
+ static char templ[40];
+ char element_char;
+ unsigned int elt_bitsize;
+
+ is_valid = aarch64_simd_valid_and_imm_fmov (const_vector, &elt_bitsize);
+ gcc_assert (is_valid);
+
+ element_char = sizetochar (elt_bitsize);
+ snprintf (templ, sizeof (templ), "fmov\t%%%c0, %%%c1", element_char,
+ element_char);
+
+ return templ;
+}
+
/* Returns the string with the instruction for the SIMD immediate
* CONST_VECTOR of MODE and WIDTH. WHICH selects a move, and(bic) or orr. */
char*
@@ -26191,6 +26769,8 @@ aarch64_evpc_reencode (struct expand_vec_perm_d *d)
newd.op1 = d->op1 ? gen_lowpart (new_mode, d->op1) : NULL;
newd.testing_p = d->testing_p;
newd.one_vector_p = d->one_vector_p;
+ newd.zero_op0_p = d->zero_op0_p;
+ newd.zero_op1_p = d->zero_op1_p;
newd.perm.new_vector (newpermindices.encoding (), newd.one_vector_p ? 1 : 2,
newpermindices.nelts_per_input ());
@@ -26448,7 +27028,6 @@ aarch64_evpc_hvla (struct expand_vec_perm_d *d)
machine_mode vmode = d->vmode;
if (!TARGET_SVE2p1
|| !TARGET_NON_STREAMING
- || BYTES_BIG_ENDIAN
|| d->vec_flags != VEC_SVE_DATA
|| GET_MODE_UNIT_BITSIZE (vmode) > 64)
return false;
@@ -26608,12 +27187,23 @@ aarch64_evpc_tbl (struct expand_vec_perm_d *d)
static bool
aarch64_evpc_sve_tbl (struct expand_vec_perm_d *d)
{
- unsigned HOST_WIDE_INT nelt;
+ if (!d->one_vector_p)
+ {
+ /* aarch64_expand_sve_vec_perm does not yet handle variable-length
+ vectors. */
+ if (!d->perm.length ().is_constant ())
+ return false;
- /* Permuting two variable-length vectors could overflow the
- index range. */
- if (!d->one_vector_p && !d->perm.length ().is_constant (&nelt))
- return false;
+ /* This permutation reduces to the vec_perm optab if the elements are
+ large enough to hold all selector indices. Do not handle that case
+ here, since the general TBL+SUB+TBL+ORR sequence is too expensive to
+ be considered a "native" constant permutation.
+
+ Not doing this would undermine code that queries can_vec_perm_const_p
+ with allow_variable_p set to false. See PR121027. */
+ if (selector_fits_mode_p (d->vmode, d->perm))
+ return false;
+ }
if (d->testing_p)
return true;
@@ -26774,6 +27364,40 @@ aarch64_evpc_ins (struct expand_vec_perm_d *d)
return true;
}
+/* Recognize patterns suitable for the AND instructions. */
+static bool
+aarch64_evpc_and (struct expand_vec_perm_d *d)
+{
+ /* Either d->op0 or d->op1 should be a vector of all zeros. */
+ if (d->one_vector_p || (!d->zero_op0_p && !d->zero_op1_p))
+ return false;
+
+ machine_mode mode = d->vmode;
+ machine_mode sel_mode;
+ if (!related_int_vector_mode (mode).exists (&sel_mode))
+ return false;
+
+ insn_code and_code = optab_handler (and_optab, sel_mode);
+ rtx and_mask = vec_perm_and_mask (sel_mode, d->perm, d->zero_op0_p);
+ if (and_code == CODE_FOR_nothing || !and_mask)
+ return false;
+
+ if (d->testing_p)
+ return true;
+
+ class expand_operand ops[3];
+ rtx in = d->zero_op0_p ? d->op1 : d->op0;
+ create_output_operand (&ops[0], gen_lowpart (sel_mode, d->target), sel_mode);
+ create_input_operand (&ops[1], gen_lowpart (sel_mode, in), sel_mode);
+ create_input_operand (&ops[2], and_mask, sel_mode);
+ expand_insn (and_code, 3, ops);
+ rtx result = gen_lowpart (mode, ops[0].value);
+ if (!rtx_equal_p (d->target, result))
+ emit_move_insn (d->target, result);
+
+ return true;
+}
+
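+/* For example, a V4SImode permute of { x, 0 } with selector { 0, 5, 2, 7 }
+   keeps lanes 0 and 2 of X and zeroes lanes 1 and 3, which is just an
+   AND with the mask { -1, 0, -1, 0 }.  */
+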
static bool
aarch64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
{
@@ -26809,6 +27433,8 @@ aarch64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
return true;
else if (aarch64_evpc_uzp (d))
return true;
+ else if (aarch64_evpc_and (d))
+ return true;
else if (aarch64_evpc_trn (d))
return true;
else if (aarch64_evpc_sel (d))
@@ -26869,11 +27495,17 @@ aarch64_vectorize_vec_perm_const (machine_mode vmode, machine_mode op_mode,
d.op_mode = op_mode;
d.op_vec_flags = aarch64_classify_vector_mode (d.op_mode);
d.target = target;
- d.op0 = op0 ? force_reg (op_mode, op0) : NULL_RTX;
+ d.op0 = op0;
+ if (d.op0 && !register_operand (d.op0, op_mode))
+ d.op0 = force_reg (op_mode, d.op0);
if (op0 && d.one_vector_p)
d.op1 = copy_rtx (d.op0);
else
- d.op1 = op1 ? force_reg (op_mode, op1) : NULL_RTX;
+ {
+ d.op1 = op1;
+ if (d.op1 && !register_operand (d.op1, op_mode))
+ d.op1 = force_reg (op_mode, d.op1);
+ }
d.testing_p = !target;
if (!d.testing_p)
@@ -26961,7 +27593,7 @@ aarch64_emit_sve_fp_cond (rtx target, rtx_code code, rtx pred,
bool known_ptrue_p, rtx op0, rtx op1)
{
rtx flag = gen_int_mode (known_ptrue_p, SImode);
- rtx unspec = gen_rtx_UNSPEC (GET_MODE (pred),
+ rtx unspec = gen_rtx_UNSPEC (GET_MODE (target),
gen_rtvec (4, pred, flag, op0, op1),
aarch64_unspec_cond_code (code));
emit_set_insn (target, unspec);
@@ -26980,10 +27612,10 @@ static void
aarch64_emit_sve_or_fp_conds (rtx target, rtx_code code1, rtx_code code2,
rtx pred, bool known_ptrue_p, rtx op0, rtx op1)
{
- machine_mode pred_mode = GET_MODE (pred);
- rtx tmp1 = gen_reg_rtx (pred_mode);
+ machine_mode target_mode = GET_MODE (target);
+ rtx tmp1 = gen_reg_rtx (target_mode);
aarch64_emit_sve_fp_cond (tmp1, code1, pred, known_ptrue_p, op0, op1);
- rtx tmp2 = gen_reg_rtx (pred_mode);
+ rtx tmp2 = gen_reg_rtx (target_mode);
aarch64_emit_sve_fp_cond (tmp2, code2, pred, known_ptrue_p, op0, op1);
aarch64_emit_binop (target, ior_optab, tmp1, tmp2);
}
@@ -27000,8 +27632,7 @@ static void
aarch64_emit_sve_invert_fp_cond (rtx target, rtx_code code, rtx pred,
bool known_ptrue_p, rtx op0, rtx op1)
{
- machine_mode pred_mode = GET_MODE (pred);
- rtx tmp = gen_reg_rtx (pred_mode);
+ rtx tmp = gen_reg_rtx (GET_MODE (target));
aarch64_emit_sve_fp_cond (tmp, code, pred, known_ptrue_p, op0, op1);
aarch64_emit_unop (target, one_cmpl_optab, tmp);
}
@@ -27013,10 +27644,25 @@ aarch64_emit_sve_invert_fp_cond (rtx target, rtx_code code, rtx pred,
void
aarch64_expand_sve_vec_cmp_float (rtx target, rtx_code code, rtx op0, rtx op1)
{
- machine_mode pred_mode = GET_MODE (target);
machine_mode data_mode = GET_MODE (op0);
+ rtx pred = aarch64_sve_fp_pred (data_mode, nullptr);
- rtx ptrue = aarch64_ptrue_reg (pred_mode);
+ /* The governing and destination modes. */
+ machine_mode pred_mode = GET_MODE (pred);
+ machine_mode target_mode = GET_MODE (target);
+
+ /* For partial vector modes, the choice of predicate mode depends
+ on whether we need to suppress exceptions for inactive elements.
+ If we do need to suppress exceptions, the predicate mode matches
+ the element size rather than the container size and the predicate
+ marks the upper bits in each container as inactive. The predicate
+ is then a ptrue wrt TARGET_MODE but not wrt PRED_MODE. It is the
+ latter which matters here.
+
+ If we don't need to suppress exceptions, the predicate mode matches
+ the container size, PRED_MODE == TARGET_MODE, and the predicate is
+ thus a ptrue wrt both TARGET_MODE and PRED_MODE. */
+ bool known_ptrue_p = pred_mode == target_mode;
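+ /* For example, for VNx2SFmode with trapping math, TARGET_MODE is
+ VNx2BImode but PRED has mode VNx4BImode, with only the low 32-bit
+ element of each 64-bit container active; KNOWN_PTRUE_P is then
+ false. */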
switch (code)
{
case UNORDERED:
@@ -27030,12 +27676,13 @@ aarch64_expand_sve_vec_cmp_float (rtx target, rtx_code code, rtx op0, rtx op1)
case EQ:
case NE:
/* There is native support for the comparison. */
- aarch64_emit_sve_fp_cond (target, code, ptrue, true, op0, op1);
+ aarch64_emit_sve_fp_cond (target, code, pred, known_ptrue_p, op0, op1);
return;
case LTGT:
/* This is a trapping operation (LT or GT). */
- aarch64_emit_sve_or_fp_conds (target, LT, GT, ptrue, true, op0, op1);
+ aarch64_emit_sve_or_fp_conds (target, LT, GT,
+ pred, known_ptrue_p, op0, op1);
return;
case UNEQ:
@@ -27044,7 +27691,7 @@ aarch64_expand_sve_vec_cmp_float (rtx target, rtx_code code, rtx op0, rtx op1)
/* This would trap for signaling NaNs. */
op1 = force_reg (data_mode, op1);
aarch64_emit_sve_or_fp_conds (target, UNORDERED, EQ,
- ptrue, true, op0, op1);
+ pred, known_ptrue_p, op0, op1);
return;
}
/* fall through */
@@ -27054,11 +27701,19 @@ aarch64_expand_sve_vec_cmp_float (rtx target, rtx_code code, rtx op0, rtx op1)
case UNGE:
if (flag_trapping_math)
{
- /* Work out which elements are ordered. */
- rtx ordered = gen_reg_rtx (pred_mode);
op1 = force_reg (data_mode, op1);
- aarch64_emit_sve_invert_fp_cond (ordered, UNORDERED,
- ptrue, true, op0, op1);
+
+ /* Work out which elements are unordered. */
+ rtx uo_tmp = gen_reg_rtx (target_mode);
+ aarch64_emit_sve_fp_cond (uo_tmp, UNORDERED,
+ pred, known_ptrue_p, op0, op1);
+
+ /* Invert the result.  Governed by PRED so that we only
+ flip the active bits. */
+ rtx ordered = gen_reg_rtx (pred_mode);
+ uo_tmp = gen_lowpart (pred_mode, uo_tmp);
+ emit_insn (gen_aarch64_pred_one_cmpl_z (pred_mode, ordered,
+ pred, uo_tmp));
/* Test the opposite condition for the ordered elements,
then invert the result. */
@@ -27083,7 +27738,8 @@ aarch64_expand_sve_vec_cmp_float (rtx target, rtx_code code, rtx op0, rtx op1)
/* There is native support for the inverse comparison. */
code = reverse_condition_maybe_unordered (code);
- aarch64_emit_sve_invert_fp_cond (target, code, ptrue, true, op0, op1);
+ aarch64_emit_sve_invert_fp_cond (target, code,
+ pred, known_ptrue_p, op0, op1);
}
/* Return true if:
@@ -27688,8 +28344,7 @@ aarch64_gen_ccmp_first (rtx_insn **prep_seq, rtx_insn **gen_seq,
end_sequence ();
return NULL_RTX;
}
- *prep_seq = get_insns ();
- end_sequence ();
+ *prep_seq = end_sequence ();
create_fixed_operand (&ops[0], op0);
create_fixed_operand (&ops[1], op1);
@@ -27700,8 +28355,7 @@ aarch64_gen_ccmp_first (rtx_insn **prep_seq, rtx_insn **gen_seq,
end_sequence ();
return NULL_RTX;
}
- *gen_seq = get_insns ();
- end_sequence ();
+ *gen_seq = end_sequence ();
return gen_rtx_fmt_ee (code, cc_mode,
gen_rtx_REG (cc_mode, CC_REGNUM), const0_rtx);
@@ -27765,8 +28419,7 @@ aarch64_gen_ccmp_next (rtx_insn **prep_seq, rtx_insn **gen_seq, rtx prev,
end_sequence ();
return NULL_RTX;
}
- *prep_seq = get_insns ();
- end_sequence ();
+ *prep_seq = end_sequence ();
target = gen_rtx_REG (cc_mode, CC_REGNUM);
aarch64_cond = aarch64_get_condition_code_1 (cc_mode, cmp_code);
@@ -27805,8 +28458,7 @@ aarch64_gen_ccmp_next (rtx_insn **prep_seq, rtx_insn **gen_seq, rtx prev,
return NULL_RTX;
}
- *gen_seq = get_insns ();
- end_sequence ();
+ *gen_seq = end_sequence ();
return gen_rtx_fmt_ee (cmp_code, VOIDmode, target, const0_rtx);
}
@@ -29762,60 +30414,43 @@ aarch64_can_tag_addresses ()
/* Implement TARGET_ASM_FILE_END for AArch64. This adds the AArch64 GNU NOTE
section at the end if needed. */
-#define GNU_PROPERTY_AARCH64_FEATURE_1_AND 0xc0000000
-#define GNU_PROPERTY_AARCH64_FEATURE_1_BTI (1U << 0)
-#define GNU_PROPERTY_AARCH64_FEATURE_1_PAC (1U << 1)
-#define GNU_PROPERTY_AARCH64_FEATURE_1_GCS (1U << 2)
void
aarch64_file_end_indicate_exec_stack ()
{
file_end_indicate_exec_stack ();
- unsigned feature_1_and = 0;
- if (aarch_bti_enabled ())
- feature_1_and |= GNU_PROPERTY_AARCH64_FEATURE_1_BTI;
-
- if (aarch_ra_sign_scope != AARCH_FUNCTION_NONE)
- feature_1_and |= GNU_PROPERTY_AARCH64_FEATURE_1_PAC;
+ /* Check whether the current assembler supports AEABI build attributes, if
+ not fallback to .note.gnu.property section. */
+ if (HAVE_AS_AEABI_BUILD_ATTRIBUTES)
+ {
+ using namespace aarch64;
+ aeabi_subsection<BA_TagFeature_t, bool, 3>
+ aeabi_subsec ("aeabi_feature_and_bits", true);
- if (aarch64_gcs_enabled ())
- feature_1_and |= GNU_PROPERTY_AARCH64_FEATURE_1_GCS;
+ aeabi_subsec.append (
+ make_aeabi_attribute (Tag_Feature_BTI, aarch_bti_enabled ()));
+ aeabi_subsec.append (
+ make_aeabi_attribute (Tag_Feature_PAC, aarch64_pacret_enabled ()));
+ aeabi_subsec.append (
+ make_aeabi_attribute (Tag_Feature_GCS, aarch64_gcs_enabled ()));
- if (feature_1_and)
+ if (!aeabi_subsec.empty ())
+ aeabi_subsec.write (asm_out_file);
+ }
+ else
{
- /* Generate .note.gnu.property section. */
- switch_to_section (get_section (".note.gnu.property",
- SECTION_NOTYPE, NULL));
+ aarch64::section_note_gnu_property gnu_properties;
- /* PT_NOTE header: namesz, descsz, type.
- namesz = 4 ("GNU\0")
- descsz = 16 (Size of the program property array)
- [(12 + padding) * Number of array elements]
- type = 5 (NT_GNU_PROPERTY_TYPE_0). */
- assemble_align (POINTER_SIZE);
- assemble_integer (GEN_INT (4), 4, 32, 1);
- assemble_integer (GEN_INT (ROUND_UP (12, POINTER_BYTES)), 4, 32, 1);
- assemble_integer (GEN_INT (5), 4, 32, 1);
-
- /* PT_NOTE name. */
- assemble_string ("GNU", 4);
-
- /* PT_NOTE contents for NT_GNU_PROPERTY_TYPE_0:
- type = GNU_PROPERTY_AARCH64_FEATURE_1_AND
- datasz = 4
- data = feature_1_and. */
- assemble_integer (GEN_INT (GNU_PROPERTY_AARCH64_FEATURE_1_AND), 4, 32, 1);
- assemble_integer (GEN_INT (4), 4, 32, 1);
- assemble_integer (GEN_INT (feature_1_and), 4, 32, 1);
-
- /* Pad the size of the note to the required alignment. */
- assemble_align (POINTER_SIZE);
+ if (aarch_bti_enabled ())
+ gnu_properties.bti_enabled ();
+ if (aarch64_pacret_enabled ())
+ gnu_properties.pac_enabled ();
+ if (aarch64_gcs_enabled ())
+ gnu_properties.gcs_enabled ();
+
+ gnu_properties.write ();
}
}
-#undef GNU_PROPERTY_AARCH64_FEATURE_1_GCS
-#undef GNU_PROPERTY_AARCH64_FEATURE_1_PAC
-#undef GNU_PROPERTY_AARCH64_FEATURE_1_BTI
-#undef GNU_PROPERTY_AARCH64_FEATURE_1_AND
/* Helper function for straight line speculation.
Return what barrier should be emitted for straight line speculation
@@ -30391,8 +31026,7 @@ aarch64_mode_emit (int entity, int mode, int prev_mode, HARD_REG_SET live)
aarch64_local_sme_state (prev_mode));
break;
}
- rtx_insn *seq = get_insns ();
- end_sequence ();
+ rtx_insn *seq = end_sequence ();
/* Get the set of clobbered registers that are currently live. */
HARD_REG_SET clobbers = {};
@@ -30802,8 +31436,7 @@ aarch64_md_asm_adjust (vec<rtx> &outputs, vec<rtx> &inputs,
emit_insn (REGNO (x) == ZA_REGNUM
? gen_aarch64_asm_update_za (id_rtx)
: gen_aarch64_asm_update_zt0 (id_rtx));
- seq = get_insns ();
- end_sequence ();
+ seq = end_sequence ();
auto mode = REGNO (x) == ZA_REGNUM ? VNx16QImode : V8DImode;
uses.safe_push (gen_rtx_REG (mode, REGNO (x)));
@@ -30838,8 +31471,7 @@ aarch64_switch_pstate_sm_for_landing_pad (basic_block bb)
args_switch.emit_epilogue ();
if (guard_label)
emit_label (guard_label);
- auto seq = get_insns ();
- end_sequence ();
+ auto seq = end_sequence ();
emit_insn_after (seq, bb_note (bb));
return true;
@@ -30862,8 +31494,7 @@ aarch64_switch_pstate_sm_for_jump (rtx_insn *jump)
aarch64_switch_pstate_sm (AARCH64_ISA_MODE_SM_ON, AARCH64_ISA_MODE_SM_OFF);
if (guard_label)
emit_label (guard_label);
- auto seq = get_insns ();
- end_sequence ();
+ auto seq = end_sequence ();
emit_insn_before (seq, jump);
return true;
@@ -30897,8 +31528,7 @@ aarch64_switch_pstate_sm_for_call (rtx_call_insn *call)
args_switch.emit_epilogue ();
if (args_guard_label)
emit_label (args_guard_label);
- auto args_seq = get_insns ();
- end_sequence ();
+ auto args_seq = end_sequence ();
emit_insn_before (args_seq, call);
if (find_reg_note (call, REG_NORETURN, NULL_RTX))
@@ -30918,8 +31548,7 @@ aarch64_switch_pstate_sm_for_call (rtx_call_insn *call)
return_switch.emit_epilogue ();
if (return_guard_label)
emit_label (return_guard_label);
- auto result_seq = get_insns ();
- end_sequence ();
+ auto result_seq = end_sequence ();
emit_insn_after (result_seq, call);
return true;
}
@@ -31294,6 +31923,79 @@ aarch64_expand_reversed_crc_using_pmull (scalar_mode crc_mode,
}
}
+/* Expand the spaceship optab for floating-point operands.
+
+ If the result is compared against (-1, 0, 1, -128), expand into
+ fcmpe + conditional branch insns.
+
+ Otherwise (the result is just stored as an integer), expand into
+ fcmpe + a sequence of conditional select/increment/invert insns. */
+void
+aarch64_expand_fp_spaceship (rtx dest, rtx op0, rtx op1, rtx hint)
+{
+ rtx cc_reg = gen_rtx_REG (CCFPEmode, CC_REGNUM);
+ emit_set_insn (cc_reg, gen_rtx_COMPARE (CCFPEmode, op0, op1));
+
+ rtx cc_gt = gen_rtx_GT (VOIDmode, cc_reg, const0_rtx);
+ rtx cc_lt = gen_rtx_LT (VOIDmode, cc_reg, const0_rtx);
+ rtx cc_un = gen_rtx_UNORDERED (VOIDmode, cc_reg, const0_rtx);
+
+ if (hint == const0_rtx)
+ {
+ rtx un_label = gen_label_rtx ();
+ rtx lt_label = gen_label_rtx ();
+ rtx gt_label = gen_label_rtx ();
+ rtx end_label = gen_label_rtx ();
+
+ rtx temp = gen_rtx_IF_THEN_ELSE (VOIDmode, cc_un,
+ gen_rtx_LABEL_REF (Pmode, un_label), pc_rtx);
+ aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx, temp));
+
+ temp = gen_rtx_IF_THEN_ELSE (VOIDmode, cc_lt,
+ gen_rtx_LABEL_REF (Pmode, lt_label), pc_rtx);
+ emit_jump_insn (gen_rtx_SET (pc_rtx, temp));
+
+ temp = gen_rtx_IF_THEN_ELSE (VOIDmode, cc_gt,
+ gen_rtx_LABEL_REF (Pmode, gt_label), pc_rtx);
+ emit_jump_insn (gen_rtx_SET (pc_rtx, temp));
+
+ /* Equality. */
+ emit_move_insn (dest, const0_rtx);
+ emit_jump (end_label);
+
+ emit_label (un_label);
+ emit_move_insn (dest, GEN_INT (-128));
+ emit_jump (end_label);
+
+ emit_label (gt_label);
+ emit_move_insn (dest, const1_rtx);
+ emit_jump (end_label);
+
+ emit_label (lt_label);
+ emit_move_insn (dest, constm1_rtx);
+
+ emit_label (end_label);
+ }
+ else
+ {
+ rtx temp0 = gen_reg_rtx (SImode);
+ rtx temp1 = gen_reg_rtx (SImode);
+ rtx cc_ungt = gen_rtx_UNGT (VOIDmode, cc_reg, const0_rtx);
+
+ /* The value of HINT is stored if the operands are unordered. */
+ rtx temp_un = gen_int_mode (UINTVAL (hint) - 1, SImode);
+ if (!aarch64_reg_zero_or_m1_or_1 (temp_un, SImode))
+ temp_un = force_reg (SImode, temp_un);
+
+ emit_set_insn (temp0, gen_rtx_IF_THEN_ELSE (SImode, cc_lt,
+ constm1_rtx, const0_rtx));
+ emit_set_insn (temp1, gen_rtx_IF_THEN_ELSE (SImode, cc_un,
+ temp_un, const0_rtx));
+ emit_set_insn (dest, gen_rtx_IF_THEN_ELSE (SImode, cc_ungt,
+ gen_rtx_PLUS (SImode, temp1, const1_rtx), temp0));
+ }
+}
+
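+/* In effect, the non-branching sequence above computes:
+     temp0 = (op0 < op1) ? -1 : 0;
+     temp1 = unordered (op0, op1) ? hint - 1 : 0;
+     dest  = ungt (op0, op1) ? temp1 + 1 : temp0;
+   so greater yields 1, unordered yields HINT, less yields -1, and
+   equal yields 0.  */
+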
/* Target-specific selftests. */
#if CHECKING_P
@@ -31468,9 +32170,43 @@ aarch64_test_sysreg_encoding_clashes (void)
static void
aarch64_test_sve_folding ()
{
+ aarch64_target_switcher switcher (AARCH64_FL_SVE);
+
tree res = fold_unary (BIT_NOT_EXPR, ssizetype,
ssize_int (poly_int64 (1, 1)));
ASSERT_TRUE (operand_equal_p (res, ssize_int (poly_int64 (-2, -1))));
+
+ auto build_v16bi = [](bool a, bool b)
+ {
+ rtx_vector_builder builder (VNx16BImode, 2, 1);
+ builder.quick_push (a ? const1_rtx : const0_rtx);
+ builder.quick_push (b ? const1_rtx : const0_rtx);
+ return builder.build ();
+ };
+ rtx v16bi_10 = build_v16bi (1, 0);
+ rtx v16bi_01 = build_v16bi (0, 1);
+
+ for (auto mode : { VNx8BImode, VNx4BImode, VNx2BImode })
+ {
+ rtx reg = gen_rtx_REG (mode, LAST_VIRTUAL_REGISTER + 1);
+ rtx subreg = lowpart_subreg (VNx16BImode, reg, mode);
+ rtx and1 = simplify_gen_binary (AND, VNx16BImode, subreg, v16bi_10);
+ ASSERT_EQ (lowpart_subreg (mode, and1, VNx16BImode), reg);
+ rtx and0 = simplify_gen_binary (AND, VNx16BImode, subreg, v16bi_01);
+ ASSERT_EQ (lowpart_subreg (mode, and0, VNx16BImode), CONST0_RTX (mode));
+
+ rtx ior1 = simplify_gen_binary (IOR, VNx16BImode, subreg, v16bi_10);
+ ASSERT_EQ (lowpart_subreg (mode, ior1, VNx16BImode), CONSTM1_RTX (mode));
+ rtx ior0 = simplify_gen_binary (IOR, VNx16BImode, subreg, v16bi_01);
+ ASSERT_EQ (lowpart_subreg (mode, ior0, VNx16BImode), reg);
+
+ rtx xor1 = simplify_gen_binary (XOR, VNx16BImode, subreg, v16bi_10);
+ ASSERT_RTX_EQ (lowpart_subreg (mode, xor1, VNx16BImode),
+ lowpart_subreg (mode, gen_rtx_NOT (VNx16BImode, subreg),
+ VNx16BImode));
+ rtx xor0 = simplify_gen_binary (XOR, VNx16BImode, subreg, v16bi_01);
+ ASSERT_EQ (lowpart_subreg (mode, xor0, VNx16BImode), reg);
+ }
}
/* Run all target-specific selftests. */
diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
index e8bd8c7..2b3610c 100644
--- a/gcc/config/aarch64/aarch64.h
+++ b/gcc/config/aarch64/aarch64.h
@@ -410,6 +410,10 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE ATTRIBUTE_UNUSED
/* CSSC instructions are enabled through +cssc. */
#define TARGET_CSSC AARCH64_HAVE_ISA (CSSC)
+/* CB<cc> instructions are enabled through +cmpbr,
+ but are incompatible with -mtrack-speculation. */
+#define TARGET_CMPBR (AARCH64_HAVE_ISA (CMPBR) && !aarch64_track_speculation)
+
/* Make sure this is always defined so we don't have to check for ifdefs
but rather use normal ifs. */
#ifndef TARGET_FIX_ERR_A53_835769_DEFAULT
@@ -490,6 +494,11 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE ATTRIBUTE_UNUSED
(bool (aarch64_tune_params.extra_tuning_flags \
& AARCH64_EXTRA_TUNE_CHEAP_FPMR_WRITE))
+/* Enable folding address computation into LDAPUR when RCPC2 is available. */
+#define TARGET_ENABLE_LDAPUR (TARGET_RCPC2 \
+ && !(aarch64_tune_params.extra_tuning_flags \
+ & AARCH64_EXTRA_TUNE_AVOID_LDAPUR))
+
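A hedged illustration, not from the patch: with +rcpc2 and a tuning model that does not set AVOID_LDAPUR, an acquire load at a small immediate offset can use LDAPUR directly instead of a separate address computation plus LDAPR.

#include <atomic>

/* Sketch only: expected to become a single "ldapur w0, [x0, 4]"
   under TARGET_ENABLE_LDAPUR, rather than "add" + "ldapr".  */
int load_second (std::atomic<int> *p)
{
  return p[1].load (std::memory_order_acquire);
}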
/* Combinatorial tests. */
#define TARGET_SVE2_OR_SME2 \
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 031e621..6e215c4 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -136,6 +136,14 @@
;; The actual value can sometimes vary, because it does not track
;; changes to PSTATE.ZA that happen during a lazy save and restore.
;; Those effects are instead tracked by ZA_SAVED_REGNUM.
+ ;;
+ ;; Sequences also write to this register if they synchronize the
+ ;; actual contents of ZA and PSTATE.ZA with the current function's
+ ;; ZA_REGNUM and SME_STATE_REGNUM. Conceptually, these extra writes
+ ;; do not change the value of SME_STATE_REGNUM. They simply act as
+ ;; sequencing points. They mean that all direct accesses to ZA can
+ ;; depend only on ZA_REGNUM and SME_STATE_REGNUM, rather than also
+ ;; depending on ZA_SAVED_REGNUM etc.
(SME_STATE_REGNUM 89)
;; Instructions write to this register if they set TPIDR2_EL0 to a
@@ -272,6 +280,7 @@
UNSPEC_PACIBSP
UNSPEC_PRLG_STK
UNSPEC_REV
+ UNSPEC_REV_PRED
UNSPEC_SADALP
UNSPEC_SCVTF
UNSPEC_SET_LANE
@@ -432,6 +441,16 @@
; must not operate on inactive inputs if doing so could induce a fault.
(SVE_STRICT_GP 1)])
+;; These constants are used as a const_int in MTE instructions
+(define_constants
+ [; 0xf0ff...
+ ; Mask that clears the 4-bit MTE tag stored in bits 59:56 of a pointer.
+ (MEMTAG_TAG_MASK -1080863910568919041)
+
+ ; 0x00ff...
+ ; Mask for the 56-bit address used by the subp instruction.
+ (MEMTAG_ADDR_MASK 72057594037927935)])
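As a quick editorial check of the decimal encodings above, both constants can be verified at compile time:

/* Sketch only: the decimal values match the hex masks in the comments.  */
static_assert ((unsigned long long) -1080863910568919041LL
               == 0xF0FFFFFFFFFFFFFFULL,
               "MEMTAG_TAG_MASK keeps all bits except the tag in 59:56");
static_assert (72057594037927935ULL == 0x00FFFFFFFFFFFFFFULL,
               "MEMTAG_ADDR_MASK keeps the low 56 address bits");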
+
(include "constraints.md")
(include "predicates.md")
(include "iterators.md")
@@ -474,6 +493,8 @@
;; clobber for SVE predicates.
(define_attr "pred_clobber" "any,no,yes" (const_string "any"))
+(define_attr "enable_ldapur" "any,no,yes" (const_string "any"))
+
;; [For compatibility with Arm in pipeline models]
;; Attribute that specifies whether or not the instruction touches fp
;; registers.
@@ -498,7 +519,14 @@
(eq_attr "pred_clobber" "yes")
(match_test "TARGET_SVE_PRED_CLOBBER"))
(eq_attr "pred_clobber" "any"))
-
+ (ior
+ (and
+ (eq_attr "enable_ldapur" "yes")
+ (match_test "TARGET_ENABLE_LDAPUR"))
+ (and
+ (eq_attr "enable_ldapur" "no")
+ (match_test "!TARGET_ENABLE_LDAPUR"))
+ (eq_attr "enable_ldapur" "any"))
(ior
(eq_attr "arch" "any")
@@ -561,9 +589,7 @@
;; Attribute that specifies whether we are dealing with a branch to a
;; label that is far away, i.e. further away than the maximum/minimum
;; representable in a signed 21-bits number.
-;; 0 :=: no
-;; 1 :=: yes
-(define_attr "far_branch" "" (const_int 0))
+(define_attr "far_branch" "no,yes" (const_string "no"))
;; Attribute that specifies whether the alternative uses MOVPRFX.
(define_attr "movprfx" "no,yes" (const_string "no"))
@@ -674,6 +700,10 @@
"msrr\t%x0, %x1, %H1"
)
+;; -------------------------------------------------------------------
+;; Unconditional jumps
+;; -------------------------------------------------------------------
+
(define_insn "indirect_jump"
[(set (pc) (match_operand:DI 0 "register_operand" "r"))]
""
@@ -692,43 +722,345 @@
[(set_attr "type" "branch")]
)
-(define_expand "cbranch<mode>4"
+;; Maximum PC-relative positive/negative displacements for various branching
+;; instructions.
+(define_constants
+ [
+ ;; +/- 1MiB. Used by B.<cond>, CBZ, CBNZ.
+ (BRANCH_LEN_P_1MiB 1048572)
+ (BRANCH_LEN_N_1MiB -1048576)
+
+ ;; +/- 32KiB. Used by TBZ, TBNZ.
+ (BRANCH_LEN_P_32KiB 32764)
+ (BRANCH_LEN_N_32KiB -32768)
+
+ ;; +/- 1KiB. Used by CBB<cond>, CBH<cond>, CB<cond>.
+ (BRANCH_LEN_P_1KiB 1020)
+ (BRANCH_LEN_N_1KiB -1024)
+ ]
+)
+
+;; -------------------------------------------------------------------
+;; Conditional jumps
+;; -------------------------------------------------------------------
+
+;; The order of the rules below is important.
+;; Higher priority rules are preferred because they can express larger
+;; displacements.
+;; 1) EQ/NE comparisons against zero are handled by CBZ/CBNZ.
+;; 2) LT/GE comparisons against zero are handled by TBZ/TBNZ.
+;; 3) When the CMPBR extension is enabled:
+;; a) Comparisons between two registers are handled by
+;; CBB<cond>/CBH<cond>/CB<cond>.
+;; b) Comparisons between a GP register and an in-range immediate are
+;; handled by CB<cond> (immediate).
+;; 4) Otherwise, emit a CMP+B<cond> sequence.
+;; -------------------------------------------------------------------
+
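For orientation, an editorial sketch (assuming a target with +cmpbr and without -mtrack-speculation) of the instruction each rule above is expected to select; the exact mnemonics are illustrative:

extern void g (void);

/* Sketch only: expected branch selection per the priority list above.  */
void pick_branches (int x, int y)
{
  if (x == 0)
    g ();		/* Rule 1: cbz w0, .L...  */
  if (x < 0)
    g ();		/* Rule 2: tbnz w0, #31, .L...  */
  if (x > y)
    g ();		/* Rule 3a: cbgt w0, w1, .L...  */
  if (x > 42)
    g ();		/* Rule 3b: cbgt w0, #42, .L...  */
  if (x > 100000)
    g ();		/* Rule 4: mov + cmp + b.gt  */
}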
+(define_expand "cbranch<GPI:mode>4"
[(set (pc) (if_then_else (match_operator 0 "aarch64_comparison_operator"
[(match_operand:GPI 1 "register_operand")
(match_operand:GPI 2 "aarch64_plus_operand")])
- (label_ref (match_operand 3 "" ""))
+ (label_ref (match_operand 3))
(pc)))]
""
- "
- operands[1] = aarch64_gen_compare_reg (GET_CODE (operands[0]), operands[1],
- operands[2]);
- operands[2] = const0_rtx;
- "
+ {
+ if (TARGET_CMPBR && aarch64_cb_rhs (GET_CODE (operands[0]), operands[2]))
+ {
+ /* The branch is supported natively. */
+ }
+ else
+ {
+ operands[1] = aarch64_gen_compare_reg (GET_CODE (operands[0]),
+ operands[1], operands[2]);
+ operands[2] = const0_rtx;
+ }
+ }
)
-(define_expand "cbranch<mode>4"
+(define_expand "cbranch<SHORT:mode>4"
[(set (pc) (if_then_else (match_operator 0 "aarch64_comparison_operator"
- [(match_operand:GPF 1 "register_operand")
- (match_operand:GPF 2 "aarch64_fp_compare_operand")])
- (label_ref (match_operand 3 "" ""))
+ [(match_operand:SHORT 1 "register_operand")
+ (match_operand:SHORT 2 "aarch64_reg_or_zero")])
+ (label_ref (match_operand 3))
(pc)))]
+ "TARGET_CMPBR"
""
- "
- operands[1] = aarch64_gen_compare_reg (GET_CODE (operands[0]), operands[1],
- operands[2]);
- operands[2] = const0_rtx;
- "
+)
+
+(define_expand "cbranch<mode>4"
+ [(set (pc) (if_then_else
+ (match_operator 0 "aarch64_comparison_operator"
+ [(match_operand:GPF_F16 1 "register_operand")
+ (match_operand:GPF_F16 2 "aarch64_fp_compare_operand")])
+ (label_ref (match_operand 3))
+ (pc)))]
+ ""
+ {
+ operands[1] = aarch64_gen_compare_reg (GET_CODE (operands[0]), operands[1],
+ operands[2]);
+ operands[2] = const0_rtx;
+ }
)
(define_expand "cbranchcc4"
+ [(set (pc) (if_then_else (match_operator 0 "aarch64_comparison_operator"
+ [(match_operand 1 "cc_register")
+ (match_operand 2 "const0_operand")])
+ (label_ref (match_operand 3))
+ (pc)))]
+ ""
+ ""
+)
+
+;; For an EQ/NE comparison against zero, emit `CBZ`/`CBNZ`
+(define_insn "*aarch64_cbz<optab><mode>"
+ [(set (pc) (if_then_else (EQL (match_operand:GPI 0 "register_operand" "r")
+ (const_int 0))
+ (label_ref (match_operand 1))
+ (pc)))]
+ "!aarch64_track_speculation"
+ {
+ if (get_attr_length (insn) == 8)
+ return aarch64_gen_far_branch (operands, 1, "Lcb", "<inv_cb>\\t%<w>0, ");
+ else
+ return "<cbz>\\t%<w>0, %l1";
+ }
+ [(set_attr "type" "branch")
+ (set (attr "length")
+ (if_then_else (and (ge (minus (match_dup 1) (pc))
+ (const_int BRANCH_LEN_N_1MiB))
+ (lt (minus (match_dup 1) (pc))
+ (const_int BRANCH_LEN_P_1MiB)))
+ (const_int 4)
+ (const_int 8)))
+ (set (attr "far_branch")
+ (if_then_else (and (ge (minus (match_dup 1) (pc))
+ (const_int BRANCH_LEN_N_1MiB))
+ (lt (minus (match_dup 1) (pc))
+ (const_int BRANCH_LEN_P_1MiB)))
+ (const_string "no")
+ (const_string "yes")))]
+)
+
+;; For an LT/GE comparison against zero, emit `TBZ`/`TBNZ`
+(define_insn "*aarch64_tbz<optab><mode>1"
+ [(set (pc) (if_then_else (LTGE (match_operand:ALLI 0 "register_operand" "r")
+ (const_int 0))
+ (label_ref (match_operand 1))
+ (pc)))]
+ "!aarch64_track_speculation"
+ {
+ if (get_attr_length (insn) == 4)
+ return "<tbz>\t%<w>0, <sizem1>, %l1";
+ return aarch64_gen_far_branch (operands, 1, "Ltb",
+ "<inv_tb>\\t%<w>0, <sizem1>, ");
+ }
+ [(set_attr "type" "branch")
+ (set (attr "length")
+ (if_then_else (and (ge (minus (match_dup 1) (pc))
+ (const_int BRANCH_LEN_N_32KiB))
+ (lt (minus (match_dup 1) (pc))
+ (const_int BRANCH_LEN_P_32KiB)))
+ (const_int 4)
+ (const_int 8)))
+ (set (attr "far_branch")
+ (if_then_else (and (ge (minus (match_dup 1) (pc))
+ (const_int BRANCH_LEN_N_32KiB))
+ (lt (minus (match_dup 1) (pc))
+ (const_int BRANCH_LEN_P_32KiB)))
+ (const_string "no")
+ (const_string "yes")))]
+)
+
+;; Emit a `CB<cond> (register)` or `CB<cond> (immediate)` instruction.
+;; The immediate range depends on the comparison code.
+(define_insn "*aarch64_cb<code><mode>"
[(set (pc) (if_then_else
- (match_operator 0 "aarch64_comparison_operator"
- [(match_operand 1 "cc_register")
- (match_operand 2 "const0_operand")])
- (label_ref (match_operand 3 "" ""))
- (pc)))]
+ (INT_CMP
+ (match_operand:GPI 0 "register_operand" "r")
+ (match_operand:GPI 1
+ "aarch64_reg_<cmpbr_imm_constraint>_operand"
+ "r<cmpbr_imm_constraint>"))
+ (label_ref (match_operand 2))
+ (pc)))]
+ "TARGET_CMPBR"
+ {
+ if (get_attr_length (insn) == 4)
+ return "cb<cmp_op>\t%<w>0, %<w>1, %l2";
+ return aarch64_gen_far_branch (operands, 2, "L",
+ "cb<inv_cmp_op>\t%<w>0, %<w>1, ");
+ }
+ [(set_attr "type" "branch")
+ (set (attr "length")
+ (if_then_else (and (ge (minus (match_dup 2) (pc))
+ (const_int BRANCH_LEN_N_1KiB))
+ (lt (minus (match_dup 2) (pc))
+ (const_int BRANCH_LEN_P_1KiB)))
+ (const_int 4)
+ (const_int 8)))
+ (set (attr "far_branch")
+ (if_then_else (and (ge (minus (match_dup 2) (pc))
+ (const_int BRANCH_LEN_N_1KiB))
+ (lt (minus (match_dup 2) (pc))
+ (const_int BRANCH_LEN_P_1KiB)))
+ (const_string "no")
+ (const_string "yes")))]
+)
+
+;; Emit a `CBB<cond> (register)` or `CBH<cond> (register)` instruction.
+(define_insn "aarch64_cb<INT_CMP:code><SHORT:mode>"
+ [(set (pc) (if_then_else (INT_CMP
+ (match_operand:SHORT 0 "register_operand" "r")
+ (match_operand:SHORT 1 "aarch64_reg_or_zero" "rZ"))
+ (label_ref (match_operand 2))
+ (pc)))]
+ "TARGET_CMPBR"
+ {
+ return (get_attr_far_branch (insn) == FAR_BRANCH_NO)
+ ? "cb<SHORT:cmpbr_suffix><INT_CMP:cmp_op>\\t%<w>0, %<w>1, %l2"
+ : aarch64_gen_far_branch (operands, 2, "L",
+ "cb<SHORT:cmpbr_suffix><INT_CMP:inv_cmp_op>\\t%<w>0, %<w>1, ");
+ }
+ [(set_attr "type" "branch")
+ (set (attr "length")
+ (if_then_else (and (ge (minus (match_dup 2) (pc))
+ (const_int BRANCH_LEN_N_1KiB))
+ (lt (minus (match_dup 2) (pc))
+ (const_int BRANCH_LEN_P_1KiB)))
+ (const_int 4)
+ (const_int 8)))
+ (set (attr "far_branch")
+ (if_then_else (and (ge (minus (match_dup 2) (pc))
+ (const_int BRANCH_LEN_N_1KiB))
+ (lt (minus (match_dup 2) (pc))
+ (const_int BRANCH_LEN_P_1KiB)))
+ (const_string "no")
+ (const_string "yes")))]
+)
+
+;; Emit `B<cond>`, assuming that the condition is already in the CC register.
+(define_insn "aarch64_bcond"
+ [(set (pc) (if_then_else (match_operator 0 "aarch64_comparison_operator"
+ [(match_operand 1 "cc_register")
+ (const_int 0)])
+ (label_ref (match_operand 2))
+ (pc)))]
""
- "")
+ {
+ /* GCC's traditional style has been to use "beq" instead of "b.eq", etc.,
+ but the "." is required for SVE conditions. */
+ bool use_dot_p = GET_MODE (operands[1]) == CC_NZCmode;
+ if (get_attr_length (insn) == 8)
+ return aarch64_gen_far_branch (operands, 2, "Lbcond",
+ use_dot_p ? "b.%M0\\t" : "b%M0\\t");
+ else
+ return use_dot_p ? "b.%m0\\t%l2" : "b%m0\\t%l2";
+ }
+ [(set_attr "type" "branch")
+ (set (attr "length")
+ (if_then_else (and (ge (minus (match_dup 2) (pc))
+ (const_int BRANCH_LEN_N_1MiB))
+ (lt (minus (match_dup 2) (pc))
+ (const_int BRANCH_LEN_P_1MiB)))
+ (const_int 4)
+ (const_int 8)))
+ (set (attr "far_branch")
+ (if_then_else (and (ge (minus (match_dup 2) (pc))
+ (const_int BRANCH_LEN_N_1MiB))
+ (lt (minus (match_dup 2) (pc))
+ (const_int BRANCH_LEN_P_1MiB)))
+ (const_string "no")
+ (const_string "yes")))]
+)
+
+;; For a 24-bit immediate CST we can optimize the compare for equality.
+(define_insn_and_split "*aarch64_bcond_wide_imm<GPI:mode>"
+ [(set (pc) (if_then_else
+ (match_operator 0 "aarch64_equality_operator"
+ [(match_operand:GPI 1 "register_operand" "r")
+ (match_operand:GPI 2 "aarch64_split_imm24" "n")])
+ (label_ref (match_operand 3))
+ (pc)))
+ (clobber (reg:CC CC_REGNUM))
+ (clobber (match_scratch:GPI 4 "=r"))]
+ ""
+ "#"
+ ""
+ [(const_int 0)]
+ {
+ rtx cc_reg = aarch64_gen_compare_split_imm24 (operands[1], operands[2],
+ operands[4]);
+ emit_jump_insn (gen_aarch64_bcond (operands[0], cc_reg, operands[3]));
+ DONE;
+ }
+)
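The shortened comment above drops the worked example that the removed pattern carried; for reference, an editorial sketch of the transformation for a hypothetical CST of 0x123456:

/* Sketch only: instead of
     mov w1, 0x3456; movk w1, 0x12, lsl 16; cmp w0, w1; b.eq .L
   the split above emits the shorter
     sub w1, w0, #0x123000; subs w1, w1, #0x456; b.eq .L  */
int eq_imm24 (unsigned int x)
{
  return x == 0x123456;
}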
+
+;; -------------------------------------------------------------------
+;; Test bit and branch
+;; -------------------------------------------------------------------
+
+(define_expand "tbranch_<code><mode>3"
+ [(set (pc) (if_then_else (EQL
+ (match_operand:SHORT 0 "register_operand")
+ (match_operand 1 "const0_operand"))
+ (label_ref (match_operand 2 ""))
+ (pc)))]
+ ""
+{
+ rtx bitvalue = gen_reg_rtx (<ZEROM>mode);
+ rtx reg = gen_lowpart (<ZEROM>mode, operands[0]);
+ rtx val = gen_int_mode (HOST_WIDE_INT_1U << UINTVAL (operands[1]),
+ <MODE>mode);
+ emit_insn (gen_and<zerom>3 (bitvalue, reg, val));
+ operands[1] = const0_rtx;
+ operands[0] = aarch64_gen_compare_reg (<CODE>, bitvalue,
+ operands[1]);
+})
+
+(define_insn "@aarch64_tbz<optab><ALLI:mode><GPI:mode>"
+ [(set (pc) (if_then_else (EQL
+ (zero_extract:GPI
+ (match_operand:ALLI 0 "register_operand" "r")
+ (const_int 1)
+ (match_operand 1 "aarch64_simd_shift_imm_<ALLI:mode>" "n"))
+ (const_int 0))
+ (label_ref (match_operand 2))
+ (pc)))
+ (clobber (reg:CC CC_REGNUM))]
+ "!aarch64_track_speculation"
+ {
+ if (get_attr_length (insn) == 8)
+ {
+ if (get_attr_far_branch (insn) == FAR_BRANCH_YES)
+ return aarch64_gen_far_branch (operands, 2, "Ltb",
+ "<inv_tb>\\t%<ALLI:w>0, %1, ");
+ else
+ {
+ operands[1] = GEN_INT (HOST_WIDE_INT_1U << UINTVAL (operands[1]));
+ return "tst\t%<ALLI:w>0, %1\;<bcond>\t%l2";
+ }
+ }
+ else
+ return "<tbz>\t%<ALLI:w>0, %1, %l2";
+ }
+ [(set_attr "type" "branch")
+ (set (attr "length")
+ (if_then_else (and (ge (minus (match_dup 2) (pc))
+ (const_int BRANCH_LEN_N_32KiB))
+ (lt (minus (match_dup 2) (pc))
+ (const_int BRANCH_LEN_P_32KiB)))
+ (const_int 4)
+ (const_int 8)))
+ (set (attr "far_branch")
+ (if_then_else (and (ge (minus (match_dup 2) (pc))
+ (const_int BRANCH_LEN_N_1MiB))
+ (lt (minus (match_dup 2) (pc))
+ (const_int BRANCH_LEN_P_1MiB)))
+ (const_string "no")
+ (const_string "yes")))]
+
+)
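An editorial example of the single-bit tests the pattern above matches:

extern void g (void);

/* Sketch only: each condition extracts one bit, so TBNZ/TBZ apply
   directly while the label stays within +/-32KiB.  */
void test_bits (unsigned int w)
{
  if (w & (1u << 5))
    g ();		/* tbnz w0, #5, .L...  */
  if (!(w & (1u << 31)))
    g ();		/* tbz w0, #31, .L...  */
}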
(define_insn "@ccmp<CC_ONLY:mode><GPI:mode>"
[(set (match_operand:CC_ONLY 1 "cc_register")
@@ -860,71 +1192,6 @@
}
)
-(define_insn "condjump"
- [(set (pc) (if_then_else (match_operator 0 "aarch64_comparison_operator"
- [(match_operand 1 "cc_register" "") (const_int 0)])
- (label_ref (match_operand 2 "" ""))
- (pc)))]
- ""
- {
- /* GCC's traditional style has been to use "beq" instead of "b.eq", etc.,
- but the "." is required for SVE conditions. */
- bool use_dot_p = GET_MODE (operands[1]) == CC_NZCmode;
- if (get_attr_length (insn) == 8)
- return aarch64_gen_far_branch (operands, 2, "Lbcond",
- use_dot_p ? "b.%M0\\t" : "b%M0\\t");
- else
- return use_dot_p ? "b.%m0\\t%l2" : "b%m0\\t%l2";
- }
- [(set_attr "type" "branch")
- (set (attr "length")
- (if_then_else (and (ge (minus (match_dup 2) (pc)) (const_int -1048576))
- (lt (minus (match_dup 2) (pc)) (const_int 1048572)))
- (const_int 4)
- (const_int 8)))
- (set (attr "far_branch")
- (if_then_else (and (ge (minus (match_dup 2) (pc)) (const_int -1048576))
- (lt (minus (match_dup 2) (pc)) (const_int 1048572)))
- (const_int 0)
- (const_int 1)))]
-)
-
-;; For a 24-bit immediate CST we can optimize the compare for equality
-;; and branch sequence from:
-;; mov x0, #imm1
-;; movk x0, #imm2, lsl 16 /* x0 contains CST. */
-;; cmp x1, x0
-;; b<ne,eq> .Label
-;; into the shorter:
-;; sub x0, x1, #(CST & 0xfff000)
-;; subs x0, x0, #(CST & 0x000fff)
-;; b<ne,eq> .Label
-(define_insn_and_split "*compare_condjump<GPI:mode>"
- [(set (pc) (if_then_else (EQL
- (match_operand:GPI 0 "register_operand" "r")
- (match_operand:GPI 1 "aarch64_imm24" "n"))
- (label_ref:P (match_operand 2 "" ""))
- (pc)))]
- "!aarch64_move_imm (INTVAL (operands[1]), <GPI:MODE>mode)
- && !aarch64_plus_operand (operands[1], <GPI:MODE>mode)
- && !reload_completed"
- "#"
- "&& true"
- [(const_int 0)]
- {
- HOST_WIDE_INT lo_imm = UINTVAL (operands[1]) & 0xfff;
- HOST_WIDE_INT hi_imm = UINTVAL (operands[1]) & 0xfff000;
- rtx tmp = gen_reg_rtx (<GPI:MODE>mode);
- emit_insn (gen_add<GPI:mode>3 (tmp, operands[0], GEN_INT (-hi_imm)));
- emit_insn (gen_add<GPI:mode>3_compare0 (tmp, tmp, GEN_INT (-lo_imm)));
- rtx cc_reg = gen_rtx_REG (CC_NZmode, CC_REGNUM);
- rtx cmp_rtx = gen_rtx_fmt_ee (<EQL:CMP>, <GPI:MODE>mode,
- cc_reg, const0_rtx);
- emit_jump_insn (gen_condjump (cmp_rtx, cc_reg, operands[2]));
- DONE;
- }
-)
-
(define_expand "casesi"
[(match_operand:SI 0 "register_operand") ; Index
(match_operand:SI 1 "const_int_operand") ; Lower bound
@@ -1116,127 +1383,6 @@
(set_attr "sls_length" "retbr")]
)
-(define_insn "aarch64_cb<optab><mode>1"
- [(set (pc) (if_then_else (EQL (match_operand:GPI 0 "register_operand" "r")
- (const_int 0))
- (label_ref (match_operand 1 "" ""))
- (pc)))]
- "!aarch64_track_speculation"
- {
- if (get_attr_length (insn) == 8)
- return aarch64_gen_far_branch (operands, 1, "Lcb", "<inv_cb>\\t%<w>0, ");
- else
- return "<cbz>\\t%<w>0, %l1";
- }
- [(set_attr "type" "branch")
- (set (attr "length")
- (if_then_else (and (ge (minus (match_dup 1) (pc)) (const_int -1048576))
- (lt (minus (match_dup 1) (pc)) (const_int 1048572)))
- (const_int 4)
- (const_int 8)))
- (set (attr "far_branch")
- (if_then_else (and (ge (minus (match_dup 2) (pc)) (const_int -1048576))
- (lt (minus (match_dup 2) (pc)) (const_int 1048572)))
- (const_int 0)
- (const_int 1)))]
-)
-
-(define_expand "tbranch_<code><mode>3"
- [(set (pc) (if_then_else
- (EQL (match_operand:SHORT 0 "register_operand")
- (match_operand 1 "const0_operand"))
- (label_ref (match_operand 2 ""))
- (pc)))]
- ""
-{
- rtx bitvalue = gen_reg_rtx (<ZEROM>mode);
- rtx reg = gen_lowpart (<ZEROM>mode, operands[0]);
- rtx val = gen_int_mode (HOST_WIDE_INT_1U << UINTVAL (operands[1]), <MODE>mode);
- emit_insn (gen_and<zerom>3 (bitvalue, reg, val));
- operands[1] = const0_rtx;
- operands[0] = aarch64_gen_compare_reg (<CODE>, bitvalue,
- operands[1]);
-})
-
-(define_insn "@aarch64_tb<optab><ALLI:mode><GPI:mode>"
- [(set (pc) (if_then_else
- (EQL (zero_extract:GPI (match_operand:ALLI 0 "register_operand" "r")
- (const_int 1)
- (match_operand 1
- "aarch64_simd_shift_imm_<ALLI:mode>" "n"))
- (const_int 0))
- (label_ref (match_operand 2 "" ""))
- (pc)))
- (clobber (reg:CC CC_REGNUM))]
- "!aarch64_track_speculation"
- {
- if (get_attr_length (insn) == 8)
- {
- if (get_attr_far_branch (insn) == 1)
- return aarch64_gen_far_branch (operands, 2, "Ltb",
- "<inv_tb>\\t%<ALLI:w>0, %1, ");
- else
- {
- operands[1] = GEN_INT (HOST_WIDE_INT_1U << UINTVAL (operands[1]));
- return "tst\t%<ALLI:w>0, %1\;<bcond>\t%l2";
- }
- }
- else
- return "<tbz>\t%<ALLI:w>0, %1, %l2";
- }
- [(set_attr "type" "branch")
- (set (attr "length")
- (if_then_else (and (ge (minus (match_dup 2) (pc)) (const_int -32768))
- (lt (minus (match_dup 2) (pc)) (const_int 32764)))
- (const_int 4)
- (const_int 8)))
- (set (attr "far_branch")
- (if_then_else (and (ge (minus (match_dup 2) (pc)) (const_int -1048576))
- (lt (minus (match_dup 2) (pc)) (const_int 1048572)))
- (const_int 0)
- (const_int 1)))]
-
-)
-
-(define_insn "*cb<optab><mode>1"
- [(set (pc) (if_then_else (LTGE (match_operand:ALLI 0 "register_operand" "r")
- (const_int 0))
- (label_ref (match_operand 1 "" ""))
- (pc)))
- (clobber (reg:CC CC_REGNUM))]
- "!aarch64_track_speculation"
- {
- if (get_attr_length (insn) == 8)
- {
- if (get_attr_far_branch (insn) == 1)
- return aarch64_gen_far_branch (operands, 1, "Ltb",
- "<inv_tb>\\t%<w>0, <sizem1>, ");
- else
- {
- char buf[64];
- uint64_t val = ((uint64_t) 1)
- << (GET_MODE_SIZE (<MODE>mode) * BITS_PER_UNIT - 1);
- sprintf (buf, "tst\t%%<w>0, %" PRId64, val);
- output_asm_insn (buf, operands);
- return "<bcond>\t%l1";
- }
- }
- else
- return "<tbz>\t%<w>0, <sizem1>, %l1";
- }
- [(set_attr "type" "branch")
- (set (attr "length")
- (if_then_else (and (ge (minus (match_dup 1) (pc)) (const_int -32768))
- (lt (minus (match_dup 1) (pc)) (const_int 32764)))
- (const_int 4)
- (const_int 8)))
- (set (attr "far_branch")
- (if_then_else (and (ge (minus (match_dup 1) (pc)) (const_int -1048576))
- (lt (minus (match_dup 1) (pc)) (const_int 1048572)))
- (const_int 0)
- (const_int 1)))]
-)
-
(define_expand "save_stack_nonlocal"
[(set (match_operand 0 "memory_operand")
(match_operand 1 "register_operand"))]
@@ -1250,16 +1396,16 @@
/* Save GCS with code like
mov x16, 1
chkfeat x16
- tbnz x16, 0, .L_done
+ cbnz x16, .L_done
mrs tmp, gcspr_el0
str tmp, [%0, 8]
.L_done: */
- rtx done_label = gen_label_rtx ();
+ auto done_label = gen_label_rtx ();
rtx r16 = gen_rtx_REG (DImode, R16_REGNUM);
emit_move_insn (r16, const1_rtx);
emit_insn (gen_aarch64_chkfeat ());
- emit_insn (gen_tbranch_neqi3 (r16, const0_rtx, done_label));
+ emit_jump_insn (aarch64_gen_compare_zero_and_branch (NE, r16, done_label));
rtx gcs_slot = adjust_address (operands[0], Pmode, GET_MODE_SIZE (Pmode));
rtx gcs = gen_reg_rtx (Pmode);
emit_insn (gen_aarch64_load_gcspr (gcs));
@@ -1282,7 +1428,7 @@
/* Restore GCS with code like
mov x16, 1
chkfeat x16
- tbnz x16, 0, .L_done
+ cbnz x16, .L_done
ldr tmp1, [%1, 8]
mrs tmp2, gcspr_el0
subs tmp2, tmp1, tmp2
@@ -1293,12 +1439,12 @@
b.ne .L_loop
.L_done: */
- rtx loop_label = gen_label_rtx ();
- rtx done_label = gen_label_rtx ();
+ auto loop_label = gen_label_rtx ();
+ auto done_label = gen_label_rtx ();
rtx r16 = gen_rtx_REG (DImode, R16_REGNUM);
emit_move_insn (r16, const1_rtx);
emit_insn (gen_aarch64_chkfeat ());
- emit_insn (gen_tbranch_neqi3 (r16, const0_rtx, done_label));
+ emit_jump_insn (aarch64_gen_compare_zero_and_branch (NE, r16, done_label));
rtx gcs_slot = adjust_address (operands[1], Pmode, GET_MODE_SIZE (Pmode));
rtx gcs_old = gen_reg_rtx (Pmode);
emit_move_insn (gcs_old, gcs_slot);
@@ -1307,13 +1453,13 @@
emit_insn (gen_subdi3_compare1 (gcs_now, gcs_old, gcs_now));
rtx cc_reg = gen_rtx_REG (CC_NZmode, CC_REGNUM);
rtx cmp_rtx = gen_rtx_fmt_ee (EQ, DImode, cc_reg, const0_rtx);
- emit_jump_insn (gen_condjump (cmp_rtx, cc_reg, done_label));
+ emit_jump_insn (gen_aarch64_bcond (cmp_rtx, cc_reg, done_label));
emit_label (loop_label);
emit_insn (gen_aarch64_gcspopm_xzr ());
emit_insn (gen_adddi3_compare0 (gcs_now, gcs_now, GEN_INT (-8)));
cc_reg = gen_rtx_REG (CC_NZmode, CC_REGNUM);
cmp_rtx = gen_rtx_fmt_ee (NE, DImode, cc_reg, const0_rtx);
- emit_jump_insn (gen_condjump (cmp_rtx, cc_reg, loop_label));
+ emit_jump_insn (gen_aarch64_bcond (cmp_rtx, cc_reg, loop_label));
emit_label (done_label);
}
DONE;
@@ -2181,9 +2327,9 @@
"aarch64_mem_pair_offset (operands[4], <MODE>mode)
&& known_eq (INTVAL (operands[5]),
INTVAL (operands[4]) + GET_MODE_SIZE (<MODE>mode))"
- {@ [cons: =&0, 1, =2, =3; attrs: type ]
- [ rk, 0, r, r; load_<ldpstp_sz>] ldp\t%<w>2, %<w>3, [%0, %4]!
- [ rk, 0, w, w; neon_load1_2reg ] ldp\t%<v>2, %<v>3, [%0, %4]!
+ {@ [cons: =0, 1, =2, =3; attrs: type ]
+ [ &rk, 0, r, r; load_<ldpstp_sz>] ldp\t%<w>2, %<w>3, [%0, %4]!
+ [ rk, 0, w, w; neon_load1_2reg ] ldp\t%<v>2, %<v>3, [%0, %4]!
}
)
@@ -2237,9 +2383,9 @@
INTVAL (operands[4]) + GET_MODE_SIZE (<MODE>mode))
&& !reg_overlap_mentioned_p (operands[0], operands[2])
&& !reg_overlap_mentioned_p (operands[0], operands[3])"
- {@ [cons: =&0, 1, 2, 3; attrs: type ]
- [ rk, 0, rYZ, rYZ; store_<ldpstp_sz>] stp\t%<w>2, %<w>3, [%0, %4]!
- [ rk, 0, w, w; neon_store1_2reg ] stp\t%<v>2, %<v>3, [%0, %4]!
+ {@ [cons: =0, 1, 2, 3; attrs: type ]
+ [ &rk, 0, rYZ, rYZ; store_<ldpstp_sz>] stp\t%<w>2, %<w>3, [%0, %4]!
+ [ rk, 0, w, w; neon_store1_2reg ] stp\t%<v>2, %<v>3, [%0, %4]!
}
)
@@ -2485,15 +2631,15 @@
(match_operand:GPI 1 "register_operand")
(match_operand:GPI 2 "aarch64_pluslong_operand")))]
""
- {@ [ cons: =0 , 1 , 2 ; attrs: type , arch ]
- [ rk , %rk , I ; alu_imm , * ] add\t%<w>0, %<w>1, %2
- [ rk , rk , r ; alu_sreg , * ] add\t%<w>0, %<w>1, %<w>2
- [ w , w , w ; neon_add , simd ] add\t%<rtn>0<vas>, %<rtn>1<vas>, %<rtn>2<vas>
- [ rk , rk , J ; alu_imm , * ] sub\t%<w>0, %<w>1, #%n2
- [ r , rk , Uaa ; multiple , * ] #
- [ r , 0 , Uai ; alu_imm , sve ] << aarch64_output_sve_scalar_inc_dec (operands[2]);
- [ rk , rk , Uav ; alu_imm , sve ] << aarch64_output_sve_addvl_addpl (operands[2]);
- [ rk , rk , UaV ; alu_imm , sme ] << aarch64_output_addsvl_addspl (operands[2]);
+ {@ [ cons: =0 , %1 , 2 ; attrs: type , arch ]
+ [ rk , rk , I ; alu_imm , * ] add\t%<w>0, %<w>1, %2
+ [ rk , rk , r ; alu_sreg , * ] add\t%<w>0, %<w>1, %<w>2
+ [ w , w , w ; neon_add , simd ] add\t%<rtn>0<vas>, %<rtn>1<vas>, %<rtn>2<vas>
+ [ rk , rk , J ; alu_imm , * ] sub\t%<w>0, %<w>1, #%n2
+ [ r , rk , Uaa ; multiple , * ] #
+ [ r , 0 , Uai ; alu_imm , sve ] << aarch64_output_sve_scalar_inc_dec (operands[2]);
+ [ rk , rk , Uav ; alu_imm , sve ] << aarch64_output_sve_addvl_addpl (operands[2]);
+ [ rk , rk , UaV ; alu_imm , sme ] << aarch64_output_addsvl_addspl (operands[2]);
}
;; The "alu_imm" types for INC/DEC and ADDVL/ADDPL are just placeholders.
)
@@ -2506,11 +2652,11 @@
(plus:SI (match_operand:SI 1 "register_operand")
(match_operand:SI 2 "aarch64_pluslong_operand"))))]
""
- {@ [ cons: =0 , 1 , 2 ; attrs: type ]
- [ rk , %rk , I ; alu_imm ] add\t%w0, %w1, %2
- [ rk , rk , r ; alu_sreg ] add\t%w0, %w1, %w2
- [ rk , rk , J ; alu_imm ] sub\t%w0, %w1, #%n2
- [ r , rk , Uaa ; multiple ] #
+ {@ [ cons: =0 , %1 , 2 ; attrs: type ]
+ [ rk , rk , I ; alu_imm ] add\t%w0, %w1, %2
+ [ rk , rk , r ; alu_sreg ] add\t%w0, %w1, %w2
+ [ rk , rk , J ; alu_imm ] sub\t%w0, %w1, #%n2
+ [ r , rk , Uaa ; multiple ] #
}
)
@@ -2579,14 +2725,14 @@
(match_operand:GPI 1 "register_operand")
(match_operand:GPI 2 "aarch64_pluslong_or_poly_operand")))]
"TARGET_SVE && operands[0] != stack_pointer_rtx"
- {@ [ cons: =0 , 1 , 2 ; attrs: type ]
- [ r , %rk , I ; alu_imm ] add\t%<w>0, %<w>1, %2
- [ r , rk , r ; alu_sreg ] add\t%<w>0, %<w>1, %<w>2
- [ r , rk , J ; alu_imm ] sub\t%<w>0, %<w>1, #%n2
- [ r , rk , Uaa ; multiple ] #
- [ r , 0 , Uai ; alu_imm ] << aarch64_output_sve_scalar_inc_dec (operands[2]);
- [ r , rk , Uav ; alu_imm ] << aarch64_output_sve_addvl_addpl (operands[2]);
- [ &r , rk , Uat ; multiple ] #
+ {@ [ cons: =0 , %1 , 2 ; attrs: type ]
+ [ r , rk , I ; alu_imm ] add\t%<w>0, %<w>1, %2
+ [ r , rk , r ; alu_sreg ] add\t%<w>0, %<w>1, %<w>2
+ [ r , rk , J ; alu_imm ] sub\t%<w>0, %<w>1, #%n2
+ [ r , rk , Uaa ; multiple ] #
+ [ r , 0 , Uai ; alu_imm ] << aarch64_output_sve_scalar_inc_dec (operands[2]);
+ [ r , rk , Uav ; alu_imm ] << aarch64_output_sve_addvl_addpl (operands[2]);
+ [ &r , rk , Uat ; multiple ] #
}
"&& epilogue_completed
&& !reg_overlap_mentioned_p (operands[0], operands[1])
@@ -2758,10 +2904,10 @@
(set (match_operand:GPI 0 "register_operand")
(plus:GPI (match_dup 1) (match_dup 2)))]
""
- {@ [ cons: =0 , 1 , 2 ; attrs: type ]
- [ r , %rk , r ; alus_sreg ] adds\t%<w>0, %<w>1, %<w>2
- [ r , rk , I ; alus_imm ] adds\t%<w>0, %<w>1, %2
- [ r , rk , J ; alus_imm ] subs\t%<w>0, %<w>1, #%n2
+ {@ [ cons: =0 , %1 , 2 ; attrs: type ]
+ [ r , rk , r ; alus_sreg ] adds\t%<w>0, %<w>1, %<w>2
+ [ r , rk , I ; alus_imm ] adds\t%<w>0, %<w>1, %2
+ [ r , rk , J ; alus_imm ] subs\t%<w>0, %<w>1, #%n2
}
)
@@ -2775,10 +2921,10 @@
(set (match_operand:DI 0 "register_operand")
(zero_extend:DI (plus:SI (match_dup 1) (match_dup 2))))]
""
- {@ [ cons: =0 , 1 , 2 ; attrs: type ]
- [ r , %rk , r ; alus_sreg ] adds\t%w0, %w1, %w2
- [ r , rk , I ; alus_imm ] adds\t%w0, %w1, %2
- [ r , rk , J ; alus_imm ] subs\t%w0, %w1, #%n2
+ {@ [ cons: =0 , %1 , 2 ; attrs: type ]
+ [ r , rk , r ; alus_sreg ] adds\t%w0, %w1, %w2
+ [ r , rk , I ; alus_imm ] adds\t%w0, %w1, %2
+ [ r , rk , J ; alus_imm ] subs\t%w0, %w1, #%n2
}
)
@@ -2979,10 +3125,10 @@
(match_operand:GPI 1 "aarch64_plus_operand"))
(const_int 0)))]
""
- {@ [ cons: 0 , 1 ; attrs: type ]
- [ %r , r ; alus_sreg ] cmn\t%<w>0, %<w>1
- [ r , I ; alus_imm ] cmn\t%<w>0, %1
- [ r , J ; alus_imm ] cmp\t%<w>0, #%n1
+ {@ [ cons: %0 , 1 ; attrs: type ]
+ [ r , r ; alus_sreg ] cmn\t%<w>0, %<w>1
+ [ r , I ; alus_imm ] cmn\t%<w>0, %1
+ [ r , J ; alus_imm ] cmp\t%<w>0, #%n1
}
)
@@ -4337,29 +4483,31 @@
(define_insn "fcmp<mode>"
[(set (reg:CCFP CC_REGNUM)
- (compare:CCFP (match_operand:GPF 0 "register_operand")
- (match_operand:GPF 1 "aarch64_fp_compare_operand")))]
+ (compare:CCFP
+ (match_operand:GPF_F16 0 "register_operand")
+ (match_operand:GPF_F16 1 "aarch64_fp_compare_operand")))]
"TARGET_FLOAT"
{@ [ cons: 0 , 1 ]
[ w , Y ] fcmp\t%<s>0, #0.0
[ w , w ] fcmp\t%<s>0, %<s>1
}
- [(set_attr "type" "fcmp<s>")]
+ [(set_attr "type" "fcmp<stype>")]
)
(define_insn "fcmpe<mode>"
[(set (reg:CCFPE CC_REGNUM)
- (compare:CCFPE (match_operand:GPF 0 "register_operand")
- (match_operand:GPF 1 "aarch64_fp_compare_operand")))]
+ (compare:CCFPE
+ (match_operand:GPF_F16 0 "register_operand")
+ (match_operand:GPF_F16 1 "aarch64_fp_compare_operand")))]
"TARGET_FLOAT"
{@ [ cons: 0 , 1 ]
[ w , Y ] fcmpe\t%<s>0, #0.0
[ w , w ] fcmpe\t%<s>0, %<s>1
}
- [(set_attr "type" "fcmp<s>")]
+ [(set_attr "type" "fcmp<stype>")]
)
-(define_insn "*cmp_swp_<shift>_reg<mode>"
+(define_insn "cmp_swp_<shift>_reg<mode>"
[(set (reg:CC_SWP CC_REGNUM)
(compare:CC_SWP (ASHIFT:GPI
(match_operand:GPI 0 "register_operand" "r")
@@ -4392,6 +4540,49 @@
[(set_attr "type" "alus_ext")]
)
+;; <=> operator pattern (integer)
+;; (a == b) ? 0 : (a < b) ? -1 : 1.
+(define_expand "spaceship<mode>4"
+ [(match_operand:SI 0 "register_operand")
+ (match_operand:GPI 1 "register_operand")
+ (match_operand:GPI 2 "register_operand")
+ (match_operand:SI 3 "const_int_operand")]
+ ""
+ {
+ // 1 indicates unsigned comparison, -1 indicates signed.
+ gcc_assert (operands[3] == constm1_rtx || operands[3] == const1_rtx);
+
+ rtx cc_reg = aarch64_gen_compare_reg (EQ, operands[1], operands[2]);
+ RTX_CODE code_gt = operands[3] == const1_rtx ? GTU : GT;
+ RTX_CODE code_lt = operands[3] == const1_rtx ? LTU : LT;
+
+ rtx cc_gt = gen_rtx_fmt_ee (code_gt, VOIDmode, cc_reg, const0_rtx);
+ rtx cc_lt = gen_rtx_fmt_ee (code_lt, VOIDmode, cc_reg, const0_rtx);
+
+ rtx temp = gen_reg_rtx (SImode);
+ emit_insn (gen_rtx_SET (temp, gen_rtx_IF_THEN_ELSE (SImode, cc_gt,
+ const1_rtx, const0_rtx)));
+ emit_insn (gen_rtx_SET (operands[0], gen_rtx_IF_THEN_ELSE (SImode, cc_lt,
+ constm1_rtx, temp)));
+ DONE;
+ }
+)
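An editorial sketch of the scalar semantics: one compare sets the flags, then two conditional selects build the result, with operand 3 only choosing signed versus unsigned condition codes.

/* Sketch only: signed flavour of the integer spaceship expansion.  */
int int_spaceship (long a, long b)
{
  int gt = a > b ? 1 : 0;	/* cset on the GT condition.  */
  return a < b ? -1 : gt;	/* Conditional select on LT.  */
}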
+
+;; <=> operator pattern (floating-point)
+;; (a == b) ? 0 : (a < b) ? -1 : (a > b) ? 1 : UNORDERED.
+(define_expand "spaceship<mode>4"
+ [(match_operand:SI 0 "register_operand")
+ (match_operand:GPF 1 "register_operand")
+ (match_operand:GPF 2 "register_operand")
+ (match_operand:SI 3 "const_int_operand")]
+ "TARGET_FLOAT"
+ {
+ aarch64_expand_fp_spaceship (operands[0], operands[1], operands[2],
+ operands[3]);
+ DONE;
+ }
+)
+
;; -------------------------------------------------------------------
;; Store-flag and conditional select insns
;; -------------------------------------------------------------------
@@ -4424,8 +4615,8 @@
(define_expand "cstore<mode>4"
[(set (match_operand:SI 0 "register_operand")
(match_operator:SI 1 "aarch64_comparison_operator_mode"
- [(match_operand:GPF 2 "register_operand")
- (match_operand:GPF 3 "aarch64_fp_compare_operand")]))]
+ [(match_operand:GPF_F16 2 "register_operand")
+ (match_operand:GPF_F16 3 "aarch64_fp_compare_operand")]))]
""
"
operands[2] = aarch64_gen_compare_reg (GET_CODE (operands[1]), operands[2],
@@ -4443,39 +4634,24 @@
[(set_attr "type" "csel")]
)
-;; For a 24-bit immediate CST we can optimize the compare for equality
-;; and branch sequence from:
-;; mov x0, #imm1
-;; movk x0, #imm2, lsl 16 /* x0 contains CST. */
-;; cmp x1, x0
-;; cset x2, <ne,eq>
-;; into the shorter:
-;; sub x0, x1, #(CST & 0xfff000)
-;; subs x0, x0, #(CST & 0x000fff)
-;; cset x2, <ne, eq>.
+;; For a 24-bit immediate CST we can optimize the compare for equality.
(define_insn_and_split "*compare_cstore<mode>_insn"
[(set (match_operand:GPI 0 "register_operand" "=r")
- (EQL:GPI (match_operand:GPI 1 "register_operand" "r")
- (match_operand:GPI 2 "aarch64_imm24" "n")))
- (clobber (reg:CC CC_REGNUM))]
- "!aarch64_move_imm (INTVAL (operands[2]), <MODE>mode)
- && !aarch64_plus_operand (operands[2], <MODE>mode)
- && !reload_completed"
+ (match_operator:GPI 1 "aarch64_equality_operator"
+ [(match_operand:GPI 2 "register_operand" "r")
+ (match_operand:GPI 3 "aarch64_split_imm24" "n")]))
+ (clobber (reg:CC CC_REGNUM))
+ (clobber (match_scratch:GPI 4 "=r"))]
+ ""
"#"
- "&& true"
+ ""
[(const_int 0)]
{
- HOST_WIDE_INT lo_imm = UINTVAL (operands[2]) & 0xfff;
- HOST_WIDE_INT hi_imm = UINTVAL (operands[2]) & 0xfff000;
- rtx tmp = gen_reg_rtx (<MODE>mode);
- emit_insn (gen_add<mode>3 (tmp, operands[1], GEN_INT (-hi_imm)));
- emit_insn (gen_add<mode>3_compare0 (tmp, tmp, GEN_INT (-lo_imm)));
- rtx cc_reg = gen_rtx_REG (CC_NZmode, CC_REGNUM);
- rtx cmp_rtx = gen_rtx_fmt_ee (<EQL:CMP>, <MODE>mode, cc_reg, const0_rtx);
- emit_insn (gen_aarch64_cstore<mode> (operands[0], cmp_rtx, cc_reg));
+ rtx cc_reg = aarch64_gen_compare_split_imm24 (operands[2], operands[3],
+ operands[4]);
+ emit_insn (gen_aarch64_cstore<mode> (operands[0], operands[1], cc_reg));
DONE;
}
- [(set_attr "type" "csel")]
)
;; zero_extend version of the above
@@ -4509,38 +4685,6 @@
[(set_attr "type" "csel")]
)
-(define_expand "cmov<mode>6"
- [(set (match_operand:GPI 0 "register_operand")
- (if_then_else:GPI
- (match_operator 1 "aarch64_comparison_operator"
- [(match_operand:GPI 2 "register_operand")
- (match_operand:GPI 3 "aarch64_plus_operand")])
- (match_operand:GPI 4 "register_operand")
- (match_operand:GPI 5 "register_operand")))]
- ""
- "
- operands[2] = aarch64_gen_compare_reg (GET_CODE (operands[1]), operands[2],
- operands[3]);
- operands[3] = const0_rtx;
- "
-)
-
-(define_expand "cmov<mode>6"
- [(set (match_operand:GPF 0 "register_operand")
- (if_then_else:GPF
- (match_operator 1 "aarch64_comparison_operator"
- [(match_operand:GPF 2 "register_operand")
- (match_operand:GPF 3 "aarch64_fp_compare_operand")])
- (match_operand:GPF 4 "register_operand")
- (match_operand:GPF 5 "register_operand")))]
- ""
- "
- operands[2] = aarch64_gen_compare_reg (GET_CODE (operands[1]), operands[2],
- operands[3]);
- operands[3] = const0_rtx;
- "
-)
-
(define_insn "*cmov<mode>_insn"
[(set (match_operand:ALLI 0 "register_operand")
(if_then_else:ALLI
@@ -4637,15 +4781,21 @@
(match_operand:ALLI 3 "register_operand")))]
""
{
- rtx ccreg;
enum rtx_code code = GET_CODE (operands[1]);
-
if (code == UNEQ || code == LTGT)
FAIL;
- ccreg = aarch64_gen_compare_reg (code, XEXP (operands[1], 0),
- XEXP (operands[1], 1));
- operands[1] = gen_rtx_fmt_ee (code, VOIDmode, ccreg, const0_rtx);
+ rtx ccreg = XEXP (operands[1], 0);
+ enum machine_mode ccmode = GET_MODE (ccreg);
+ if (GET_MODE_CLASS (ccmode) == MODE_CC)
+ gcc_assert (XEXP (operands[1], 1) == const0_rtx);
+ else if (ccmode == QImode || ccmode == HImode)
+ FAIL;
+ else
+ {
+ ccreg = aarch64_gen_compare_reg (code, ccreg, XEXP (operands[1], 1));
+ operands[1] = gen_rtx_fmt_ee (code, VOIDmode, ccreg, const0_rtx);
+ }
}
)
@@ -5045,8 +5195,8 @@
(LOGICAL:GPI (match_operand:GPI 1 "register_operand")
(match_operand:GPI 2 "aarch64_logical_operand")))]
""
- {@ [ cons: =0 , 1 , 2 ; attrs: type , arch ]
- [ r , %r , r ; logic_reg , * ] <logical>\t%<w>0, %<w>1, %<w>2
+ {@ [ cons: =0 , %1 , 2 ; attrs: type , arch ]
+ [ r , r , r ; logic_reg , * ] <logical>\t%<w>0, %<w>1, %<w>2
[ rk , r , <lconst> ; logic_imm , * ] <logical>\t%<w>0, %<w>1, %2
[ w , 0 , <lconst> ; * , sve ] <logical>\t%Z0.<s>, %Z0.<s>, #%2
[ w , w , w ; neon_logic , simd ] <logical>\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>
@@ -5060,8 +5210,8 @@
(LOGICAL:SI (match_operand:SI 1 "register_operand")
(match_operand:SI 2 "aarch64_logical_operand"))))]
""
- {@ [ cons: =0 , 1 , 2 ; attrs: type ]
- [ r , %r , r ; logic_reg ] <logical>\t%w0, %w1, %w2
+ {@ [ cons: =0 , %1 , 2 ; attrs: type ]
+ [ r , r , r ; logic_reg ] <logical>\t%w0, %w1, %w2
[ rk , r , K ; logic_imm ] <logical>\t%w0, %w1, %2
}
)
@@ -5075,8 +5225,8 @@
(set (match_operand:GPI 0 "register_operand")
(and:GPI (match_dup 1) (match_dup 2)))]
""
- {@ [ cons: =0 , 1 , 2 ; attrs: type ]
- [ r , %r , r ; logics_reg ] ands\t%<w>0, %<w>1, %<w>2
+ {@ [ cons: =0 , %1 , 2 ; attrs: type ]
+ [ r , r , r ; logics_reg ] ands\t%<w>0, %<w>1, %<w>2
[ r , r , <lconst> ; logics_imm ] ands\t%<w>0, %<w>1, %2
}
)
@@ -5091,8 +5241,8 @@
(set (match_operand:DI 0 "register_operand")
(zero_extend:DI (and:SI (match_dup 1) (match_dup 2))))]
""
- {@ [ cons: =0 , 1 , 2 ; attrs: type ]
- [ r , %r , r ; logics_reg ] ands\t%w0, %w1, %w2
+ {@ [ cons: =0 , %1 , 2 ; attrs: type ]
+ [ r , r , r ; logics_reg ] ands\t%w0, %w1, %w2
[ r , r , K ; logics_imm ] ands\t%w0, %w1, %2
}
)
@@ -5605,6 +5755,19 @@
(match_operand:TI 1 "register_operand")]
"TARGET_SIMD && !TARGET_CSSC"
{
+ /* For SVE we can do popcount on DImode chunks of the TImode argument
+ and then use a cheap ADDP reduction. The SVE CNT instruction requires
+ materializing a PTRUE, so don't do this if optimizing for size. */
+ if (TARGET_SVE && !optimize_function_for_size_p (cfun))
+ {
+ rtx v = gen_reg_rtx (V2DImode);
+ rtx v1 = gen_reg_rtx (V2DImode);
+ emit_move_insn (v, gen_lowpart (V2DImode, operands[1]));
+ rtx p = aarch64_ptrue_reg (VNx2BImode, 16);
+ emit_insn (gen_aarch64_pred_popcountv2di (v1, p, v));
+ emit_insn (gen_reduc_plus_scal_v2di (operands[0], v1));
+ DONE;
+ }
rtx v = gen_reg_rtx (V16QImode);
rtx v1 = gen_reg_rtx (V16QImode);
emit_move_insn (v, gen_lowpart (V16QImode, operands[1]));
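An editorial sketch of the scalar equivalent of the SVE path added above: two 64-bit population counts and one add, which is what the CNT-plus-ADDP sequence computes.

#include <bit>
#include <cstdint>

/* Sketch only: popcount of a 128-bit value from its two halves.  */
unsigned int popcount_u128 (std::uint64_t lo, std::uint64_t hi)
{
  return std::popcount (lo) + std::popcount (hi);
}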
@@ -5676,9 +5839,9 @@
(match_operand:GPI 1 "aarch64_logical_operand"))
(const_int 0)))]
""
- {@ [ cons: 0 , 1 ; attrs: type ]
- [ %r , r ; logics_reg ] tst\t%<w>0, %<w>1
- [ r , <lconst> ; logics_imm ] tst\t%<w>0, %1
+ {@ [ cons: %0 , 1 ; attrs: type ]
+ [ r , r ; logics_reg ] tst\t%<w>0, %<w>1
+ [ r , <lconst> ; logics_imm ] tst\t%<w>0, %1
}
)
@@ -7527,6 +7690,22 @@
}
)
+(define_expand "isinf<mode>2"
+ [(match_operand:SI 0 "register_operand")
+ (match_operand:GPF 1 "register_operand")]
+ "TARGET_FLOAT"
+{
+ rtx op = force_lowpart_subreg (<V_INT_EQUIV>mode, operands[1], <MODE>mode);
+ rtx tmp = gen_reg_rtx (<V_INT_EQUIV>mode);
+ emit_move_insn (tmp, GEN_INT (HOST_WIDE_INT_M1U << (<mantissa_bits> + 1)));
+ rtx cc_reg = gen_rtx_REG (CC_SWPmode, CC_REGNUM);
+ emit_insn (gen_cmp_swp_lsl_reg<v_int_equiv> (op, GEN_INT (1), tmp));
+ rtx cmp = gen_rtx_fmt_ee (EQ, SImode, cc_reg, const0_rtx);
+ emit_insn (gen_aarch64_cstoresi (operands[0], cmp, cc_reg));
+ DONE;
+}
+)
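An editorial sketch of the bit trick behind the expander: shifting the value left by one discards the sign bit, after which +/-Inf is the unique pattern with all exponent bits set and a zero mantissa (for DFmode, mantissa_bits is 52, giving the constant -1 << 53).

#include <cstdint>
#include <cstring>

/* Sketch only: scalar equivalent of the isinf expansion for double.  */
bool is_inf (double x)
{
  std::uint64_t bits;
  std::memcpy (&bits, &x, sizeof bits);
  return (bits << 1) == (~0ULL << 53);
}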
+
;; -------------------------------------------------------------------
;; Reload support
;; -------------------------------------------------------------------
@@ -8068,8 +8247,8 @@
: gen_stack_protect_test_si) (operands[0], operands[1]));
rtx cc_reg = gen_rtx_REG (CCmode, CC_REGNUM);
- emit_jump_insn (gen_condjump (gen_rtx_EQ (VOIDmode, cc_reg, const0_rtx),
- cc_reg, operands[2]));
+ emit_jump_insn (gen_aarch64_bcond (gen_rtx_EQ (VOIDmode, cc_reg, const0_rtx),
+ cc_reg, operands[2]));
DONE;
})
@@ -8377,7 +8556,7 @@
[(set (match_operand:DI 0 "register_operand" "=rk")
(ior:DI
(and:DI (match_operand:DI 1 "register_operand" "rk")
- (const_int -1080863910568919041)) ;; 0xf0ff...
+ (const_int MEMTAG_TAG_MASK))
(ashift:DI (unspec:QI [(match_operand:DI 2 "register_operand" "r")]
UNSPEC_GEN_TAG_RND)
(const_int 56))))]
@@ -8420,9 +8599,9 @@
[(set (match_operand:DI 0 "register_operand" "=r")
(minus:DI
(and:DI (match_operand:DI 1 "register_operand" "rk")
- (const_int 72057594037927935)) ;; 0x00ff...
+ (const_int MEMTAG_ADDR_MASK))
(and:DI (match_operand:DI 2 "register_operand" "rk")
- (const_int 72057594037927935))))] ;; 0x00ff...
+ (const_int MEMTAG_ADDR_MASK))))]
"TARGET_MEMTAG"
"subp\\t%0, %1, %2"
[(set_attr "type" "memtag")]
@@ -8432,7 +8611,7 @@
(define_insn "ldg"
[(set (match_operand:DI 0 "register_operand" "+r")
(ior:DI
- (and:DI (match_dup 0) (const_int -1080863910568919041)) ;; 0xf0ff...
+ (and:DI (match_dup 0) (const_int MEMTAG_TAG_MASK))
(ashift:DI
(mem:QI (unspec:DI
[(and:DI (plus:DI (match_operand:DI 1 "register_operand" "rk")
diff --git a/gcc/config/aarch64/aarch64.opt b/gcc/config/aarch64/aarch64.opt
index f32d56d..9ca753e 100644
--- a/gcc/config/aarch64/aarch64.opt
+++ b/gcc/config/aarch64/aarch64.opt
@@ -290,6 +290,17 @@ msve-vector-bits=
Target RejectNegative Joined Enum(sve_vector_bits) Var(aarch64_sve_vector_bits) Init(SVE_SCALABLE)
-msve-vector-bits=<number> Set the number of bits in an SVE vector register.
+mautovec-preference=
+Target RejectNegative Joined Var(autovec_preference) Enum(aarch64_autovec_preference) Init(AARCH64_AUTOVEC_DEFAULT)
+-mautovec-preference=[default|asimd-only|sve-only|prefer-asimd|prefer-sve]
+Force an ISA selection strategy for auto-vectorization. For best performance it
+is highly recommended to use -mcpu or -mtune instead. This parameter should
+only be used for code exploration.
+
+mmax-vectorization
+Target Var(flag_aarch64_max_vectorization) Save
+Override the scalar cost model such that vectorization is always profitable.
+
mverbose-cost-dump
Target Undocumented Var(flag_aarch64_verbose_cost)
Enables verbose cost model dumping in the debug dump files.
@@ -356,8 +367,8 @@ The number of Newton iterations for calculating the reciprocal for double type.
-param=aarch64-autovec-preference=
Target Joined Var(aarch64_autovec_preference) Enum(aarch64_autovec_preference) Init(AARCH64_AUTOVEC_DEFAULT) Param
---param=aarch64-autovec-preference=[default|asimd-only|sve-only|prefer-asimd|prefer-sve]
-Force an ISA selection strategy for auto-vectorization.
+An old alias for -mautovec-preference. If both -mautovec-preference and
+--param=aarch64-autovec-preference are passed, the --param value will be used.
Enum
Name(aarch64_autovec_preference) Type(enum aarch64_autovec_preference_enum) UnknownError(unknown autovec preference %qs)
diff --git a/gcc/config/aarch64/arm_acle.h b/gcc/config/aarch64/arm_acle.h
index d9e2401..507b6e7 100644
--- a/gcc/config/aarch64/arm_acle.h
+++ b/gcc/config/aarch64/arm_acle.h
@@ -118,6 +118,20 @@ __revl (unsigned long __value)
return __rev (__value);
}
+__extension__ extern __inline double
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__sqrt (double __x)
+{
+ return __builtin_aarch64_sqrtdf (__x);
+}
+
+__extension__ extern __inline float
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__sqrtf (float __x)
+{
+ return __builtin_aarch64_sqrtsf (__x);
+}
+
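A short editorial usage note: both helpers are always_inline wrappers around the GCC sqrt builtins, so each call is expected to lower to a single FSQRT instruction.

#include <arm_acle.h>

/* Sketch only: expected to compile to "fsqrt d0, d0" and
   "fsqrt s0, s0" respectively on AArch64.  */
double root_two (void) { return __sqrt (2.0); }
float root_twof (void) { return __sqrtf (2.0f); }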
#pragma GCC push_options
#pragma GCC target ("+nothing+jscvt")
__extension__ extern __inline int32_t
diff --git a/gcc/config/aarch64/atomics.md b/gcc/config/aarch64/atomics.md
index 36b0dbd..ea4a936 100644
--- a/gcc/config/aarch64/atomics.md
+++ b/gcc/config/aarch64/atomics.md
@@ -679,13 +679,16 @@
)
(define_insn "aarch64_atomic_load<mode>_rcpc"
- [(set (match_operand:ALLI 0 "register_operand" "=r")
+ [(set (match_operand:ALLI 0 "register_operand")
(unspec_volatile:ALLI
- [(match_operand:ALLI 1 "aarch64_sync_memory_operand" "Q")
+ [(match_operand:ALLI 1 "aarch64_rcpc_memory_operand")
(match_operand:SI 2 "const_int_operand")] ;; model
UNSPECV_LDAP))]
"TARGET_RCPC"
- "ldapr<atomic_sfx>\t%<w>0, %1"
+ {@ [ cons: =0 , 1 ; attrs: enable_ldapur ]
+ [ r , Q ; any ] ldapr<atomic_sfx>\t%<w>0, %1
+ [ r , Ust ; yes ] ldapur<atomic_sfx>\t%<w>0, %1
+ }
)
(define_insn "aarch64_atomic_load<mode>"
@@ -705,21 +708,24 @@
)
(define_insn "*aarch64_atomic_load<ALLX:mode>_rcpc_zext"
- [(set (match_operand:SD_HSDI 0 "register_operand" "=r")
+ [(set (match_operand:SD_HSDI 0 "register_operand")
(zero_extend:SD_HSDI
(unspec_volatile:ALLX
- [(match_operand:ALLX 1 "aarch64_sync_memory_operand" "Q")
+ [(match_operand:ALLX 1 "aarch64_rcpc_memory_operand")
(match_operand:SI 2 "const_int_operand")] ;; model
UNSPECV_LDAP)))]
"TARGET_RCPC && (<SD_HSDI:sizen> > <ALLX:sizen>)"
- "ldapr<ALLX:atomic_sfx>\t%w0, %1"
+ {@ [ cons: =0 , 1 ; attrs: enable_ldapur ]
+ [ r , Q ; any ] ldapr<ALLX:atomic_sfx>\t%w0, %1
+ [ r , Ust ; yes ] ldapur<ALLX:atomic_sfx>\t%w0, %1
+ }
)
(define_insn "*aarch64_atomic_load<ALLX:mode>_rcpc_sext"
[(set (match_operand:GPI 0 "register_operand" "=r")
(sign_extend:GPI
(unspec_volatile:ALLX
- [(match_operand:ALLX 1 "aarch64_sync_memory_operand" "Q")
+ [(match_operand:ALLX 1 "aarch64_rcpc_memory_operand" "Ust")
(match_operand:SI 2 "const_int_operand")] ;; model
UNSPECV_LDAP)))]
"TARGET_RCPC2 && (<GPI:sizen> > <ALLX:sizen>)"
diff --git a/gcc/config/aarch64/constraints.md b/gcc/config/aarch64/constraints.md
index e8321c4..7b9e558 100644
--- a/gcc/config/aarch64/constraints.md
+++ b/gcc/config/aarch64/constraints.md
@@ -304,6 +304,18 @@
(and (match_code "const_int")
(match_test "(unsigned HOST_WIDE_INT) ival <= 7")))
+(define_constraint "Uc0"
+ "@internal
+ A constraint that matches the integers 0...63."
+ (and (match_code "const_int")
+ (match_test "IN_RANGE (ival, 0, 63)")))
+
+(define_constraint "Uc1"
+ "@internal
+ A constraint that matches the integers 0...62."
+ (and (match_code "const_int")
+ (match_test "IN_RANGE (ival, 0, 62)")))
+
(define_constraint "Up3"
"@internal
A constraint that matches the integers 2^(0...4)."
@@ -466,6 +478,13 @@
(and (match_code "const_vector")
(match_test "aarch64_simd_valid_orr_imm (op)")))
+(define_constraint "Df"
+ "@internal
+ A constraint that matches a vector of immediates for AND that can be
+ optimized as fmov."
+ (and (match_code "const_vector")
+ (match_test "aarch64_simd_valid_and_imm_fmov (op)")))
+
(define_constraint "Db"
"@internal
A constraint that matches vector of immediates for and/bic."
diff --git a/gcc/config/aarch64/cortex-a57-fma-steering.cc b/gcc/config/aarch64/cortex-a57-fma-steering.cc
index fd6da66..f7675be 100644
--- a/gcc/config/aarch64/cortex-a57-fma-steering.cc
+++ b/gcc/config/aarch64/cortex-a57-fma-steering.cc
@@ -948,6 +948,11 @@ func_fma_steering::analyze ()
/* Search the chain where this instruction is (one of) the root. */
dest_op_info = insn_rr[INSN_UID (insn)].op_info;
+
+ /* Register rename could fail. */
+ if (!dest_op_info)
+ continue;
+
dest_regno = REGNO (SET_DEST (PATTERN (insn)));
for (i = 0; i < dest_op_info->n_chains; i++)
{
diff --git a/gcc/config/aarch64/driver-aarch64.cc b/gcc/config/aarch64/driver-aarch64.cc
index 9d99554..0333746 100644
--- a/gcc/config/aarch64/driver-aarch64.cc
+++ b/gcc/config/aarch64/driver-aarch64.cc
@@ -63,7 +63,7 @@ struct aarch64_core_data
#define DEFAULT_CPU "generic-armv8-a"
#define AARCH64_CORE(CORE_NAME, CORE_IDENT, SCHED, ARCH, FLAGS, COSTS, IMP, PART, VARIANT) \
- { CORE_NAME, #ARCH, IMP, PART, VARIANT, feature_deps::cpu_##CORE_IDENT },
+ { CORE_NAME, #ARCH, IMP, PART, unsigned(VARIANT), feature_deps::cpu_##CORE_IDENT },
static CONSTEXPR const aarch64_core_data aarch64_cpu_data[] =
{
diff --git a/gcc/config/aarch64/gcc-auto-profile b/gcc/config/aarch64/gcc-auto-profile
new file mode 100755
index 0000000..4d5c2e3
--- /dev/null
+++ b/gcc/config/aarch64/gcc-auto-profile
@@ -0,0 +1,53 @@
+#!/bin/sh
+# Profile workload for gcc profile feedback (autofdo) using Linux perf.
+# Copyright The GNU Toolchain Authors.
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify it under
+# the terms of the GNU General Public License as published by the Free
+# Software Foundation; either version 3, or (at your option) any later
+# version.
+
+# GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+# WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+# for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3. If not see
+# <http://www.gnu.org/licenses/>. */
+
+# Run perf record with branch stack sampling and check for
+# specific error message to see if it is supported.
+use_brbe=true
+output=$(perf record -j any,u /bin/true 2>&1)
+case "$output" in
+ *"PMU Hardware or event type doesn't support branch stack sampling."*)
+ use_brbe=false;;
+ *)
+ use_brbe=true;;
+esac
+
+FLAGS=u
+if [ "$1" = "--kernel" ] ; then
+ FLAGS=k
+ shift
+elif [ "$1" = "--all" ] ; then
+ FLAGS=u,k
+ shift
+fi
+
+if [ "$use_brbe" = true ] ; then
+ if grep -q hypervisor /proc/cpuinfo ; then
+ echo >&2 "Warning: branch profiling may not be functional in VMs"
+ fi
+ set -x
+ perf record -j any,$FLAGS "$@"
+ set +x
+else
+ echo >&2 "Warning: branch profiling may not be functional without BRBE"
+ set -x
+ perf record "$@"
+ set +x
+fi
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index 146453b..b15e578 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -455,6 +455,7 @@
(define_mode_iterator VCVTFPM [V4HF V8HF V4SF])
;; Iterators for single modes, for "@" patterns.
+(define_mode_iterator VNx16BI_ONLY [VNx16BI])
(define_mode_iterator VNx16QI_ONLY [VNx16QI])
(define_mode_iterator VNx16SI_ONLY [VNx16SI])
(define_mode_iterator VNx8HI_ONLY [VNx8HI])
@@ -463,6 +464,7 @@
(define_mode_iterator VNx8SI_ONLY [VNx8SI])
(define_mode_iterator VNx8SF_ONLY [VNx8SF])
(define_mode_iterator VNx8DI_ONLY [VNx8DI])
+(define_mode_iterator VNx2SI_ONLY [VNx2SI])
(define_mode_iterator VNx4SI_ONLY [VNx4SI])
(define_mode_iterator VNx4SF_ONLY [VNx4SF])
(define_mode_iterator VNx2DI_ONLY [VNx2DI])
@@ -479,13 +481,25 @@
;; All fully-packed SVE integer and Advanced SIMD integer modes.
(define_mode_iterator SVE_ASIMD_FULL_I [SVE_FULL_I VDQ_I])
-;; All fully-packed SVE floating-point vector modes.
+;; Fully-packed SVE floating-point vector modes, excluding BF16.
(define_mode_iterator SVE_FULL_F [VNx8HF VNx4SF VNx2DF])
+;; Partial SVE floating-point vector modes, excluding BF16.
+(define_mode_iterator SVE_PARTIAL_F [VNx2HF VNx4HF VNx2SF])
+
+;; SVE floating-point vector modes, excluding BF16.
+(define_mode_iterator SVE_F [SVE_PARTIAL_F SVE_FULL_F])
+
;; Fully-packed SVE floating-point vector modes and their scalar equivalents.
(define_mode_iterator SVE_FULL_F_SCALAR [SVE_FULL_F GPF_HF])
-(define_mode_iterator SVE_FULL_F_BF [(VNx8BF "TARGET_SSVE_B16B16") SVE_FULL_F])
+(define_mode_iterator SVE_FULL_F_B16B16 [(VNx8BF "TARGET_SSVE_B16B16") SVE_FULL_F])
+
+(define_mode_iterator SVE_PARTIAL_F_B16B16 [(VNx2BF "TARGET_SSVE_B16B16")
+ (VNx4BF "TARGET_SSVE_B16B16")
+ SVE_PARTIAL_F])
+
+(define_mode_iterator SVE_F_B16B16 [SVE_PARTIAL_F_B16B16 SVE_FULL_F_B16B16])
;; Modes for which (B)FCLAMP is supported.
(define_mode_iterator SVE_CLAMP_F [(VNx8BF "TARGET_SSVE_B16B16")
@@ -529,6 +543,19 @@
;; elements.
(define_mode_iterator SVE_FULL_HSF [VNx8HF VNx4SF])
+;; Like SVE_FULL_HSF, but selectively enables those modes that are valid
+;; for the variant of the SVE2 FP8 FDOT instruction associated with that
+;; mode.
+(define_mode_iterator SVE_FULL_HSF_FP8_FDOT [(VNx4SF "TARGET_SSVE_FP8DOT4")
+ (VNx8HF "TARGET_SSVE_FP8DOT2")])
+
+;; Partial SVE floating-point vector modes that have 16-bit or 32-bit
+;; elements.
+(define_mode_iterator SVE_PARTIAL_HSF [VNx2HF VNx4HF VNx2SF])
+
+;; SVE floating-point vector modes that have 16-bit or 32-bit elements.
+(define_mode_iterator SVE_HSF [SVE_PARTIAL_HSF SVE_FULL_HSF])
+
;; Fully-packed SVE integer vector modes that have 16-bit or 64-bit elements.
(define_mode_iterator SVE_FULL_HDI [VNx8HI VNx2DI])
@@ -553,6 +580,9 @@
(define_mode_iterator SVE_MATMULF [(VNx4SF "TARGET_SVE_F32MM")
(VNx2DF "TARGET_SVE_F64MM")])
+;; SVE floating-point vector modes that have 32-bit or 64-bit elements.
+(define_mode_iterator SVE_SDF [VNx2SF SVE_FULL_SDF])
+
;; Fully-packed SVE vector modes that have 32-bit or smaller elements.
(define_mode_iterator SVE_FULL_BHS [VNx16QI VNx8HI VNx4SI
VNx8BF VNx8HF VNx4SF])
@@ -580,14 +610,13 @@
VNx4SI VNx2SI
VNx2DI])
+(define_mode_iterator SVE_BF [VNx2BF VNx4BF VNx8BF])
+
;; All SVE floating-point vector modes.
-(define_mode_iterator SVE_F [VNx8HF VNx4HF VNx2HF
- VNx8BF VNx4BF VNx2BF
- VNx4SF VNx2SF
- VNx2DF])
+(define_mode_iterator SVE_F_BF [SVE_F SVE_BF])
;; All SVE vector modes.
-(define_mode_iterator SVE_ALL [SVE_I SVE_F])
+(define_mode_iterator SVE_ALL [SVE_I SVE_F_BF])
;; All SVE 2-vector modes.
(define_mode_iterator SVE_FULLx2 [VNx32QI VNx16HI VNx8SI VNx4DI
@@ -623,6 +652,9 @@
VNx4SI VNx2SI
VNx2DI])
+;; SVE integer vector modes with 32-bit elements.
+(define_mode_iterator SVE_SI [VNx2SI VNx4SI])
+
(define_mode_iterator SVE_DIx24 [VNx4DI VNx8DI])
;; SVE modes with 2 or 4 elements.
@@ -638,6 +670,9 @@
(define_mode_iterator SVE_2 [VNx2QI VNx2HI VNx2HF VNx2BF
VNx2SI VNx2SF VNx2DI VNx2DF])
+;; SVE SI and DI modes with 2 elements.
+(define_mode_iterator SVE_2SDI [VNx2SI VNx2DI])
+
;; SVE integer modes with 2 elements, excluding the widest element.
(define_mode_iterator SVE_2BHSI [VNx2QI VNx2HI VNx2SI])
@@ -902,7 +937,6 @@
UNSPEC_UZP2Q ; Used in aarch64-sve.md.
UNSPEC_ZIP1Q ; Used in aarch64-sve.md.
UNSPEC_ZIP2Q ; Used in aarch64-sve.md.
- UNSPEC_TRN1_CONV ; Used in aarch64-sve.md.
UNSPEC_COND_CMPEQ_WIDE ; Used in aarch64-sve.md.
UNSPEC_COND_CMPGE_WIDE ; Used in aarch64-sve.md.
UNSPEC_COND_CMPGT_WIDE ; Used in aarch64-sve.md.
@@ -1157,6 +1191,9 @@
UNSPEC_LUTI2 ; Used in aarch64-simd.md.
UNSPEC_LUTI4 ; Used in aarch64-simd.md.
+ ;; All used in aarch64-sve.md
+ UNSPEC_PERMUTE_PRED
+
;; All used in aarch64-sve2.md
UNSPEC_ADDQV
UNSPEC_ANDQV
@@ -1303,6 +1340,8 @@
(define_mode_attr half_mask [(HI "255") (SI "65535") (DI "4294967295")])
+(define_mode_attr mantissa_bits [(SF "23") (DF "52")])
+
;; For constraints used in scalar immediate vector moves
(define_mode_attr hq [(HI "h") (QI "q")])
@@ -1678,6 +1717,30 @@
(SI "SI") (HI "HI")
(QI "QI")
(V4BF "BF") (V8BF "BF")
+ (V2x8QI "QI") (V2x4HI "HI")
+ (V2x2SI "SI") (V2x1DI "DI")
+ (V2x4HF "HF") (V2x2SF "SF")
+ (V2x1DF "DF") (V2x4BF "BF")
+ (V3x8QI "QI") (V3x4HI "HI")
+ (V3x2SI "SI") (V3x1DI "DI")
+ (V3x4HF "HF") (V3x2SF "SF")
+ (V3x1DF "DF") (V3x4BF "BF")
+ (V4x8QI "QI") (V4x4HI "HI")
+ (V4x2SI "SI") (V4x1DI "DI")
+ (V4x4HF "HF") (V4x2SF "SF")
+ (V4x1DF "DF") (V4x4BF "BF")
+ (V2x16QI "QI") (V2x8HI "HI")
+ (V2x4SI "SI") (V2x2DI "DI")
+ (V2x8HF "HF") (V2x4SF "SF")
+ (V2x2DF "DF") (V2x8BF "BF")
+ (V3x16QI "QI") (V3x8HI "HI")
+ (V3x4SI "SI") (V3x2DI "DI")
+ (V3x8HF "HF") (V3x4SF "SF")
+ (V3x2DF "DF") (V3x8BF "BF")
+ (V4x16QI "QI") (V4x8HI "HI")
+ (V4x4SI "SI") (V4x2DI "DI")
+ (V4x8HF "HF") (V4x4SF "SF")
+ (V4x2DF "DF") (V4x8BF "BF")
(VNx16QI "QI") (VNx8QI "QI") (VNx4QI "QI") (VNx2QI "QI")
(VNx8HI "HI") (VNx4HI "HI") (VNx2HI "HI")
(VNx8HF "HF") (VNx4HF "HF") (VNx2HF "HF")
@@ -1699,6 +1762,30 @@
(DF "df") (SI "si")
(HI "hi") (QI "qi")
(V4BF "bf") (V8BF "bf")
+ (V2x8QI "qi") (V2x4HI "hi")
+ (V2x2SI "si") (V2x1DI "di")
+ (V2x4HF "hf") (V2x2SF "sf")
+ (V2x1DF "df") (V2x4BF "bf")
+ (V3x8QI "qi") (V3x4HI "hi")
+ (V3x2SI "si") (V3x1DI "di")
+ (V3x4HF "hf") (V3x2SF "sf")
+ (V3x1DF "df") (V3x4BF "bf")
+ (V4x8QI "qi") (V4x4HI "hi")
+ (V4x2SI "si") (V4x1DI "di")
+ (V4x4HF "hf") (V4x2SF "sf")
+ (V4x1DF "df") (V4x4BF "bf")
+ (V2x16QI "qi") (V2x8HI "hi")
+ (V2x4SI "si") (V2x2DI "di")
+ (V2x8HF "hf") (V2x4SF "sf")
+ (V2x2DF "df") (V2x8BF "bf")
+ (V3x16QI "qi") (V3x8HI "hi")
+ (V3x4SI "si") (V3x2DI "di")
+ (V3x8HF "hf") (V3x4SF "sf")
+ (V3x2DF "df") (V3x8BF "bf")
+ (V4x16QI "qi") (V4x8HI "hi")
+ (V4x4SI "si") (V4x2DI "di")
+ (V4x8HF "hf") (V4x4SF "sf")
+ (V4x2DF "df") (V4x8BF "bf")
(VNx16QI "qi") (VNx8QI "qi") (VNx4QI "qi") (VNx2QI "qi")
(VNx8HI "hi") (VNx4HI "hi") (VNx2HI "hi")
(VNx8HF "hf") (VNx4HF "hf") (VNx2HF "hf")
@@ -2445,7 +2532,9 @@
(VNx8DI "vnx2di") (VNx8DF "vnx2df")])
;; The predicate mode associated with an SVE data mode. For structure modes
-;; this is equivalent to the <VPRED> of the subvector mode.
+;; this is equivalent to the <VPRED> of the subvector mode. For partial
+;; vector modes, this is equivalent to the <VPRED> of a full SVE mode with
+;; the same number of elements.
(define_mode_attr VPRED [(VNx16QI "VNx16BI") (VNx8QI "VNx8BI")
(VNx4QI "VNx4BI") (VNx2QI "VNx2BI")
(VNx8HI "VNx8BI") (VNx4HI "VNx4BI") (VNx2HI "VNx2BI")
@@ -2583,19 +2672,22 @@
(define_mode_attr data_bytes [(VNx16BI "1") (VNx8BI "2")
(VNx4BI "4") (VNx2BI "8")])
-;; Two-nybble mask for partial vector modes: nunits, byte size.
-(define_mode_attr self_mask [(VNx8QI "0x81")
- (VNx4QI "0x41")
- (VNx2QI "0x21")
- (VNx4HI "0x42")
- (VNx2HI "0x22")
- (VNx2SI "0x24")])
-
-;; For SVE_HSDI vector modes, the mask of narrower modes, encoded as above.
-(define_mode_attr narrower_mask [(VNx8HI "0x81") (VNx4HI "0x41")
- (VNx2HI "0x21")
- (VNx4SI "0x43") (VNx2SI "0x23")
- (VNx2DI "0x27")])
+;; Two-nybble mask for vector modes: nunits, byte size.
+(define_mode_attr self_mask [(VNx2HI "0x22") (VNx2HF "0x22")
+ (VNx4HI "0x42") (VNx4HF "0x42")
+ (VNx8HI "0x82") (VNx8HF "0x82")
+ (VNx2SI "0x24") (VNx2SF "0x24")
+ (VNx4SI "0x44") (VNx4SF "0x44")
+ (VNx2DI "0x28") (VNx2DF "0x28")
+ (VNx8QI "0x81") (VNx4QI "0x41") (VNx2QI "0x21")])
+
+;; The mask of narrower vector modes, encoded as above.
+(define_mode_attr narrower_mask [(VNx8HI "0x81") (VNx8HF "0x81")
+ (VNx4HI "0x41") (VNx4HF "0x41")
+ (VNx2HI "0x21") (VNx2HF "0x21")
+ (VNx4SI "0x43") (VNx4SF "0x43")
+ (VNx2SI "0x23") (VNx2SF "0x23")
+ (VNx2DI "0x27") (VNx2DF "0x27")])
;; The constraint to use for an SVE [SU]DOT, FMUL, FMLA or FMLS lane index.
(define_mode_attr sve_lane_con [(VNx8HI "y") (VNx4SI "y") (VNx2DI "x")
@@ -2611,13 +2703,15 @@
(V2DI "vec") (DI "offset")])
(define_mode_attr b [(V4BF "b") (V4HF "") (V8BF "b") (V8HF "")
- (VNx8BF "b") (VNx8HF "") (VNx4SF "") (VNx2DF "")
+ (VNx2BF "b") (VNx2HF "") (VNx2SF "")
+ (VNx4BF "b") (VNx4HF "") (VNx4SF "")
+ (VNx8BF "b") (VNx8HF "") (VNx2DF "")
(VNx16BF "b") (VNx16HF "") (VNx8SF "") (VNx4DF "")
(VNx32BF "b") (VNx32HF "") (VNx16SF "") (VNx8DF "")])
-(define_mode_attr is_bf16 [(VNx8BF "true")
- (VNx8HF "false")
- (VNx4SF "false")
+(define_mode_attr is_bf16 [(VNx2BF "true") (VNx4BF "true") (VNx8BF "true")
+ (VNx2HF "false") (VNx4HF "false") (VNx8HF "false")
+ (VNx2SF "false") (VNx4SF "false")
(VNx2DF "false")])
(define_mode_attr aligned_operand [(VNx16QI "register_operand")
@@ -2879,6 +2973,32 @@
(geu "hs")
(gtu "hi")])
+(define_code_attr inv_cmp_op [(lt "ge")
+ (le "gt")
+ (eq "ne")
+ (ne "eq")
+ (ge "lt")
+ (gt "le")
+ (ltu "hs")
+ (leu "hi")
+ (geu "lo")
+ (gtu "ls")])
+
+(define_mode_attr cmpbr_suffix [(QI "b") (HI "h")])
+
+(define_code_iterator INT_CMP [lt le eq ne ge gt ltu leu geu gtu])
+
+;; Inverse comparisons must have the same constraint so that
+;; branches can be redirected during late compilation.
+(define_code_attr cmpbr_imm_constraint [
+ (eq "Uc0") (ne "Uc0")
+ (lt "Uc0") (ge "Uc0")
+ (ltu "Uc0") (geu "Uc0")
+
+ (gt "Uc1") (le "Uc1")
+ (gtu "Uc1") (leu "Uc1")
+])
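+;; For example, if an out-of-range branch on gt is later rewritten as a
+;; branch on le, the immediate operand is unchanged, so gt and le must
+;; accept the same constants (Uc1).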
+
(define_code_attr fix_trunc_optab [(fix "fix_trunc")
(unsigned_fix "fixuns_trunc")])
@@ -3254,6 +3374,10 @@
(define_int_iterator SVE_INT_UNARY [UNSPEC_REVB
UNSPEC_REVH UNSPEC_REVW])
+;; This iterator is currently only used for estimation instructions,
+;; which are never generated automatically when -ftrapping-math is true.
+;; The iterator is therefore applied unconditionally to partial FP modes.
+;; This might need to be revisited if new operations are added in future.
(define_int_iterator SVE_FP_UNARY [UNSPEC_FRECPE UNSPEC_RSQRTE])
(define_int_iterator SVE_FP_UNARY_INT [(UNSPEC_FEXPA "TARGET_NON_STREAMING")])
@@ -3266,6 +3390,10 @@
(define_int_iterator SVE_INT_BINARY_MULTI [UNSPEC_SQDMULH
UNSPEC_SRSHL UNSPEC_URSHL])
+;; This iterator is currently only used for estimation instructions,
+;; which are never generated automatically when -ftrapping-math is true.
+;; The iterator is therefore applied unconditionally to partial FP modes.
+;; This might need to be revisited if new operations are added in future.
(define_int_iterator SVE_FP_BINARY [UNSPEC_FRECPS UNSPEC_RSQRTS])
(define_int_iterator SVE_FP_BINARY_INT [UNSPEC_FTSMUL UNSPEC_FTSSEL])
@@ -3317,9 +3445,10 @@
UNSPEC_FMINQV
UNSPEC_FMINNMQV])
-(define_int_iterator SVE_COND_FP_UNARY [UNSPEC_COND_FABS
- UNSPEC_COND_FNEG
- UNSPEC_COND_FRECPX
+(define_int_iterator SVE_COND_FP_UNARY_BITWISE [UNSPEC_COND_FABS
+ UNSPEC_COND_FNEG])
+
+(define_int_iterator SVE_COND_FP_UNARY [UNSPEC_COND_FRECPX
UNSPEC_COND_FRINTA
UNSPEC_COND_FRINTI
UNSPEC_COND_FRINTM
@@ -3327,13 +3456,12 @@
UNSPEC_COND_FRINTP
UNSPEC_COND_FRINTX
UNSPEC_COND_FRINTZ
- UNSPEC_COND_FSQRT])
+ UNSPEC_COND_FSQRT
+ SVE_COND_FP_UNARY_BITWISE])
;; Same as SVE_COND_FP_UNARY, but without codes that have a dedicated
;; <optab><mode>2 expander.
-(define_int_iterator SVE_COND_FP_UNARY_OPTAB [UNSPEC_COND_FABS
- UNSPEC_COND_FNEG
- UNSPEC_COND_FRECPX
+(define_int_iterator SVE_COND_FP_UNARY_OPTAB [UNSPEC_COND_FRECPX
UNSPEC_COND_FRINTA
UNSPEC_COND_FRINTI
UNSPEC_COND_FRINTM
@@ -3756,6 +3884,8 @@
(define_int_iterator SVE_PITER [UNSPEC_PFIRST UNSPEC_PNEXT])
+(define_int_iterator PNEXT_ONLY [UNSPEC_PNEXT])
+
(define_int_iterator MATMUL [UNSPEC_SMATMUL UNSPEC_UMATMUL
UNSPEC_USMATMUL])
diff --git a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md
index 1ab1c69..42304ce 100644
--- a/gcc/config/aarch64/predicates.md
+++ b/gcc/config/aarch64/predicates.md
@@ -123,7 +123,8 @@
(define_predicate "aarch64_reg_or_and_imm"
(ior (match_operand 0 "register_operand")
(and (match_code "const_vector")
- (match_test "aarch64_simd_valid_and_imm (op)"))))
+ (ior (match_test "aarch64_simd_valid_and_imm (op)")
+ (match_test "aarch64_simd_valid_and_imm_fmov (op)")))))
(define_predicate "aarch64_reg_or_xor_imm"
(ior (match_operand 0 "register_operand")
@@ -285,10 +286,15 @@
(and (match_code "const_int")
(match_test "UINTVAL (op) <= 7")))
-;; An immediate that fits into 24 bits.
-(define_predicate "aarch64_imm24"
- (and (match_code "const_int")
- (match_test "IN_RANGE (UINTVAL (op), 0, 0xffffff)")))
+;; An immediate that fits into 24 bits, but needs splitting.
+(define_predicate "aarch64_split_imm24"
+ (match_code "const_int")
+{
+ unsigned HOST_WIDE_INT i = UINTVAL (op);
+ return (IN_RANGE (i, 0, 0xffffff)
+ && !aarch64_move_imm (i, mode)
+ && !aarch64_uimm12_shift (i));
+})
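+;; For example, 0x123456 is neither a valid move immediate nor a single
+;; (optionally shifted) 12-bit immediate, so it is handled by splitting
+;; it into the two 12-bit pieces 0x123000 (0x123 << 12) and 0x456.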
(define_predicate "aarch64_mem_pair_offset"
(and (match_code "const_int")
@@ -586,6 +592,11 @@
return aarch64_simd_shift_imm_p (op, mode, false);
})
+(define_special_predicate "aarch64_predicate_operand"
+ (and (match_code "reg,subreg")
+ (match_test "register_operand (op, GET_MODE (op))")
+ (match_test "aarch64_sve_valid_pred_p (op, mode)")))
+
(define_predicate "aarch64_simd_imm_zero"
(and (match_code "const,const_vector")
(match_test "op == CONST0_RTX (GET_MODE (op))")))
@@ -1070,5 +1081,21 @@
&& !(INTVAL (op) & 0xf)")))
(define_predicate "aarch64_maskload_else_operand"
- (and (match_code "const_int,const_vector")
+ (and (match_code "const_vector")
(match_test "op == CONST0_RTX (GET_MODE (op))")))
+
+;; Check for a VNx16BI predicate that is a canonical PTRUE for the given
+;; predicate mode.
+(define_special_predicate "aarch64_ptrue_all_operand"
+ (and (match_code "const_vector")
+ (match_test "aarch64_ptrue_all_mode (op) == mode")))
+
+(define_predicate "aarch64_reg_Uc0_operand"
+ (ior (match_operand 0 "register_operand")
+ (and (match_code "const_int")
+ (match_test "satisfies_constraint_Uc0 (op)"))))
+
+(define_predicate "aarch64_reg_Uc1_operand"
+ (ior (match_operand 0 "register_operand")
+ (and (match_code "const_int")
+ (match_test "satisfies_constraint_Uc1 (op)"))))
diff --git a/gcc/config/aarch64/t-aarch64 b/gcc/config/aarch64/t-aarch64
index 5957194..63ca8e9 100644
--- a/gcc/config/aarch64/t-aarch64
+++ b/gcc/config/aarch64/t-aarch64
@@ -140,6 +140,17 @@ aarch-common.o: $(srcdir)/config/arm/aarch-common.cc $(CONFIG_H) $(SYSTEM_H) \
$(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \
$(srcdir)/config/arm/aarch-common.cc
+aarch64-elf-metadata.o: $(srcdir)/config/aarch64/aarch64-elf-metadata.cc \
+ $(CONFIG_H) \
+ $(BACKEND_H) \
+ $(RTL_H) \
+ $(SYSTEM_H) \
+ $(TARGET_H) \
+ $(srcdir)/config/aarch64/aarch64-elf-metadata.h \
+ output.h
+	$(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \
+ $(srcdir)/config/aarch64/aarch64-elf-metadata.cc
+
aarch64-c.o: $(srcdir)/config/aarch64/aarch64-c.cc $(CONFIG_H) $(SYSTEM_H) \
coretypes.h $(TM_H) $(TREE_H) output.h $(C_COMMON_H) $(TARGET_H)
$(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \
@@ -179,12 +190,6 @@ aarch-bti-insert.o: $(srcdir)/config/arm/aarch-bti-insert.cc \
$(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \
$(srcdir)/config/arm/aarch-bti-insert.cc
-aarch64-cc-fusion.o: $(srcdir)/config/aarch64/aarch64-cc-fusion.cc \
- $(CONFIG_H) $(SYSTEM_H) $(CORETYPES_H) $(BACKEND_H) $(RTL_H) $(DF_H) \
- $(RTL_SSA_H) tree-pass.h
- $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \
- $(srcdir)/config/aarch64/aarch64-cc-fusion.cc
-
aarch64-early-ra.o: $(srcdir)/config/aarch64/aarch64-early-ra.cc \
$(CONFIG_H) $(SYSTEM_H) $(CORETYPES_H) $(BACKEND_H) $(RTL_H) $(DF_H) \
$(RTL_SSA_H) tree-pass.h
diff --git a/gcc/config/aarch64/tuning_models/cortexx925.h b/gcc/config/aarch64/tuning_models/cortexx925.h
index 7d0162e..f448493 100644
--- a/gcc/config/aarch64/tuning_models/cortexx925.h
+++ b/gcc/config/aarch64/tuning_models/cortexx925.h
@@ -222,7 +222,8 @@ static const struct tune_params cortexx925_tunings =
(AARCH64_EXTRA_TUNE_BASE
| AARCH64_EXTRA_TUNE_CSE_SVE_VL_CONSTANTS
| AARCH64_EXTRA_TUNE_MATCHED_VECTOR_THROUGHPUT
- | AARCH64_EXTRA_TUNE_AVOID_PRED_RMW), /* tune_flags. */
+ | AARCH64_EXTRA_TUNE_AVOID_PRED_RMW
+ | AARCH64_EXTRA_TUNE_AVOID_LDAPUR), /* tune_flags. */
&generic_armv9a_prefetch_tune,
AARCH64_LDP_STP_POLICY_ALWAYS, /* ldp_policy_model. */
AARCH64_LDP_STP_POLICY_ALWAYS /* stp_policy_model. */
diff --git a/gcc/config/aarch64/tuning_models/generic_armv9_a.h b/gcc/config/aarch64/tuning_models/generic_armv9_a.h
index f76a250..9eb1a20 100644
--- a/gcc/config/aarch64/tuning_models/generic_armv9_a.h
+++ b/gcc/config/aarch64/tuning_models/generic_armv9_a.h
@@ -26,7 +26,7 @@
static const struct cpu_addrcost_table generic_armv9_a_addrcost_table =
{
{
- 1, /* hi */
+ 0, /* hi */
0, /* si */
0, /* di */
1, /* ti */
diff --git a/gcc/config/aarch64/tuning_models/neoversev2.h b/gcc/config/aarch64/tuning_models/neoversev2.h
index b000fb4..266d8f1 100644
--- a/gcc/config/aarch64/tuning_models/neoversev2.h
+++ b/gcc/config/aarch64/tuning_models/neoversev2.h
@@ -220,7 +220,8 @@ static const struct tune_params neoversev2_tunings =
(AARCH64_EXTRA_TUNE_BASE
| AARCH64_EXTRA_TUNE_CSE_SVE_VL_CONSTANTS
| AARCH64_EXTRA_TUNE_MATCHED_VECTOR_THROUGHPUT
- | AARCH64_EXTRA_TUNE_AVOID_PRED_RMW), /* tune_flags. */
+ | AARCH64_EXTRA_TUNE_AVOID_PRED_RMW
+ | AARCH64_EXTRA_TUNE_AVOID_LDAPUR), /* tune_flags. */
&generic_armv9a_prefetch_tune,
AARCH64_LDP_STP_POLICY_ALWAYS, /* ldp_policy_model. */
AARCH64_LDP_STP_POLICY_ALWAYS /* stp_policy_model. */
diff --git a/gcc/config/aarch64/tuning_models/neoversev3.h b/gcc/config/aarch64/tuning_models/neoversev3.h
index ad3cd22..f5566d2 100644
--- a/gcc/config/aarch64/tuning_models/neoversev3.h
+++ b/gcc/config/aarch64/tuning_models/neoversev3.h
@@ -220,7 +220,8 @@ static const struct tune_params neoversev3_tunings =
(AARCH64_EXTRA_TUNE_BASE
| AARCH64_EXTRA_TUNE_CSE_SVE_VL_CONSTANTS
| AARCH64_EXTRA_TUNE_MATCHED_VECTOR_THROUGHPUT
- | AARCH64_EXTRA_TUNE_AVOID_PRED_RMW), /* tune_flags. */
+ | AARCH64_EXTRA_TUNE_AVOID_PRED_RMW
+ | AARCH64_EXTRA_TUNE_AVOID_LDAPUR), /* tune_flags. */
&generic_armv9a_prefetch_tune,
AARCH64_LDP_STP_POLICY_ALWAYS, /* ldp_policy_model. */
AARCH64_LDP_STP_POLICY_ALWAYS /* stp_policy_model. */
diff --git a/gcc/config/aarch64/tuning_models/neoversev3ae.h b/gcc/config/aarch64/tuning_models/neoversev3ae.h
index a0adef0..5796e52 100644
--- a/gcc/config/aarch64/tuning_models/neoversev3ae.h
+++ b/gcc/config/aarch64/tuning_models/neoversev3ae.h
@@ -220,7 +220,8 @@ static const struct tune_params neoversev3ae_tunings =
(AARCH64_EXTRA_TUNE_BASE
| AARCH64_EXTRA_TUNE_CSE_SVE_VL_CONSTANTS
| AARCH64_EXTRA_TUNE_MATCHED_VECTOR_THROUGHPUT
- | AARCH64_EXTRA_TUNE_AVOID_PRED_RMW), /* tune_flags. */
+ | AARCH64_EXTRA_TUNE_AVOID_PRED_RMW
+ | AARCH64_EXTRA_TUNE_AVOID_LDAPUR), /* tune_flags. */
&generic_armv9a_prefetch_tune,
AARCH64_LDP_STP_POLICY_ALWAYS, /* ldp_policy_model. */
AARCH64_LDP_STP_POLICY_ALWAYS /* stp_policy_model. */
diff --git a/gcc/config/aarch64/tuning_models/olympus.h b/gcc/config/aarch64/tuning_models/olympus.h
new file mode 100644
index 0000000..268789d
--- /dev/null
+++ b/gcc/config/aarch64/tuning_models/olympus.h
@@ -0,0 +1,210 @@
+/* Tuning model description for the NVIDIA Olympus core.
+ Copyright The GNU Toolchain Authors.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef GCC_AARCH64_H_OLYMPUS
+#define GCC_AARCH64_H_OLYMPUS
+
+#include "generic.h"
+
+static struct cpu_regmove_cost olympus_regmove_cost =
+{
+ 1, /* GP2GP */
+ /* Spilling to int<->fp instead of memory is recommended so set
+ realistic costs compared to memmov_cost. */
+ 3, /* GP2FP */
+ 3, /* FP2GP */
+ 2 /* FP2FP */
+};
+
+static advsimd_vec_cost olympus_advsimd_vector_cost =
+{
+ 2, /* int_stmt_cost */
+ 2, /* fp_stmt_cost */
+ 2, /* ld2_st2_permute_cost */
+ 2, /* ld3_st3_permute_cost */
+ 3, /* ld4_st4_permute_cost */
+ 2, /* permute_cost */
+ 5, /* reduc_i8_cost */
+ 3, /* reduc_i16_cost */
+ 3, /* reduc_i32_cost */
+ 2, /* reduc_i64_cost */
+ 4, /* reduc_f16_cost */
+ 4, /* reduc_f32_cost */
+ 4, /* reduc_f64_cost */
+ 2, /* store_elt_extra_cost */
+ 8, /* vec_to_scalar_cost */
+ 4, /* scalar_to_vec_cost */
+ 6, /* align_load_cost */
+ 6, /* unalign_load_cost */
+ 1, /* unalign_store_cost */
+ 1 /* store_cost */
+};
+
+static sve_vec_cost olympus_sve_vector_cost =
+{
+ {
+ 2, /* int_stmt_cost */
+ 2, /* fp_stmt_cost */
+ 2, /* ld2_st2_permute_cost */
+ 3, /* ld3_st3_permute_cost */
+ 3, /* ld4_st4_permute_cost */
+ 2, /* permute_cost */
+ 9, /* reduc_i8_cost */
+ 8, /* reduc_i16_cost */
+ 6, /* reduc_i32_cost */
+ 2, /* reduc_i64_cost */
+ 8, /* reduc_f16_cost */
+ 6, /* reduc_f32_cost */
+ 4, /* reduc_f64_cost */
+ 2, /* store_elt_extra_cost */
+ 8, /* vec_to_scalar_cost */
+ 4, /* scalar_to_vec_cost */
+ 4, /* align_load_cost */
+ 6, /* unalign_load_cost */
+ 1, /* unalign_store_cost */
+ 1 /* store_cost */
+ },
+ 3, /* clast_cost */
+ 10, /* fadda_f16_cost */
+ 6, /* fadda_f32_cost */
+ 4, /* fadda_f64_cost */
+ 14, /* gather_load_x32_cost */
+ 12, /* gather_load_x64_cost */
+ 42, /* gather_load_x32_init_cost */
+ 24, /* gather_load_x64_init_cost */
+ 1 /* scatter_store_elt_cost */
+};
+
+static aarch64_scalar_vec_issue_info olympus_scalar_issue_info =
+{
+ 4, /* loads_stores_per_cycle */
+ 2, /* stores_per_cycle */
+ 8, /* general_ops_per_cycle */
+ 0, /* fp_simd_load_general_ops */
+ 1 /* fp_simd_store_general_ops */
+};
+
+static aarch64_advsimd_vec_issue_info olympus_advsimd_issue_info =
+{
+ {
+ 3, /* loads_stores_per_cycle */
+ 2, /* stores_per_cycle */
+ 6, /* general_ops_per_cycle */
+ 0, /* fp_simd_load_general_ops */
+ 1 /* fp_simd_store_general_ops */
+ },
+ 2, /* ld2_st2_general_ops */
+ 2, /* ld3_st3_general_ops */
+ 3 /* ld4_st4_general_ops */
+};
+
+static aarch64_sve_vec_issue_info olympus_sve_issue_info =
+{
+ {
+ {
+ 3, /* loads_stores_per_cycle */
+ 2, /* stores_per_cycle */
+ 6, /* general_ops_per_cycle */
+ 0, /* fp_simd_load_general_ops */
+ 1 /* fp_simd_store_general_ops */
+ },
+ 2, /* ld2_st2_general_ops */
+ 2, /* ld3_st3_general_ops */
+ 3 /* ld4_st4_general_ops */
+ },
+ 2, /* pred_ops_per_cycle */
+ 1, /* while_pred_ops */
+ 0, /* int_cmp_pred_ops */
+ 0, /* fp_cmp_pred_ops */
+ 1, /* gather_scatter_pair_general_ops */
+ 1 /* gather_scatter_pair_pred_ops */
+};
+
+static aarch64_vec_issue_info olympus_vec_issue_info =
+{
+ &olympus_scalar_issue_info,
+ &olympus_advsimd_issue_info,
+ &olympus_sve_issue_info
+};
+
+/* Olympus costs for vector insn classes. */
+static struct cpu_vector_cost olympus_vector_cost =
+{
+ 1, /* scalar_int_stmt_cost */
+ 2, /* scalar_fp_stmt_cost */
+ 4, /* scalar_load_cost */
+ 1, /* scalar_store_cost */
+ 1, /* cond_taken_branch_cost */
+ 1, /* cond_not_taken_branch_cost */
+ &olympus_advsimd_vector_cost, /* advsimd */
+ &olympus_sve_vector_cost, /* sve */
+ &olympus_vec_issue_info /* issue_info */
+};
+
+/* Olympus prefetch settings (which disable prefetch). */
+static cpu_prefetch_tune olympus_prefetch_tune =
+{
+ 0, /* num_slots */
+ -1, /* l1_cache_size */
+ 64, /* l1_cache_line_size */
+ -1, /* l2_cache_size */
+ true, /* prefetch_dynamic_strides */
+ -1, /* minimum_stride */
+ -1 /* default_opt_level */
+};
+
+static struct tune_params olympus_tunings =
+{
+ &cortexa76_extra_costs,
+ &generic_armv9_a_addrcost_table,
+ &olympus_regmove_cost,
+ &olympus_vector_cost,
+ &generic_branch_cost,
+ &generic_approx_modes,
+ SVE_128, /* sve_width */
+ { 4, /* load_int. */
+ 1, /* store_int. */
+ 6, /* load_fp. */
+ 3, /* store_fp. */
+ 5, /* load_pred. */
+ 1 /* store_pred. */
+ }, /* memmov_cost. */
+ 10, /* issue_rate */
+ AARCH64_FUSE_NEOVERSE_BASE, /* fusible_ops */
+ "32:16", /* function_align. */
+ "4", /* jump_align. */
+ "32:16", /* loop_align. */
+ 8, /* int_reassoc_width. */
+ 6, /* fp_reassoc_width. */
+ 4, /* fma_reassoc_width. */
+ 6, /* vec_reassoc_width. */
+ 2, /* min_div_recip_mul_sf. */
+ 2, /* min_div_recip_mul_df. */
+ 0, /* max_case_values. */
+ tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */
+ (AARCH64_EXTRA_TUNE_BASE
+ | AARCH64_EXTRA_TUNE_CSE_SVE_VL_CONSTANTS
+ | AARCH64_EXTRA_TUNE_MATCHED_VECTOR_THROUGHPUT
+ | AARCH64_EXTRA_TUNE_AVOID_PRED_RMW), /* tune_flags. */
+ &olympus_prefetch_tune,
+ AARCH64_LDP_STP_POLICY_ALWAYS, /* ldp_policy_model. */
+ AARCH64_LDP_STP_POLICY_ALWAYS /* stp_policy_model. */
+};
+
+#endif /* GCC_AARCH64_H_OLYMPUS. */
diff --git a/gcc/config/alpha/alpha.cc b/gcc/config/alpha/alpha.cc
index ba470d9..5082c1c 100644
--- a/gcc/config/alpha/alpha.cc
+++ b/gcc/config/alpha/alpha.cc
@@ -1036,8 +1036,7 @@ alpha_legitimize_address_1 (rtx x, rtx scratch, machine_mode mode)
RTL_CONST_CALL_P (insn) = 1;
use_reg (&CALL_INSN_FUNCTION_USAGE (insn), r16);
- insn = get_insns ();
- end_sequence ();
+ insn = end_sequence ();
emit_libcall_block (insn, dest, r0, x);
return dest;
@@ -1059,8 +1058,7 @@ alpha_legitimize_address_1 (rtx x, rtx scratch, machine_mode mode)
RTL_CONST_CALL_P (insn) = 1;
use_reg (&CALL_INSN_FUNCTION_USAGE (insn), r16);
- insn = get_insns ();
- end_sequence ();
+ insn = end_sequence ();
eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
UNSPEC_TLSLDM_CALL);
@@ -3214,8 +3212,7 @@ alpha_emit_xfloating_libcall (rtx func, rtx target, rtx operands[],
CALL_INSN_FUNCTION_USAGE (tmp) = usage;
RTL_CONST_CALL_P (tmp) = 1;
- tmp = get_insns ();
- end_sequence ();
+ tmp = end_sequence ();
emit_libcall_block (tmp, target, reg, equiv);
}
@@ -4291,14 +4288,10 @@ alpha_get_mem_rtx_alignment_and_offset (rtx expr, int &a, HOST_WIDE_INT &o)
tree mem = MEM_EXPR (expr);
if (mem != NULL_TREE)
- switch (TREE_CODE (mem))
- {
- case MEM_REF:
- tree_offset = mem_ref_offset (mem).force_shwi ();
- tree_align = get_object_alignment (get_base_address (mem));
- break;
+ {
+ HOST_WIDE_INT comp_offset = 0;
- case COMPONENT_REF:
+ for (; TREE_CODE (mem) == COMPONENT_REF; mem = TREE_OPERAND (mem, 0))
{
tree byte_offset = component_ref_field_offset (mem);
tree bit_offset = DECL_FIELD_BIT_OFFSET (TREE_OPERAND (mem, 1));
@@ -4307,14 +4300,15 @@ alpha_get_mem_rtx_alignment_and_offset (rtx expr, int &a, HOST_WIDE_INT &o)
|| !poly_int_tree_p (byte_offset, &offset)
|| !tree_fits_shwi_p (bit_offset))
break;
- tree_offset = offset + tree_to_shwi (bit_offset) / BITS_PER_UNIT;
+ comp_offset += offset + tree_to_shwi (bit_offset) / BITS_PER_UNIT;
}
- tree_align = get_object_alignment (get_base_address (mem));
- break;
- default:
- break;
- }
+ if (TREE_CODE (mem) == MEM_REF)
+ {
+ tree_offset = comp_offset + mem_ref_offset (mem).force_shwi ();
+ tree_align = get_object_alignment (get_base_address (mem));
+ }
+ }
if (reg_align > mem_align)
{
@@ -5599,8 +5593,7 @@ alpha_gp_save_rtx (void)
m = validize_mem (m);
emit_move_insn (m, pic_offset_table_rtx);
- seq = get_insns ();
- end_sequence ();
+ seq = end_sequence ();
/* We used to simply emit the sequence after entry_of_function.
However this breaks the CFG if the first instruction in the
diff --git a/gcc/config/arc/arc-modes.def b/gcc/config/arc/arc-modes.def
index cab46d7..7c7dff9 100644
--- a/gcc/config/arc/arc-modes.def
+++ b/gcc/config/arc/arc-modes.def
@@ -24,6 +24,7 @@ along with GCC; see the file COPYING3. If not see
CC_MODE (CC_ZN);
CC_MODE (CC_Z);
+CC_MODE (CC_V);
CC_MODE (CC_C);
CC_MODE (CC_FP_GT);
CC_MODE (CC_FP_GE);
diff --git a/gcc/config/arc/arc-protos.h b/gcc/config/arc/arc-protos.h
index cd82aa1..2db643c 100644
--- a/gcc/config/arc/arc-protos.h
+++ b/gcc/config/arc/arc-protos.h
@@ -55,6 +55,7 @@ extern bool arc_check_mov_const (HOST_WIDE_INT );
extern bool arc_split_mov_const (rtx *);
extern bool arc_can_use_return_insn (void);
extern bool arc_split_move_p (rtx *);
+extern void arc_gen_unlikely_cbranch (enum rtx_code, machine_mode, rtx);
#endif /* RTX_CODE */
diff --git a/gcc/config/arc/arc.cc b/gcc/config/arc/arc.cc
index be4bd61..bb5db97 100644
--- a/gcc/config/arc/arc.cc
+++ b/gcc/config/arc/arc.cc
@@ -720,8 +720,6 @@ static rtx arc_legitimize_address_0 (rtx, rtx, machine_mode mode);
#define TARGET_NO_SPECULATION_IN_DELAY_SLOTS_P \
arc_no_speculation_in_delay_slots_p
-#undef TARGET_LRA_P
-#define TARGET_LRA_P hook_bool_void_true
#define TARGET_REGISTER_PRIORITY arc_register_priority
/* Stores with scaled offsets have different displacement ranges. */
#define TARGET_DIFFERENT_ADDR_DISPLACEMENT_P hook_bool_void_true
@@ -1438,6 +1436,13 @@ get_arc_condition_code (rtx comparison)
case GEU : return ARC_CC_NC;
default : gcc_unreachable ();
}
+ case E_CC_Vmode:
+ switch (GET_CODE (comparison))
+ {
+ case EQ : return ARC_CC_NV;
+ case NE : return ARC_CC_V;
+ default : gcc_unreachable ();
+ }
case E_CC_FP_GTmode:
if (TARGET_ARGONAUT_SET && TARGET_SPFP)
switch (GET_CODE (comparison))
@@ -1548,6 +1553,13 @@ arc_select_cc_mode (enum rtx_code op, rtx x, rtx y)
machine_mode mode = GET_MODE (x);
rtx x1;
+ /* Matches all instructions which can do .f and clobber only the Z flag. */
+ if (GET_MODE_CLASS (mode) == MODE_INT
+ && y == const0_rtx
+ && GET_CODE (x) == MULT
+ && (op == EQ || op == NE))
+ return CC_Zmode;
+
/* For an operation that sets the condition codes as a side-effect, the
C and V flags is not set as for cmp, so we can only use comparisons where
this doesn't matter. (For LT and GE we can use "mi" and "pl"
@@ -8220,8 +8232,7 @@ hwloop_optimize (hwloop_info loop)
insn = emit_insn (gen_arc_lp (loop->start_label,
loop->end_label));
- seq = get_insns ();
- end_sequence ();
+ seq = end_sequence ();
entry_after = BB_END (entry_bb);
if (!single_succ_p (entry_bb) || vec_safe_length (loop->incoming) > 1
@@ -11546,6 +11557,21 @@ arc_libm_function_max_error (unsigned cfn, machine_mode mode,
return default_libm_function_max_error (cfn, mode, boundary_p);
}
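+/* Emit a conditional branch to LABEL, taken when the comparison CMP of
+   the CC_MODE flags register against zero holds, and mark it as
+   unlikely.  Used by the add/sub/mul overflow expanders in arc.md,
+   e.g. addvsi4 calls this with (NE, CC_Vmode, overflow_label).  */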
+void
+arc_gen_unlikely_cbranch (enum rtx_code cmp, machine_mode cc_mode, rtx label)
+{
+ rtx cc_reg, x;
+
+ cc_reg = gen_rtx_REG (cc_mode, CC_REG);
+ label = gen_rtx_LABEL_REF (VOIDmode, label);
+
+ x = gen_rtx_fmt_ee (cmp, VOIDmode, cc_reg, const0_rtx);
+ x = gen_rtx_IF_THEN_ELSE (VOIDmode, x, label, pc_rtx);
+
+ emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
+}
+
+
#undef TARGET_USE_ANCHORS_FOR_SYMBOL_P
#define TARGET_USE_ANCHORS_FOR_SYMBOL_P arc_use_anchors_for_symbol_p
diff --git a/gcc/config/arc/arc.md b/gcc/config/arc/arc.md
index 1344d9c..8f7e537 100644
--- a/gcc/config/arc/arc.md
+++ b/gcc/config/arc/arc.md
@@ -66,9 +66,9 @@
;; I signed 12-bit immediate (for ARCompact)
;; K unsigned 3-bit immediate (for ARCompact)
;; L unsigned 6-bit immediate (for ARCompact)
-;; M unsinged 5-bit immediate (for ARCompact)
-;; O unsinged 7-bit immediate (for ARCompact)
-;; P unsinged 8-bit immediate (for ARCompact)
+;; M unsigned 5-bit immediate (for ARCompact)
+;; O unsigned 7-bit immediate (for ARCompact)
+;; P unsigned 8-bit immediate (for ARCompact)
;; N constant '1' (for ARCompact)
@@ -842,6 +842,9 @@ archs4x, archs4xd"
; Optab prefix for sign/zero-extending operations
(define_code_attr su_optab [(sign_extend "") (zero_extend "u")])
+;; Code iterator for sign/zero extension
+(define_code_iterator ANY_EXTEND [sign_extend zero_extend])
+
(define_insn "*<SEZ_prefix>xt<SQH_postfix>_cmp0_noout"
[(set (match_operand 0 "cc_set_register" "")
(compare:CC_ZN (SEZ:SI (match_operand:SQH 1 "register_operand" "r"))
@@ -1068,11 +1071,67 @@ archs4x, archs4xd"
(set_attr "cond" "set_zn")
(set_attr "length" "*,4,4,4,8")])
-;; The next two patterns are for plos, ior, xor, and, and mult.
+(define_expand "<su_optab>mulvsi4"
+ [(ANY_EXTEND:DI (match_operand:SI 0 "register_operand"))
+ (ANY_EXTEND:DI (match_operand:SI 1 "register_operand"))
+ (ANY_EXTEND:DI (match_operand:SI 2 "register_operand"))
+ (label_ref (match_operand 3 "" ""))]
+ "TARGET_MPY"
+ {
+ emit_insn (gen_<su_optab>mulsi3_Vcmp (operands[0], operands[1],
+ operands[2]));
+ arc_gen_unlikely_cbranch (NE, CC_Vmode, operands[3]);
+ DONE;
+ })
+
+(define_insn "<su_optab>mulsi3_Vcmp"
+ [(parallel
+ [(set
+ (reg:CC_V CC_REG)
+ (compare:CC_V
+ (mult:DI
+ (ANY_EXTEND:DI (match_operand:SI 1 "register_operand" "%0,r,r,r"))
+ (ANY_EXTEND:DI (match_operand:SI 2 "nonmemory_operand" "I,L,r,C32")))
+ (ANY_EXTEND:DI (mult:SI (match_dup 1) (match_dup 2)))))
+ (set (match_operand:SI 0 "register_operand" "=r,r,r,r")
+ (mult:SI (match_dup 1) (match_dup 2)))])]
+ "register_operand (operands[1], SImode)
+ || register_operand (operands[2], SImode)"
+ "mpy<su_optab>.f\\t%0,%1,%2"
+ [(set_attr "length" "4,4,4,8")
+ (set_attr "type" "multi")])
+
+(define_insn "*mulsi3_cmp0"
+ [(set (reg:CC_Z CC_REG)
+ (compare:CC_Z
+ (mult:SI
+ (match_operand:SI 1 "register_operand" "%r,0,r")
+ (match_operand:SI 2 "nonmemory_operand" "rL,I,i"))
+ (const_int 0)))
+ (set (match_operand:SI 0 "register_operand" "=r,r,r")
+ (mult:SI (match_dup 1) (match_dup 2)))]
+ "TARGET_MPY"
+ "mpy%?.f\\t%0,%1,%2"
+ [(set_attr "length" "4,4,8")
+ (set_attr "type" "multi")])
+
+(define_insn "*mulsi3_cmp0_noout"
+ [(set (reg:CC_Z CC_REG)
+ (compare:CC_Z
+ (mult:SI
+ (match_operand:SI 0 "register_operand" "%r,r,r")
+ (match_operand:SI 1 "nonmemory_operand" "rL,I,i"))
+ (const_int 0)))]
+ "TARGET_MPY"
+ "mpy%?.f\\t0,%0,%1"
+ [(set_attr "length" "4,4,8")
+ (set_attr "type" "multi")])
+
+;; The next two patterns are for plus, ior, xor, and.
(define_insn "*commutative_binary_cmp0_noout"
[(set (match_operand 0 "cc_set_register" "")
(match_operator 4 "zn_compare_operator"
- [(match_operator:SI 3 "commutative_operator"
+ [(match_operator:SI 3 "commutative_operator_sans_mult"
[(match_operand:SI 1 "register_operand" "%r,r")
(match_operand:SI 2 "nonmemory_operand" "rL,Cal")])
(const_int 0)]))]
@@ -1085,7 +1144,7 @@ archs4x, archs4xd"
(define_insn "*commutative_binary_cmp0"
[(set (match_operand 3 "cc_set_register" "")
(match_operator 5 "zn_compare_operator"
- [(match_operator:SI 4 "commutative_operator"
+ [(match_operator:SI 4 "commutative_operator_sans_mult"
[(match_operand:SI 1 "register_operand" "%0, 0,r,r")
(match_operand:SI 2 "nonmemory_operand" "rL,rI,r,Cal")])
(const_int 0)]))
@@ -2734,6 +2793,56 @@ archs4x, archs4xd"
}
[(set_attr "length" "8")])
+(define_insn "addsi3_v"
+ [(set (match_operand:SI 0 "register_operand" "=r,r,r, r")
+ (plus:SI (match_operand:SI 1 "register_operand" "r,r,0, r")
+ (match_operand:SI 2 "nonmemory_operand" "r,L,I,C32")))
+ (set (reg:CC_V CC_REG)
+ (compare:CC_V (sign_extend:DI (plus:SI (match_dup 1)
+ (match_dup 2)))
+ (plus:DI (sign_extend:DI (match_dup 1))
+ (sign_extend:DI (match_dup 2)))))]
+ ""
+ "add.f\\t%0,%1,%2"
+ [(set_attr "cond" "set")
+ (set_attr "type" "compare")
+ (set_attr "length" "4,4,4,8")])
+
+(define_expand "addvsi4"
+ [(match_operand:SI 0 "register_operand")
+ (match_operand:SI 1 "register_operand")
+ (match_operand:SI 2 "nonmemory_operand")
+ (label_ref (match_operand 3 "" ""))]
+ ""
+ "emit_insn (gen_addsi3_v (operands[0], operands[1], operands[2]));
+ arc_gen_unlikely_cbranch (NE, CC_Vmode, operands[3]);
+ DONE;")
+
+(define_insn "addsi3_c"
+ [(set (match_operand:SI 0 "register_operand" "=r,r,r, r")
+ (plus:SI (match_operand:SI 1 "register_operand" "r,r,0, r")
+ (match_operand:SI 2 "nonmemory_operand" "r,L,I,C32")))
+ (set (reg:CC_C CC_REG)
+ (compare:CC_C (plus:SI (match_dup 1)
+ (match_dup 2))
+ (match_dup 1)))]
+ ""
+ "add.f\\t%0,%1,%2"
+ [(set_attr "cond" "set")
+ (set_attr "type" "compare")
+ (set_attr "length" "4,4,4,8")])
+
+(define_expand "uaddvsi4"
+ [(match_operand:SI 0 "register_operand")
+ (match_operand:SI 1 "register_operand")
+ (match_operand:SI 2 "nonmemory_operand")
+ (label_ref (match_operand 3 "" ""))]
+ ""
+ "emit_insn (gen_addsi3_c (operands[0], operands[1], operands[2]));
+ arc_gen_unlikely_cbranch (LTU, CC_Cmode, operands[3]);
+ DONE;")
+
+
(define_insn "add_f"
[(set (reg:CC_C CC_REG)
(compare:CC_C
@@ -2914,6 +3023,54 @@ archs4x, archs4xd"
(set_attr "cpu_facility" "*,cd,*,*,*,*,*,*,*,*")
])
+(define_insn "subsi3_v"
+ [(set (match_operand:SI 0 "register_operand" "=r,r,r, r")
+ (minus:SI (match_operand:SI 1 "register_operand" "r,r,0, r")
+ (match_operand:SI 2 "nonmemory_operand" "r,L,I,C32")))
+ (set (reg:CC_V CC_REG)
+ (compare:CC_V (sign_extend:DI (minus:SI (match_dup 1)
+ (match_dup 2)))
+ (minus:DI (sign_extend:DI (match_dup 1))
+ (sign_extend:DI (match_dup 2)))))]
+ ""
+ "sub.f\\t%0,%1,%2"
+ [(set_attr "cond" "set")
+ (set_attr "type" "compare")
+ (set_attr "length" "4,4,4,8")])
+
+(define_expand "subvsi4"
+ [(match_operand:SI 0 "register_operand")
+ (match_operand:SI 1 "register_operand")
+ (match_operand:SI 2 "nonmemory_operand")
+ (label_ref (match_operand 3 "" ""))]
+ ""
+ "emit_insn (gen_subsi3_v (operands[0], operands[1], operands[2]));
+ arc_gen_unlikely_cbranch (NE, CC_Vmode, operands[3]);
+ DONE;")
+
+(define_insn "subsi3_c"
+ [(set (match_operand:SI 0 "register_operand" "=r,r,r, r")
+ (minus:SI (match_operand:SI 1 "register_operand" "r,r,0, r")
+ (match_operand:SI 2 "nonmemory_operand" "r,L,I,C32")))
+ (set (reg:CC_C CC_REG)
+ (compare:CC_C (match_dup 1)
+ (match_dup 2)))]
+ ""
+ "sub.f\\t%0,%1,%2"
+ [(set_attr "cond" "set")
+ (set_attr "type" "compare")
+ (set_attr "length" "4,4,4,8")])
+
+(define_expand "usubvsi4"
+ [(match_operand:SI 0 "register_operand")
+ (match_operand:SI 1 "register_operand")
+ (match_operand:SI 2 "nonmemory_operand")
+ (label_ref (match_operand 3 "" ""))]
+ ""
+ "emit_insn (gen_subsi3_c (operands[0], operands[1], operands[2]));
+ arc_gen_unlikely_cbranch (LTU, CC_Cmode, operands[3]);
+ DONE;")
+
(define_expand "subdi3"
[(set (match_operand:DI 0 "register_operand" "")
(minus:DI (match_operand:DI 1 "register_operand" "")
diff --git a/gcc/config/arc/predicates.md b/gcc/config/arc/predicates.md
index 209cda9..f506cee 100644
--- a/gcc/config/arc/predicates.md
+++ b/gcc/config/arc/predicates.md
@@ -419,6 +419,8 @@
return code == EQ || code == NE;
case E_CC_Cmode:
return code == LTU || code == GEU;
+ case E_CC_Vmode:
+ return code == EQ || code == NE;
case E_CC_FP_GTmode:
return code == GT || code == UNLE;
case E_CC_FP_GEmode:
@@ -451,7 +453,12 @@
})
(define_predicate "equality_comparison_operator"
- (match_code "eq, ne"))
+ (match_code "eq, ne")
+ {
+ machine_mode opmode = GET_MODE (XEXP (op, 0));
+ return opmode != CC_Vmode;
+ }
+)
(define_predicate "ge_lt_comparison_operator"
(match_code "ge, lt"))
diff --git a/gcc/config/arm/aarch-common.cc b/gcc/config/arm/aarch-common.cc
index 3289853..9cd926e 100644
--- a/gcc/config/arm/aarch-common.cc
+++ b/gcc/config/arm/aarch-common.cc
@@ -655,8 +655,7 @@ arm_md_asm_adjust (vec<rtx> &outputs, vec<rtx> & /*inputs*/,
emit_move_insn (dest, tmp);
}
}
- rtx_insn *seq = get_insns ();
- end_sequence ();
+ rtx_insn *seq = end_sequence ();
return saw_asm_flag ? seq : NULL;
}
diff --git a/gcc/config/arm/aarch-cost-tables.h b/gcc/config/arm/aarch-cost-tables.h
index c7a14b3..0600e59 100644
--- a/gcc/config/arm/aarch-cost-tables.h
+++ b/gcc/config/arm/aarch-cost-tables.h
@@ -123,9 +123,9 @@ const struct cpu_cost_table generic_extra_costs =
{
COSTS_N_INSNS (1), /* alu. */
COSTS_N_INSNS (4), /* mult. */
- COSTS_N_INSNS (1), /* movi. */
- COSTS_N_INSNS (2), /* dup. */
- COSTS_N_INSNS (2) /* extract. */
+ COSTS_N_INSNS (0), /* movi. */
+ COSTS_N_INSNS (1), /* dup. */
+ COSTS_N_INSNS (1) /* extract. */
}
};
@@ -230,9 +230,9 @@ const struct cpu_cost_table cortexa53_extra_costs =
{
COSTS_N_INSNS (1), /* alu. */
COSTS_N_INSNS (4), /* mult. */
- COSTS_N_INSNS (1), /* movi. */
- COSTS_N_INSNS (2), /* dup. */
- COSTS_N_INSNS (2) /* extract. */
+ COSTS_N_INSNS (0), /* movi. */
+ COSTS_N_INSNS (1), /* dup. */
+ COSTS_N_INSNS (1) /* extract. */
}
};
@@ -337,9 +337,9 @@ const struct cpu_cost_table cortexa57_extra_costs =
{
COSTS_N_INSNS (1), /* alu. */
COSTS_N_INSNS (4), /* mult. */
- COSTS_N_INSNS (1), /* movi. */
- COSTS_N_INSNS (2), /* dup. */
- COSTS_N_INSNS (2) /* extract. */
+ COSTS_N_INSNS (0), /* movi. */
+ COSTS_N_INSNS (1), /* dup. */
+ COSTS_N_INSNS (1) /* extract. */
}
};
@@ -444,9 +444,9 @@ const struct cpu_cost_table cortexa76_extra_costs =
{
COSTS_N_INSNS (1), /* alu. */
COSTS_N_INSNS (4), /* mult. */
- COSTS_N_INSNS (1), /* movi. */
- COSTS_N_INSNS (2), /* dup. */
- COSTS_N_INSNS (2) /* extract. */
+ COSTS_N_INSNS (0), /* movi. */
+ COSTS_N_INSNS (1), /* dup. */
+ COSTS_N_INSNS (1) /* extract. */
}
};
@@ -551,9 +551,9 @@ const struct cpu_cost_table exynosm1_extra_costs =
{
COSTS_N_INSNS (0), /* alu. */
COSTS_N_INSNS (4), /* mult. */
- COSTS_N_INSNS (1), /* movi. */
- COSTS_N_INSNS (2), /* dup. */
- COSTS_N_INSNS (2) /* extract. */
+ COSTS_N_INSNS (0), /* movi. */
+ COSTS_N_INSNS (1), /* dup. */
+ COSTS_N_INSNS (1) /* extract. */
}
};
@@ -658,9 +658,9 @@ const struct cpu_cost_table xgene1_extra_costs =
{
COSTS_N_INSNS (2), /* alu. */
COSTS_N_INSNS (8), /* mult. */
- COSTS_N_INSNS (1), /* movi. */
- COSTS_N_INSNS (2), /* dup. */
- COSTS_N_INSNS (2) /* extract. */
+ COSTS_N_INSNS (0), /* movi. */
+ COSTS_N_INSNS (1), /* dup. */
+ COSTS_N_INSNS (1) /* extract. */
}
};
diff --git a/gcc/config/arm/aout.h b/gcc/config/arm/aout.h
index cdce361..a9b0dfa 100644
--- a/gcc/config/arm/aout.h
+++ b/gcc/config/arm/aout.h
@@ -69,11 +69,6 @@
"d20", "?20", "d21", "?21", "d22", "?22", "d23", "?23", \
"d24", "?24", "d25", "?25", "d26", "?26", "d27", "?27", \
"d28", "?28", "d29", "?29", "d30", "?30", "d31", "?31", \
- "wr0", "wr1", "wr2", "wr3", \
- "wr4", "wr5", "wr6", "wr7", \
- "wr8", "wr9", "wr10", "wr11", \
- "wr12", "wr13", "wr14", "wr15", \
- "wcgr0", "wcgr1", "wcgr2", "wcgr3", \
"cc", "vfpcc", "sfp", "afp", "apsrq", "apsrge", "p0", \
"ra_auth_code" \
}
diff --git a/gcc/config/arm/arm-builtins.cc b/gcc/config/arm/arm-builtins.cc
index c56ab5d..3bb2566 100644
--- a/gcc/config/arm/arm-builtins.cc
+++ b/gcc/config/arm/arm-builtins.cc
@@ -816,252 +816,6 @@ static arm_builtin_cde_datum cde_builtin_data[] =
enum arm_builtins
{
- ARM_BUILTIN_GETWCGR0,
- ARM_BUILTIN_GETWCGR1,
- ARM_BUILTIN_GETWCGR2,
- ARM_BUILTIN_GETWCGR3,
-
- ARM_BUILTIN_SETWCGR0,
- ARM_BUILTIN_SETWCGR1,
- ARM_BUILTIN_SETWCGR2,
- ARM_BUILTIN_SETWCGR3,
-
- ARM_BUILTIN_WZERO,
-
- ARM_BUILTIN_WAVG2BR,
- ARM_BUILTIN_WAVG2HR,
- ARM_BUILTIN_WAVG2B,
- ARM_BUILTIN_WAVG2H,
-
- ARM_BUILTIN_WACCB,
- ARM_BUILTIN_WACCH,
- ARM_BUILTIN_WACCW,
-
- ARM_BUILTIN_WMACS,
- ARM_BUILTIN_WMACSZ,
- ARM_BUILTIN_WMACU,
- ARM_BUILTIN_WMACUZ,
-
- ARM_BUILTIN_WSADB,
- ARM_BUILTIN_WSADBZ,
- ARM_BUILTIN_WSADH,
- ARM_BUILTIN_WSADHZ,
-
- ARM_BUILTIN_WALIGNI,
- ARM_BUILTIN_WALIGNR0,
- ARM_BUILTIN_WALIGNR1,
- ARM_BUILTIN_WALIGNR2,
- ARM_BUILTIN_WALIGNR3,
-
- ARM_BUILTIN_TMIA,
- ARM_BUILTIN_TMIAPH,
- ARM_BUILTIN_TMIABB,
- ARM_BUILTIN_TMIABT,
- ARM_BUILTIN_TMIATB,
- ARM_BUILTIN_TMIATT,
-
- ARM_BUILTIN_TMOVMSKB,
- ARM_BUILTIN_TMOVMSKH,
- ARM_BUILTIN_TMOVMSKW,
-
- ARM_BUILTIN_TBCSTB,
- ARM_BUILTIN_TBCSTH,
- ARM_BUILTIN_TBCSTW,
-
- ARM_BUILTIN_WMADDS,
- ARM_BUILTIN_WMADDU,
-
- ARM_BUILTIN_WPACKHSS,
- ARM_BUILTIN_WPACKWSS,
- ARM_BUILTIN_WPACKDSS,
- ARM_BUILTIN_WPACKHUS,
- ARM_BUILTIN_WPACKWUS,
- ARM_BUILTIN_WPACKDUS,
-
- ARM_BUILTIN_WADDB,
- ARM_BUILTIN_WADDH,
- ARM_BUILTIN_WADDW,
- ARM_BUILTIN_WADDSSB,
- ARM_BUILTIN_WADDSSH,
- ARM_BUILTIN_WADDSSW,
- ARM_BUILTIN_WADDUSB,
- ARM_BUILTIN_WADDUSH,
- ARM_BUILTIN_WADDUSW,
- ARM_BUILTIN_WSUBB,
- ARM_BUILTIN_WSUBH,
- ARM_BUILTIN_WSUBW,
- ARM_BUILTIN_WSUBSSB,
- ARM_BUILTIN_WSUBSSH,
- ARM_BUILTIN_WSUBSSW,
- ARM_BUILTIN_WSUBUSB,
- ARM_BUILTIN_WSUBUSH,
- ARM_BUILTIN_WSUBUSW,
-
- ARM_BUILTIN_WAND,
- ARM_BUILTIN_WANDN,
- ARM_BUILTIN_WOR,
- ARM_BUILTIN_WXOR,
-
- ARM_BUILTIN_WCMPEQB,
- ARM_BUILTIN_WCMPEQH,
- ARM_BUILTIN_WCMPEQW,
- ARM_BUILTIN_WCMPGTUB,
- ARM_BUILTIN_WCMPGTUH,
- ARM_BUILTIN_WCMPGTUW,
- ARM_BUILTIN_WCMPGTSB,
- ARM_BUILTIN_WCMPGTSH,
- ARM_BUILTIN_WCMPGTSW,
-
- ARM_BUILTIN_TEXTRMSB,
- ARM_BUILTIN_TEXTRMSH,
- ARM_BUILTIN_TEXTRMSW,
- ARM_BUILTIN_TEXTRMUB,
- ARM_BUILTIN_TEXTRMUH,
- ARM_BUILTIN_TEXTRMUW,
- ARM_BUILTIN_TINSRB,
- ARM_BUILTIN_TINSRH,
- ARM_BUILTIN_TINSRW,
-
- ARM_BUILTIN_WMAXSW,
- ARM_BUILTIN_WMAXSH,
- ARM_BUILTIN_WMAXSB,
- ARM_BUILTIN_WMAXUW,
- ARM_BUILTIN_WMAXUH,
- ARM_BUILTIN_WMAXUB,
- ARM_BUILTIN_WMINSW,
- ARM_BUILTIN_WMINSH,
- ARM_BUILTIN_WMINSB,
- ARM_BUILTIN_WMINUW,
- ARM_BUILTIN_WMINUH,
- ARM_BUILTIN_WMINUB,
-
- ARM_BUILTIN_WMULUM,
- ARM_BUILTIN_WMULSM,
- ARM_BUILTIN_WMULUL,
-
- ARM_BUILTIN_PSADBH,
- ARM_BUILTIN_WSHUFH,
-
- ARM_BUILTIN_WSLLH,
- ARM_BUILTIN_WSLLW,
- ARM_BUILTIN_WSLLD,
- ARM_BUILTIN_WSRAH,
- ARM_BUILTIN_WSRAW,
- ARM_BUILTIN_WSRAD,
- ARM_BUILTIN_WSRLH,
- ARM_BUILTIN_WSRLW,
- ARM_BUILTIN_WSRLD,
- ARM_BUILTIN_WRORH,
- ARM_BUILTIN_WRORW,
- ARM_BUILTIN_WRORD,
- ARM_BUILTIN_WSLLHI,
- ARM_BUILTIN_WSLLWI,
- ARM_BUILTIN_WSLLDI,
- ARM_BUILTIN_WSRAHI,
- ARM_BUILTIN_WSRAWI,
- ARM_BUILTIN_WSRADI,
- ARM_BUILTIN_WSRLHI,
- ARM_BUILTIN_WSRLWI,
- ARM_BUILTIN_WSRLDI,
- ARM_BUILTIN_WRORHI,
- ARM_BUILTIN_WRORWI,
- ARM_BUILTIN_WRORDI,
-
- ARM_BUILTIN_WUNPCKIHB,
- ARM_BUILTIN_WUNPCKIHH,
- ARM_BUILTIN_WUNPCKIHW,
- ARM_BUILTIN_WUNPCKILB,
- ARM_BUILTIN_WUNPCKILH,
- ARM_BUILTIN_WUNPCKILW,
-
- ARM_BUILTIN_WUNPCKEHSB,
- ARM_BUILTIN_WUNPCKEHSH,
- ARM_BUILTIN_WUNPCKEHSW,
- ARM_BUILTIN_WUNPCKEHUB,
- ARM_BUILTIN_WUNPCKEHUH,
- ARM_BUILTIN_WUNPCKEHUW,
- ARM_BUILTIN_WUNPCKELSB,
- ARM_BUILTIN_WUNPCKELSH,
- ARM_BUILTIN_WUNPCKELSW,
- ARM_BUILTIN_WUNPCKELUB,
- ARM_BUILTIN_WUNPCKELUH,
- ARM_BUILTIN_WUNPCKELUW,
-
- ARM_BUILTIN_WABSB,
- ARM_BUILTIN_WABSH,
- ARM_BUILTIN_WABSW,
-
- ARM_BUILTIN_WADDSUBHX,
- ARM_BUILTIN_WSUBADDHX,
-
- ARM_BUILTIN_WABSDIFFB,
- ARM_BUILTIN_WABSDIFFH,
- ARM_BUILTIN_WABSDIFFW,
-
- ARM_BUILTIN_WADDCH,
- ARM_BUILTIN_WADDCW,
-
- ARM_BUILTIN_WAVG4,
- ARM_BUILTIN_WAVG4R,
-
- ARM_BUILTIN_WMADDSX,
- ARM_BUILTIN_WMADDUX,
-
- ARM_BUILTIN_WMADDSN,
- ARM_BUILTIN_WMADDUN,
-
- ARM_BUILTIN_WMULWSM,
- ARM_BUILTIN_WMULWUM,
-
- ARM_BUILTIN_WMULWSMR,
- ARM_BUILTIN_WMULWUMR,
-
- ARM_BUILTIN_WMULWL,
-
- ARM_BUILTIN_WMULSMR,
- ARM_BUILTIN_WMULUMR,
-
- ARM_BUILTIN_WQMULM,
- ARM_BUILTIN_WQMULMR,
-
- ARM_BUILTIN_WQMULWM,
- ARM_BUILTIN_WQMULWMR,
-
- ARM_BUILTIN_WADDBHUSM,
- ARM_BUILTIN_WADDBHUSL,
-
- ARM_BUILTIN_WQMIABB,
- ARM_BUILTIN_WQMIABT,
- ARM_BUILTIN_WQMIATB,
- ARM_BUILTIN_WQMIATT,
-
- ARM_BUILTIN_WQMIABBN,
- ARM_BUILTIN_WQMIABTN,
- ARM_BUILTIN_WQMIATBN,
- ARM_BUILTIN_WQMIATTN,
-
- ARM_BUILTIN_WMIABB,
- ARM_BUILTIN_WMIABT,
- ARM_BUILTIN_WMIATB,
- ARM_BUILTIN_WMIATT,
-
- ARM_BUILTIN_WMIABBN,
- ARM_BUILTIN_WMIABTN,
- ARM_BUILTIN_WMIATBN,
- ARM_BUILTIN_WMIATTN,
-
- ARM_BUILTIN_WMIAWBB,
- ARM_BUILTIN_WMIAWBT,
- ARM_BUILTIN_WMIAWTB,
- ARM_BUILTIN_WMIAWTT,
-
- ARM_BUILTIN_WMIAWBBN,
- ARM_BUILTIN_WMIAWBTN,
- ARM_BUILTIN_WMIAWTBN,
- ARM_BUILTIN_WMIAWTTN,
-
- ARM_BUILTIN_WMERGE,
-
ARM_BUILTIN_GET_FPSCR,
ARM_BUILTIN_SET_FPSCR,
ARM_BUILTIN_GET_FPSCR_NZCVQC,
@@ -1878,115 +1632,6 @@ struct builtin_description
static const struct builtin_description bdesc_2arg[] =
{
-#define IWMMXT_BUILTIN(code, string, builtin) \
- { isa_bit_iwmmxt, CODE_FOR_##code, \
- "__builtin_arm_" string, \
- ARM_BUILTIN_##builtin, UNKNOWN, 0 },
-
-#define IWMMXT2_BUILTIN(code, string, builtin) \
- { isa_bit_iwmmxt2, CODE_FOR_##code, \
- "__builtin_arm_" string, \
- ARM_BUILTIN_##builtin, UNKNOWN, 0 },
-
- IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB)
- IWMMXT_BUILTIN (addv4hi3, "waddh", WADDH)
- IWMMXT_BUILTIN (addv2si3, "waddw", WADDW)
- IWMMXT_BUILTIN (subv8qi3, "wsubb", WSUBB)
- IWMMXT_BUILTIN (subv4hi3, "wsubh", WSUBH)
- IWMMXT_BUILTIN (subv2si3, "wsubw", WSUBW)
- IWMMXT_BUILTIN (ssaddv8qi3, "waddbss", WADDSSB)
- IWMMXT_BUILTIN (ssaddv4hi3, "waddhss", WADDSSH)
- IWMMXT_BUILTIN (ssaddv2si3, "waddwss", WADDSSW)
- IWMMXT_BUILTIN (sssubv8qi3, "wsubbss", WSUBSSB)
- IWMMXT_BUILTIN (sssubv4hi3, "wsubhss", WSUBSSH)
- IWMMXT_BUILTIN (sssubv2si3, "wsubwss", WSUBSSW)
- IWMMXT_BUILTIN (usaddv8qi3, "waddbus", WADDUSB)
- IWMMXT_BUILTIN (usaddv4hi3, "waddhus", WADDUSH)
- IWMMXT_BUILTIN (usaddv2si3, "waddwus", WADDUSW)
- IWMMXT_BUILTIN (ussubv8qi3, "wsubbus", WSUBUSB)
- IWMMXT_BUILTIN (ussubv4hi3, "wsubhus", WSUBUSH)
- IWMMXT_BUILTIN (ussubv2si3, "wsubwus", WSUBUSW)
- IWMMXT_BUILTIN (mulv4hi3, "wmulul", WMULUL)
- IWMMXT_BUILTIN (smulv4hi3_highpart, "wmulsm", WMULSM)
- IWMMXT_BUILTIN (umulv4hi3_highpart, "wmulum", WMULUM)
- IWMMXT_BUILTIN (eqv8qi3, "wcmpeqb", WCMPEQB)
- IWMMXT_BUILTIN (eqv4hi3, "wcmpeqh", WCMPEQH)
- IWMMXT_BUILTIN (eqv2si3, "wcmpeqw", WCMPEQW)
- IWMMXT_BUILTIN (gtuv8qi3, "wcmpgtub", WCMPGTUB)
- IWMMXT_BUILTIN (gtuv4hi3, "wcmpgtuh", WCMPGTUH)
- IWMMXT_BUILTIN (gtuv2si3, "wcmpgtuw", WCMPGTUW)
- IWMMXT_BUILTIN (gtv8qi3, "wcmpgtsb", WCMPGTSB)
- IWMMXT_BUILTIN (gtv4hi3, "wcmpgtsh", WCMPGTSH)
- IWMMXT_BUILTIN (gtv2si3, "wcmpgtsw", WCMPGTSW)
- IWMMXT_BUILTIN (umaxv8qi3, "wmaxub", WMAXUB)
- IWMMXT_BUILTIN (smaxv8qi3, "wmaxsb", WMAXSB)
- IWMMXT_BUILTIN (umaxv4hi3, "wmaxuh", WMAXUH)
- IWMMXT_BUILTIN (smaxv4hi3, "wmaxsh", WMAXSH)
- IWMMXT_BUILTIN (umaxv2si3, "wmaxuw", WMAXUW)
- IWMMXT_BUILTIN (smaxv2si3, "wmaxsw", WMAXSW)
- IWMMXT_BUILTIN (uminv8qi3, "wminub", WMINUB)
- IWMMXT_BUILTIN (sminv8qi3, "wminsb", WMINSB)
- IWMMXT_BUILTIN (uminv4hi3, "wminuh", WMINUH)
- IWMMXT_BUILTIN (sminv4hi3, "wminsh", WMINSH)
- IWMMXT_BUILTIN (uminv2si3, "wminuw", WMINUW)
- IWMMXT_BUILTIN (sminv2si3, "wminsw", WMINSW)
- IWMMXT_BUILTIN (iwmmxt_anddi3, "wand", WAND)
- IWMMXT_BUILTIN (iwmmxt_nanddi3, "wandn", WANDN)
- IWMMXT_BUILTIN (iwmmxt_iordi3, "wor", WOR)
- IWMMXT_BUILTIN (iwmmxt_xordi3, "wxor", WXOR)
- IWMMXT_BUILTIN (iwmmxt_uavgv8qi3, "wavg2b", WAVG2B)
- IWMMXT_BUILTIN (iwmmxt_uavgv4hi3, "wavg2h", WAVG2H)
- IWMMXT_BUILTIN (iwmmxt_uavgrndv8qi3, "wavg2br", WAVG2BR)
- IWMMXT_BUILTIN (iwmmxt_uavgrndv4hi3, "wavg2hr", WAVG2HR)
- IWMMXT_BUILTIN (iwmmxt_wunpckilb, "wunpckilb", WUNPCKILB)
- IWMMXT_BUILTIN (iwmmxt_wunpckilh, "wunpckilh", WUNPCKILH)
- IWMMXT_BUILTIN (iwmmxt_wunpckilw, "wunpckilw", WUNPCKILW)
- IWMMXT_BUILTIN (iwmmxt_wunpckihb, "wunpckihb", WUNPCKIHB)
- IWMMXT_BUILTIN (iwmmxt_wunpckihh, "wunpckihh", WUNPCKIHH)
- IWMMXT_BUILTIN (iwmmxt_wunpckihw, "wunpckihw", WUNPCKIHW)
- IWMMXT2_BUILTIN (iwmmxt_waddsubhx, "waddsubhx", WADDSUBHX)
- IWMMXT2_BUILTIN (iwmmxt_wsubaddhx, "wsubaddhx", WSUBADDHX)
- IWMMXT2_BUILTIN (iwmmxt_wabsdiffb, "wabsdiffb", WABSDIFFB)
- IWMMXT2_BUILTIN (iwmmxt_wabsdiffh, "wabsdiffh", WABSDIFFH)
- IWMMXT2_BUILTIN (iwmmxt_wabsdiffw, "wabsdiffw", WABSDIFFW)
- IWMMXT2_BUILTIN (iwmmxt_avg4, "wavg4", WAVG4)
- IWMMXT2_BUILTIN (iwmmxt_avg4r, "wavg4r", WAVG4R)
- IWMMXT2_BUILTIN (iwmmxt_wmulwsm, "wmulwsm", WMULWSM)
- IWMMXT2_BUILTIN (iwmmxt_wmulwum, "wmulwum", WMULWUM)
- IWMMXT2_BUILTIN (iwmmxt_wmulwsmr, "wmulwsmr", WMULWSMR)
- IWMMXT2_BUILTIN (iwmmxt_wmulwumr, "wmulwumr", WMULWUMR)
- IWMMXT2_BUILTIN (iwmmxt_wmulwl, "wmulwl", WMULWL)
- IWMMXT2_BUILTIN (iwmmxt_wmulsmr, "wmulsmr", WMULSMR)
- IWMMXT2_BUILTIN (iwmmxt_wmulumr, "wmulumr", WMULUMR)
- IWMMXT2_BUILTIN (iwmmxt_wqmulm, "wqmulm", WQMULM)
- IWMMXT2_BUILTIN (iwmmxt_wqmulmr, "wqmulmr", WQMULMR)
- IWMMXT2_BUILTIN (iwmmxt_wqmulwm, "wqmulwm", WQMULWM)
- IWMMXT2_BUILTIN (iwmmxt_wqmulwmr, "wqmulwmr", WQMULWMR)
- IWMMXT_BUILTIN (iwmmxt_walignr0, "walignr0", WALIGNR0)
- IWMMXT_BUILTIN (iwmmxt_walignr1, "walignr1", WALIGNR1)
- IWMMXT_BUILTIN (iwmmxt_walignr2, "walignr2", WALIGNR2)
- IWMMXT_BUILTIN (iwmmxt_walignr3, "walignr3", WALIGNR3)
-
-#define IWMMXT_BUILTIN2(code, builtin) \
- { isa_bit_iwmmxt, CODE_FOR_##code, NULL, \
- ARM_BUILTIN_##builtin, UNKNOWN, 0 },
-
-#define IWMMXT2_BUILTIN2(code, builtin) \
- { isa_bit_iwmmxt2, CODE_FOR_##code, NULL, \
- ARM_BUILTIN_##builtin, UNKNOWN, 0 },
-
- IWMMXT2_BUILTIN2 (iwmmxt_waddbhusm, WADDBHUSM)
- IWMMXT2_BUILTIN2 (iwmmxt_waddbhusl, WADDBHUSL)
- IWMMXT_BUILTIN2 (iwmmxt_wpackhss, WPACKHSS)
- IWMMXT_BUILTIN2 (iwmmxt_wpackwss, WPACKWSS)
- IWMMXT_BUILTIN2 (iwmmxt_wpackdss, WPACKDSS)
- IWMMXT_BUILTIN2 (iwmmxt_wpackhus, WPACKHUS)
- IWMMXT_BUILTIN2 (iwmmxt_wpackwus, WPACKWUS)
- IWMMXT_BUILTIN2 (iwmmxt_wpackdus, WPACKDUS)
- IWMMXT_BUILTIN2 (iwmmxt_wmacuz, WMACUZ)
- IWMMXT_BUILTIN2 (iwmmxt_wmacsz, WMACSZ)
-
-
#define FP_BUILTIN(L, U) \
{isa_nobit, CODE_FOR_##L, "__builtin_arm_"#L, ARM_BUILTIN_##U, \
UNKNOWN, 0},
@@ -2013,31 +1658,6 @@ static const struct builtin_description bdesc_2arg[] =
static const struct builtin_description bdesc_1arg[] =
{
- IWMMXT_BUILTIN (iwmmxt_tmovmskb, "tmovmskb", TMOVMSKB)
- IWMMXT_BUILTIN (iwmmxt_tmovmskh, "tmovmskh", TMOVMSKH)
- IWMMXT_BUILTIN (iwmmxt_tmovmskw, "tmovmskw", TMOVMSKW)
- IWMMXT_BUILTIN (iwmmxt_waccb, "waccb", WACCB)
- IWMMXT_BUILTIN (iwmmxt_wacch, "wacch", WACCH)
- IWMMXT_BUILTIN (iwmmxt_waccw, "waccw", WACCW)
- IWMMXT_BUILTIN (iwmmxt_wunpckehub, "wunpckehub", WUNPCKEHUB)
- IWMMXT_BUILTIN (iwmmxt_wunpckehuh, "wunpckehuh", WUNPCKEHUH)
- IWMMXT_BUILTIN (iwmmxt_wunpckehuw, "wunpckehuw", WUNPCKEHUW)
- IWMMXT_BUILTIN (iwmmxt_wunpckehsb, "wunpckehsb", WUNPCKEHSB)
- IWMMXT_BUILTIN (iwmmxt_wunpckehsh, "wunpckehsh", WUNPCKEHSH)
- IWMMXT_BUILTIN (iwmmxt_wunpckehsw, "wunpckehsw", WUNPCKEHSW)
- IWMMXT_BUILTIN (iwmmxt_wunpckelub, "wunpckelub", WUNPCKELUB)
- IWMMXT_BUILTIN (iwmmxt_wunpckeluh, "wunpckeluh", WUNPCKELUH)
- IWMMXT_BUILTIN (iwmmxt_wunpckeluw, "wunpckeluw", WUNPCKELUW)
- IWMMXT_BUILTIN (iwmmxt_wunpckelsb, "wunpckelsb", WUNPCKELSB)
- IWMMXT_BUILTIN (iwmmxt_wunpckelsh, "wunpckelsh", WUNPCKELSH)
- IWMMXT_BUILTIN (iwmmxt_wunpckelsw, "wunpckelsw", WUNPCKELSW)
- IWMMXT2_BUILTIN (iwmmxt_wabsv8qi3, "wabsb", WABSB)
- IWMMXT2_BUILTIN (iwmmxt_wabsv4hi3, "wabsh", WABSH)
- IWMMXT2_BUILTIN (iwmmxt_wabsv2si3, "wabsw", WABSW)
- IWMMXT_BUILTIN (tbcstv8qi, "tbcstb", TBCSTB)
- IWMMXT_BUILTIN (tbcstv4hi, "tbcsth", TBCSTH)
- IWMMXT_BUILTIN (tbcstv2si, "tbcstw", TBCSTW)
-
#define CRYPTO1(L, U, R, A) CRYPTO_BUILTIN (L, U)
#define CRYPTO2(L, U, R, A1, A2)
#define CRYPTO3(L, U, R, A1, A2, A3)
@@ -2059,387 +1679,6 @@ static const struct builtin_description bdesc_3arg[] =
};
#undef CRYPTO_BUILTIN
-/* Set up all the iWMMXt builtins. This is not called if
- TARGET_IWMMXT is zero. */
-
-static void
-arm_init_iwmmxt_builtins (void)
-{
- const struct builtin_description * d;
- size_t i;
-
- tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
- tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
- tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
-
- tree v8qi_ftype_v8qi_v8qi_int
- = build_function_type_list (V8QI_type_node,
- V8QI_type_node, V8QI_type_node,
- integer_type_node, NULL_TREE);
- tree v4hi_ftype_v4hi_int
- = build_function_type_list (V4HI_type_node,
- V4HI_type_node, integer_type_node, NULL_TREE);
- tree v2si_ftype_v2si_int
- = build_function_type_list (V2SI_type_node,
- V2SI_type_node, integer_type_node, NULL_TREE);
- tree v2si_ftype_di_di
- = build_function_type_list (V2SI_type_node,
- long_long_integer_type_node,
- long_long_integer_type_node,
- NULL_TREE);
- tree di_ftype_di_int
- = build_function_type_list (long_long_integer_type_node,
- long_long_integer_type_node,
- integer_type_node, NULL_TREE);
- tree di_ftype_di_int_int
- = build_function_type_list (long_long_integer_type_node,
- long_long_integer_type_node,
- integer_type_node,
- integer_type_node, NULL_TREE);
- tree int_ftype_v8qi
- = build_function_type_list (integer_type_node,
- V8QI_type_node, NULL_TREE);
- tree int_ftype_v4hi
- = build_function_type_list (integer_type_node,
- V4HI_type_node, NULL_TREE);
- tree int_ftype_v2si
- = build_function_type_list (integer_type_node,
- V2SI_type_node, NULL_TREE);
- tree int_ftype_v8qi_int
- = build_function_type_list (integer_type_node,
- V8QI_type_node, integer_type_node, NULL_TREE);
- tree int_ftype_v4hi_int
- = build_function_type_list (integer_type_node,
- V4HI_type_node, integer_type_node, NULL_TREE);
- tree int_ftype_v2si_int
- = build_function_type_list (integer_type_node,
- V2SI_type_node, integer_type_node, NULL_TREE);
- tree v8qi_ftype_v8qi_int_int
- = build_function_type_list (V8QI_type_node,
- V8QI_type_node, integer_type_node,
- integer_type_node, NULL_TREE);
- tree v4hi_ftype_v4hi_int_int
- = build_function_type_list (V4HI_type_node,
- V4HI_type_node, integer_type_node,
- integer_type_node, NULL_TREE);
- tree v2si_ftype_v2si_int_int
- = build_function_type_list (V2SI_type_node,
- V2SI_type_node, integer_type_node,
- integer_type_node, NULL_TREE);
- /* Miscellaneous. */
- tree v8qi_ftype_v4hi_v4hi
- = build_function_type_list (V8QI_type_node,
- V4HI_type_node, V4HI_type_node, NULL_TREE);
- tree v4hi_ftype_v2si_v2si
- = build_function_type_list (V4HI_type_node,
- V2SI_type_node, V2SI_type_node, NULL_TREE);
- tree v8qi_ftype_v4hi_v8qi
- = build_function_type_list (V8QI_type_node,
- V4HI_type_node, V8QI_type_node, NULL_TREE);
- tree v2si_ftype_v4hi_v4hi
- = build_function_type_list (V2SI_type_node,
- V4HI_type_node, V4HI_type_node, NULL_TREE);
- tree v2si_ftype_v8qi_v8qi
- = build_function_type_list (V2SI_type_node,
- V8QI_type_node, V8QI_type_node, NULL_TREE);
- tree v4hi_ftype_v4hi_di
- = build_function_type_list (V4HI_type_node,
- V4HI_type_node, long_long_integer_type_node,
- NULL_TREE);
- tree v2si_ftype_v2si_di
- = build_function_type_list (V2SI_type_node,
- V2SI_type_node, long_long_integer_type_node,
- NULL_TREE);
- tree di_ftype_void
- = build_function_type_list (long_long_unsigned_type_node, NULL_TREE);
- tree int_ftype_void
- = build_function_type_list (integer_type_node, NULL_TREE);
- tree di_ftype_v8qi
- = build_function_type_list (long_long_integer_type_node,
- V8QI_type_node, NULL_TREE);
- tree di_ftype_v4hi
- = build_function_type_list (long_long_integer_type_node,
- V4HI_type_node, NULL_TREE);
- tree di_ftype_v2si
- = build_function_type_list (long_long_integer_type_node,
- V2SI_type_node, NULL_TREE);
- tree v2si_ftype_v4hi
- = build_function_type_list (V2SI_type_node,
- V4HI_type_node, NULL_TREE);
- tree v4hi_ftype_v8qi
- = build_function_type_list (V4HI_type_node,
- V8QI_type_node, NULL_TREE);
- tree v8qi_ftype_v8qi
- = build_function_type_list (V8QI_type_node,
- V8QI_type_node, NULL_TREE);
- tree v4hi_ftype_v4hi
- = build_function_type_list (V4HI_type_node,
- V4HI_type_node, NULL_TREE);
- tree v2si_ftype_v2si
- = build_function_type_list (V2SI_type_node,
- V2SI_type_node, NULL_TREE);
-
- tree di_ftype_di_v4hi_v4hi
- = build_function_type_list (long_long_unsigned_type_node,
- long_long_unsigned_type_node,
- V4HI_type_node, V4HI_type_node,
- NULL_TREE);
-
- tree di_ftype_v4hi_v4hi
- = build_function_type_list (long_long_unsigned_type_node,
- V4HI_type_node,V4HI_type_node,
- NULL_TREE);
-
- tree v2si_ftype_v2si_v4hi_v4hi
- = build_function_type_list (V2SI_type_node,
- V2SI_type_node, V4HI_type_node,
- V4HI_type_node, NULL_TREE);
-
- tree v2si_ftype_v2si_v8qi_v8qi
- = build_function_type_list (V2SI_type_node,
- V2SI_type_node, V8QI_type_node,
- V8QI_type_node, NULL_TREE);
-
- tree di_ftype_di_v2si_v2si
- = build_function_type_list (long_long_unsigned_type_node,
- long_long_unsigned_type_node,
- V2SI_type_node, V2SI_type_node,
- NULL_TREE);
-
- tree di_ftype_di_di_int
- = build_function_type_list (long_long_unsigned_type_node,
- long_long_unsigned_type_node,
- long_long_unsigned_type_node,
- integer_type_node, NULL_TREE);
-
- tree void_ftype_int
- = build_function_type_list (void_type_node,
- integer_type_node, NULL_TREE);
-
- tree v8qi_ftype_char
- = build_function_type_list (V8QI_type_node,
- signed_char_type_node, NULL_TREE);
-
- tree v4hi_ftype_short
- = build_function_type_list (V4HI_type_node,
- short_integer_type_node, NULL_TREE);
-
- tree v2si_ftype_int
- = build_function_type_list (V2SI_type_node,
- integer_type_node, NULL_TREE);
-
- /* Normal vector binops. */
- tree v8qi_ftype_v8qi_v8qi
- = build_function_type_list (V8QI_type_node,
- V8QI_type_node, V8QI_type_node, NULL_TREE);
- tree v4hi_ftype_v4hi_v4hi
- = build_function_type_list (V4HI_type_node,
- V4HI_type_node,V4HI_type_node, NULL_TREE);
- tree v2si_ftype_v2si_v2si
- = build_function_type_list (V2SI_type_node,
- V2SI_type_node, V2SI_type_node, NULL_TREE);
- tree di_ftype_di_di
- = build_function_type_list (long_long_unsigned_type_node,
- long_long_unsigned_type_node,
- long_long_unsigned_type_node,
- NULL_TREE);
-
- /* Add all builtins that are more or less simple operations on two
- operands. */
- for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
- {
- /* Use one of the operands; the target can have a different mode for
- mask-generating compares. */
- machine_mode mode;
- tree type;
-
- if (d->name == 0
- || !(d->feature == isa_bit_iwmmxt
- || d->feature == isa_bit_iwmmxt2))
- continue;
-
- mode = insn_data[d->icode].operand[1].mode;
-
- switch (mode)
- {
- case E_V8QImode:
- type = v8qi_ftype_v8qi_v8qi;
- break;
- case E_V4HImode:
- type = v4hi_ftype_v4hi_v4hi;
- break;
- case E_V2SImode:
- type = v2si_ftype_v2si_v2si;
- break;
- case E_DImode:
- type = di_ftype_di_di;
- break;
-
- default:
- gcc_unreachable ();
- }
-
- def_mbuiltin (d->feature, d->name, type, d->code);
- }
-
- /* Add the remaining MMX insns with somewhat more complicated types. */
-#define iwmmx_mbuiltin(NAME, TYPE, CODE) \
- def_mbuiltin (isa_bit_iwmmxt, "__builtin_arm_" NAME, \
- (TYPE), ARM_BUILTIN_ ## CODE)
-
-#define iwmmx2_mbuiltin(NAME, TYPE, CODE) \
- def_mbuiltin (isa_bit_iwmmxt2, "__builtin_arm_" NAME, \
- (TYPE), ARM_BUILTIN_ ## CODE)
-
- iwmmx_mbuiltin ("wzero", di_ftype_void, WZERO);
- iwmmx_mbuiltin ("setwcgr0", void_ftype_int, SETWCGR0);
- iwmmx_mbuiltin ("setwcgr1", void_ftype_int, SETWCGR1);
- iwmmx_mbuiltin ("setwcgr2", void_ftype_int, SETWCGR2);
- iwmmx_mbuiltin ("setwcgr3", void_ftype_int, SETWCGR3);
- iwmmx_mbuiltin ("getwcgr0", int_ftype_void, GETWCGR0);
- iwmmx_mbuiltin ("getwcgr1", int_ftype_void, GETWCGR1);
- iwmmx_mbuiltin ("getwcgr2", int_ftype_void, GETWCGR2);
- iwmmx_mbuiltin ("getwcgr3", int_ftype_void, GETWCGR3);
-
- iwmmx_mbuiltin ("wsllh", v4hi_ftype_v4hi_di, WSLLH);
- iwmmx_mbuiltin ("wsllw", v2si_ftype_v2si_di, WSLLW);
- iwmmx_mbuiltin ("wslld", di_ftype_di_di, WSLLD);
- iwmmx_mbuiltin ("wsllhi", v4hi_ftype_v4hi_int, WSLLHI);
- iwmmx_mbuiltin ("wsllwi", v2si_ftype_v2si_int, WSLLWI);
- iwmmx_mbuiltin ("wslldi", di_ftype_di_int, WSLLDI);
-
- iwmmx_mbuiltin ("wsrlh", v4hi_ftype_v4hi_di, WSRLH);
- iwmmx_mbuiltin ("wsrlw", v2si_ftype_v2si_di, WSRLW);
- iwmmx_mbuiltin ("wsrld", di_ftype_di_di, WSRLD);
- iwmmx_mbuiltin ("wsrlhi", v4hi_ftype_v4hi_int, WSRLHI);
- iwmmx_mbuiltin ("wsrlwi", v2si_ftype_v2si_int, WSRLWI);
- iwmmx_mbuiltin ("wsrldi", di_ftype_di_int, WSRLDI);
-
- iwmmx_mbuiltin ("wsrah", v4hi_ftype_v4hi_di, WSRAH);
- iwmmx_mbuiltin ("wsraw", v2si_ftype_v2si_di, WSRAW);
- iwmmx_mbuiltin ("wsrad", di_ftype_di_di, WSRAD);
- iwmmx_mbuiltin ("wsrahi", v4hi_ftype_v4hi_int, WSRAHI);
- iwmmx_mbuiltin ("wsrawi", v2si_ftype_v2si_int, WSRAWI);
- iwmmx_mbuiltin ("wsradi", di_ftype_di_int, WSRADI);
-
- iwmmx_mbuiltin ("wrorh", v4hi_ftype_v4hi_di, WRORH);
- iwmmx_mbuiltin ("wrorw", v2si_ftype_v2si_di, WRORW);
- iwmmx_mbuiltin ("wrord", di_ftype_di_di, WRORD);
- iwmmx_mbuiltin ("wrorhi", v4hi_ftype_v4hi_int, WRORHI);
- iwmmx_mbuiltin ("wrorwi", v2si_ftype_v2si_int, WRORWI);
- iwmmx_mbuiltin ("wrordi", di_ftype_di_int, WRORDI);
-
- iwmmx_mbuiltin ("wshufh", v4hi_ftype_v4hi_int, WSHUFH);
-
- iwmmx_mbuiltin ("wsadb", v2si_ftype_v2si_v8qi_v8qi, WSADB);
- iwmmx_mbuiltin ("wsadh", v2si_ftype_v2si_v4hi_v4hi, WSADH);
- iwmmx_mbuiltin ("wmadds", v2si_ftype_v4hi_v4hi, WMADDS);
- iwmmx2_mbuiltin ("wmaddsx", v2si_ftype_v4hi_v4hi, WMADDSX);
- iwmmx2_mbuiltin ("wmaddsn", v2si_ftype_v4hi_v4hi, WMADDSN);
- iwmmx_mbuiltin ("wmaddu", v2si_ftype_v4hi_v4hi, WMADDU);
- iwmmx2_mbuiltin ("wmaddux", v2si_ftype_v4hi_v4hi, WMADDUX);
- iwmmx2_mbuiltin ("wmaddun", v2si_ftype_v4hi_v4hi, WMADDUN);
- iwmmx_mbuiltin ("wsadbz", v2si_ftype_v8qi_v8qi, WSADBZ);
- iwmmx_mbuiltin ("wsadhz", v2si_ftype_v4hi_v4hi, WSADHZ);
-
- iwmmx_mbuiltin ("textrmsb", int_ftype_v8qi_int, TEXTRMSB);
- iwmmx_mbuiltin ("textrmsh", int_ftype_v4hi_int, TEXTRMSH);
- iwmmx_mbuiltin ("textrmsw", int_ftype_v2si_int, TEXTRMSW);
- iwmmx_mbuiltin ("textrmub", int_ftype_v8qi_int, TEXTRMUB);
- iwmmx_mbuiltin ("textrmuh", int_ftype_v4hi_int, TEXTRMUH);
- iwmmx_mbuiltin ("textrmuw", int_ftype_v2si_int, TEXTRMUW);
- iwmmx_mbuiltin ("tinsrb", v8qi_ftype_v8qi_int_int, TINSRB);
- iwmmx_mbuiltin ("tinsrh", v4hi_ftype_v4hi_int_int, TINSRH);
- iwmmx_mbuiltin ("tinsrw", v2si_ftype_v2si_int_int, TINSRW);
-
- iwmmx_mbuiltin ("waccb", di_ftype_v8qi, WACCB);
- iwmmx_mbuiltin ("wacch", di_ftype_v4hi, WACCH);
- iwmmx_mbuiltin ("waccw", di_ftype_v2si, WACCW);
-
- iwmmx_mbuiltin ("tmovmskb", int_ftype_v8qi, TMOVMSKB);
- iwmmx_mbuiltin ("tmovmskh", int_ftype_v4hi, TMOVMSKH);
- iwmmx_mbuiltin ("tmovmskw", int_ftype_v2si, TMOVMSKW);
-
- iwmmx2_mbuiltin ("waddbhusm", v8qi_ftype_v4hi_v8qi, WADDBHUSM);
- iwmmx2_mbuiltin ("waddbhusl", v8qi_ftype_v4hi_v8qi, WADDBHUSL);
-
- iwmmx_mbuiltin ("wpackhss", v8qi_ftype_v4hi_v4hi, WPACKHSS);
- iwmmx_mbuiltin ("wpackhus", v8qi_ftype_v4hi_v4hi, WPACKHUS);
- iwmmx_mbuiltin ("wpackwus", v4hi_ftype_v2si_v2si, WPACKWUS);
- iwmmx_mbuiltin ("wpackwss", v4hi_ftype_v2si_v2si, WPACKWSS);
- iwmmx_mbuiltin ("wpackdus", v2si_ftype_di_di, WPACKDUS);
- iwmmx_mbuiltin ("wpackdss", v2si_ftype_di_di, WPACKDSS);
-
- iwmmx_mbuiltin ("wunpckehub", v4hi_ftype_v8qi, WUNPCKEHUB);
- iwmmx_mbuiltin ("wunpckehuh", v2si_ftype_v4hi, WUNPCKEHUH);
- iwmmx_mbuiltin ("wunpckehuw", di_ftype_v2si, WUNPCKEHUW);
- iwmmx_mbuiltin ("wunpckehsb", v4hi_ftype_v8qi, WUNPCKEHSB);
- iwmmx_mbuiltin ("wunpckehsh", v2si_ftype_v4hi, WUNPCKEHSH);
- iwmmx_mbuiltin ("wunpckehsw", di_ftype_v2si, WUNPCKEHSW);
- iwmmx_mbuiltin ("wunpckelub", v4hi_ftype_v8qi, WUNPCKELUB);
- iwmmx_mbuiltin ("wunpckeluh", v2si_ftype_v4hi, WUNPCKELUH);
- iwmmx_mbuiltin ("wunpckeluw", di_ftype_v2si, WUNPCKELUW);
- iwmmx_mbuiltin ("wunpckelsb", v4hi_ftype_v8qi, WUNPCKELSB);
- iwmmx_mbuiltin ("wunpckelsh", v2si_ftype_v4hi, WUNPCKELSH);
- iwmmx_mbuiltin ("wunpckelsw", di_ftype_v2si, WUNPCKELSW);
-
- iwmmx_mbuiltin ("wmacs", di_ftype_di_v4hi_v4hi, WMACS);
- iwmmx_mbuiltin ("wmacsz", di_ftype_v4hi_v4hi, WMACSZ);
- iwmmx_mbuiltin ("wmacu", di_ftype_di_v4hi_v4hi, WMACU);
- iwmmx_mbuiltin ("wmacuz", di_ftype_v4hi_v4hi, WMACUZ);
-
- iwmmx_mbuiltin ("walign", v8qi_ftype_v8qi_v8qi_int, WALIGNI);
- iwmmx_mbuiltin ("tmia", di_ftype_di_int_int, TMIA);
- iwmmx_mbuiltin ("tmiaph", di_ftype_di_int_int, TMIAPH);
- iwmmx_mbuiltin ("tmiabb", di_ftype_di_int_int, TMIABB);
- iwmmx_mbuiltin ("tmiabt", di_ftype_di_int_int, TMIABT);
- iwmmx_mbuiltin ("tmiatb", di_ftype_di_int_int, TMIATB);
- iwmmx_mbuiltin ("tmiatt", di_ftype_di_int_int, TMIATT);
-
- iwmmx2_mbuiltin ("wabsb", v8qi_ftype_v8qi, WABSB);
- iwmmx2_mbuiltin ("wabsh", v4hi_ftype_v4hi, WABSH);
- iwmmx2_mbuiltin ("wabsw", v2si_ftype_v2si, WABSW);
-
- iwmmx2_mbuiltin ("wqmiabb", v2si_ftype_v2si_v4hi_v4hi, WQMIABB);
- iwmmx2_mbuiltin ("wqmiabt", v2si_ftype_v2si_v4hi_v4hi, WQMIABT);
- iwmmx2_mbuiltin ("wqmiatb", v2si_ftype_v2si_v4hi_v4hi, WQMIATB);
- iwmmx2_mbuiltin ("wqmiatt", v2si_ftype_v2si_v4hi_v4hi, WQMIATT);
-
- iwmmx2_mbuiltin ("wqmiabbn", v2si_ftype_v2si_v4hi_v4hi, WQMIABBN);
- iwmmx2_mbuiltin ("wqmiabtn", v2si_ftype_v2si_v4hi_v4hi, WQMIABTN);
- iwmmx2_mbuiltin ("wqmiatbn", v2si_ftype_v2si_v4hi_v4hi, WQMIATBN);
- iwmmx2_mbuiltin ("wqmiattn", v2si_ftype_v2si_v4hi_v4hi, WQMIATTN);
-
- iwmmx2_mbuiltin ("wmiabb", di_ftype_di_v4hi_v4hi, WMIABB);
- iwmmx2_mbuiltin ("wmiabt", di_ftype_di_v4hi_v4hi, WMIABT);
- iwmmx2_mbuiltin ("wmiatb", di_ftype_di_v4hi_v4hi, WMIATB);
- iwmmx2_mbuiltin ("wmiatt", di_ftype_di_v4hi_v4hi, WMIATT);
-
- iwmmx2_mbuiltin ("wmiabbn", di_ftype_di_v4hi_v4hi, WMIABBN);
- iwmmx2_mbuiltin ("wmiabtn", di_ftype_di_v4hi_v4hi, WMIABTN);
- iwmmx2_mbuiltin ("wmiatbn", di_ftype_di_v4hi_v4hi, WMIATBN);
- iwmmx2_mbuiltin ("wmiattn", di_ftype_di_v4hi_v4hi, WMIATTN);
-
- iwmmx2_mbuiltin ("wmiawbb", di_ftype_di_v2si_v2si, WMIAWBB);
- iwmmx2_mbuiltin ("wmiawbt", di_ftype_di_v2si_v2si, WMIAWBT);
- iwmmx2_mbuiltin ("wmiawtb", di_ftype_di_v2si_v2si, WMIAWTB);
- iwmmx2_mbuiltin ("wmiawtt", di_ftype_di_v2si_v2si, WMIAWTT);
-
- iwmmx2_mbuiltin ("wmiawbbn", di_ftype_di_v2si_v2si, WMIAWBBN);
- iwmmx2_mbuiltin ("wmiawbtn", di_ftype_di_v2si_v2si, WMIAWBTN);
- iwmmx2_mbuiltin ("wmiawtbn", di_ftype_di_v2si_v2si, WMIAWTBN);
- iwmmx2_mbuiltin ("wmiawttn", di_ftype_di_v2si_v2si, WMIAWTTN);
-
- iwmmx2_mbuiltin ("wmerge", di_ftype_di_di_int, WMERGE);
-
- iwmmx_mbuiltin ("tbcstb", v8qi_ftype_char, TBCSTB);
- iwmmx_mbuiltin ("tbcsth", v4hi_ftype_short, TBCSTH);
- iwmmx_mbuiltin ("tbcstw", v2si_ftype_int, TBCSTW);
-
-#undef iwmmx_mbuiltin
-#undef iwmmx2_mbuiltin
-}
-
static void
arm_init_fp16_builtins (void)
{
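For context, the deleted arm_init_iwmmxt_builtins followed GCC's usual builtin-registration recipe: build a NULL_TREE-terminated function-type node with build_function_type_list, then record it under a target-specific function code. A minimal sketch of that recipe, assuming V4HI_type_node already exists and using the hypothetical names register_example_builtin and ARM_BUILTIN_EXAMPLE (neither is part of this patch):

    static void
    register_example_builtin (void)
    {
      /* V4HI (*) (V4HI, V4HI); the argument list is NULL_TREE-terminated.  */
      tree v4hi_ftype_v4hi_v4hi
        = build_function_type_list (V4HI_type_node,
                                    V4HI_type_node, V4HI_type_node,
                                    NULL_TREE);

      /* ARM_BUILTIN_EXAMPLE stands in for a real function code.  */
      add_builtin_function ("__builtin_arm_example", v4hi_ftype_v4hi_v4hi,
                            ARM_BUILTIN_EXAMPLE, BUILT_IN_MD,
                            NULL, NULL_TREE);
    }

The deleted loop ran this recipe once per bdesc_2arg entry, picking the type node from the insn's operand mode.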
@@ -2454,9 +1693,6 @@ arm_init_fp16_builtins (void)
void
arm_init_builtins (void)
{
- if (TARGET_REALLY_IWMMXT)
- arm_init_iwmmxt_builtins ();
-
/* This creates the arm_simd_floatHF_type_node so must come before
arm_init_neon_builtins which uses it. */
arm_init_fp16_builtins ();
@@ -2546,15 +1782,11 @@ arm_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
clear instructions. */
static rtx
-safe_vector_operand (rtx x, machine_mode mode)
+safe_vector_operand (rtx x, machine_mode mode ATTRIBUTE_UNUSED)
{
if (x != const0_rtx)
return x;
- x = gen_reg_rtx (mode);
-
- emit_insn (gen_iwmmxt_clrdi (mode == DImode ? x
- : gen_rtx_SUBREG (DImode, x, 0)));
- return x;
+ __builtin_unreachable ();
}
/* Function to expand ternary builtins. */
@@ -3053,8 +2285,7 @@ constant_arg:
builtin and error out if not. */
start_sequence ();
emit_insn (pat);
- insn = get_insns ();
- end_sequence ();
+ insn = end_sequence ();
if (recog_memoized (insn) < 0)
error ("this builtin is not supported for this target");
@@ -3266,21 +2497,10 @@ arm_general_expand_builtin (unsigned int fcode,
const struct builtin_description * d;
enum insn_code icode;
tree arg0;
- tree arg1;
- tree arg2;
rtx op0;
rtx op1;
- rtx op2;
rtx pat;
size_t i;
- machine_mode tmode;
- machine_mode mode0;
- machine_mode mode1;
- machine_mode mode2;
- int opint;
- int selector;
- int mask;
- int imm;
if (fcode == ARM_BUILTIN_SIMD_LANE_CHECK)
{
@@ -3369,499 +2589,6 @@ arm_general_expand_builtin (unsigned int fcode,
emit_insn (gen_cstoresi4 (target, op1, target, const0_rtx));
return target;
- case ARM_BUILTIN_TEXTRMSB:
- case ARM_BUILTIN_TEXTRMUB:
- case ARM_BUILTIN_TEXTRMSH:
- case ARM_BUILTIN_TEXTRMUH:
- case ARM_BUILTIN_TEXTRMSW:
- case ARM_BUILTIN_TEXTRMUW:
- icode = (fcode == ARM_BUILTIN_TEXTRMSB ? CODE_FOR_iwmmxt_textrmsb
- : fcode == ARM_BUILTIN_TEXTRMUB ? CODE_FOR_iwmmxt_textrmub
- : fcode == ARM_BUILTIN_TEXTRMSH ? CODE_FOR_iwmmxt_textrmsh
- : fcode == ARM_BUILTIN_TEXTRMUH ? CODE_FOR_iwmmxt_textrmuh
- : CODE_FOR_iwmmxt_textrmw);
-
- arg0 = CALL_EXPR_ARG (exp, 0);
- arg1 = CALL_EXPR_ARG (exp, 1);
- op0 = expand_normal (arg0);
- op1 = expand_normal (arg1);
- tmode = insn_data[icode].operand[0].mode;
- mode0 = insn_data[icode].operand[1].mode;
- mode1 = insn_data[icode].operand[2].mode;
-
- if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
- op0 = copy_to_mode_reg (mode0, op0);
- if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
- {
- /* @@@ better error message */
- error ("selector must be an immediate");
- return gen_reg_rtx (tmode);
- }
-
- opint = INTVAL (op1);
- if (fcode == ARM_BUILTIN_TEXTRMSB || fcode == ARM_BUILTIN_TEXTRMUB)
- {
- if (opint > 7 || opint < 0)
- error ("the range of selector should be in 0 to 7");
- }
- else if (fcode == ARM_BUILTIN_TEXTRMSH || fcode == ARM_BUILTIN_TEXTRMUH)
- {
- if (opint > 3 || opint < 0)
- error ("the range of selector should be in 0 to 3");
- }
- else /* ARM_BUILTIN_TEXTRMSW || ARM_BUILTIN_TEXTRMUW. */
- {
- if (opint > 1 || opint < 0)
- error ("the range of selector should be in 0 to 1");
- }
-
- if (target == 0
- || GET_MODE (target) != tmode
- || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
- target = gen_reg_rtx (tmode);
- pat = GEN_FCN (icode) (target, op0, op1);
- if (! pat)
- return 0;
- emit_insn (pat);
- return target;
-
- case ARM_BUILTIN_WALIGNI:
-      /* If op2 is immediate, call waligni, else call walignr.  */
- arg0 = CALL_EXPR_ARG (exp, 0);
- arg1 = CALL_EXPR_ARG (exp, 1);
- arg2 = CALL_EXPR_ARG (exp, 2);
- op0 = expand_normal (arg0);
- op1 = expand_normal (arg1);
- op2 = expand_normal (arg2);
- if (CONST_INT_P (op2))
- {
- icode = CODE_FOR_iwmmxt_waligni;
- tmode = insn_data[icode].operand[0].mode;
- mode0 = insn_data[icode].operand[1].mode;
- mode1 = insn_data[icode].operand[2].mode;
- mode2 = insn_data[icode].operand[3].mode;
- if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
- op0 = copy_to_mode_reg (mode0, op0);
- if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
- op1 = copy_to_mode_reg (mode1, op1);
- gcc_assert ((*insn_data[icode].operand[3].predicate) (op2, mode2));
- selector = INTVAL (op2);
- if (selector > 7 || selector < 0)
- error ("the range of selector should be in 0 to 7");
- }
- else
- {
- icode = CODE_FOR_iwmmxt_walignr;
- tmode = insn_data[icode].operand[0].mode;
- mode0 = insn_data[icode].operand[1].mode;
- mode1 = insn_data[icode].operand[2].mode;
- mode2 = insn_data[icode].operand[3].mode;
- if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
- op0 = copy_to_mode_reg (mode0, op0);
- if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
- op1 = copy_to_mode_reg (mode1, op1);
- if (!(*insn_data[icode].operand[3].predicate) (op2, mode2))
- op2 = copy_to_mode_reg (mode2, op2);
- }
- if (target == 0
- || GET_MODE (target) != tmode
- || !(*insn_data[icode].operand[0].predicate) (target, tmode))
- target = gen_reg_rtx (tmode);
- pat = GEN_FCN (icode) (target, op0, op1, op2);
- if (!pat)
- return 0;
- emit_insn (pat);
- return target;
-
- case ARM_BUILTIN_TINSRB:
- case ARM_BUILTIN_TINSRH:
- case ARM_BUILTIN_TINSRW:
- case ARM_BUILTIN_WMERGE:
- icode = (fcode == ARM_BUILTIN_TINSRB ? CODE_FOR_iwmmxt_tinsrb
- : fcode == ARM_BUILTIN_TINSRH ? CODE_FOR_iwmmxt_tinsrh
- : fcode == ARM_BUILTIN_WMERGE ? CODE_FOR_iwmmxt_wmerge
- : CODE_FOR_iwmmxt_tinsrw);
- arg0 = CALL_EXPR_ARG (exp, 0);
- arg1 = CALL_EXPR_ARG (exp, 1);
- arg2 = CALL_EXPR_ARG (exp, 2);
- op0 = expand_normal (arg0);
- op1 = expand_normal (arg1);
- op2 = expand_normal (arg2);
- tmode = insn_data[icode].operand[0].mode;
- mode0 = insn_data[icode].operand[1].mode;
- mode1 = insn_data[icode].operand[2].mode;
- mode2 = insn_data[icode].operand[3].mode;
-
- if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
- op0 = copy_to_mode_reg (mode0, op0);
- if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
- op1 = copy_to_mode_reg (mode1, op1);
- if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
- {
- error ("selector must be an immediate");
- return const0_rtx;
- }
- if (icode == CODE_FOR_iwmmxt_wmerge)
- {
- selector = INTVAL (op2);
- if (selector > 7 || selector < 0)
- error ("the range of selector should be in 0 to 7");
- }
- if ((icode == CODE_FOR_iwmmxt_tinsrb)
- || (icode == CODE_FOR_iwmmxt_tinsrh)
- || (icode == CODE_FOR_iwmmxt_tinsrw))
- {
- mask = 0x01;
- selector= INTVAL (op2);
- if (icode == CODE_FOR_iwmmxt_tinsrb && (selector < 0 || selector > 7))
- error ("the range of selector should be in 0 to 7");
- else if (icode == CODE_FOR_iwmmxt_tinsrh && (selector < 0 ||selector > 3))
- error ("the range of selector should be in 0 to 3");
- else if (icode == CODE_FOR_iwmmxt_tinsrw && (selector < 0 ||selector > 1))
- error ("the range of selector should be in 0 to 1");
- mask <<= selector;
- op2 = GEN_INT (mask);
- }
- if (target == 0
- || GET_MODE (target) != tmode
- || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
- target = gen_reg_rtx (tmode);
- pat = GEN_FCN (icode) (target, op0, op1, op2);
- if (! pat)
- return 0;
- emit_insn (pat);
- return target;
-
- case ARM_BUILTIN_SETWCGR0:
- case ARM_BUILTIN_SETWCGR1:
- case ARM_BUILTIN_SETWCGR2:
- case ARM_BUILTIN_SETWCGR3:
- icode = (fcode == ARM_BUILTIN_SETWCGR0 ? CODE_FOR_iwmmxt_setwcgr0
- : fcode == ARM_BUILTIN_SETWCGR1 ? CODE_FOR_iwmmxt_setwcgr1
- : fcode == ARM_BUILTIN_SETWCGR2 ? CODE_FOR_iwmmxt_setwcgr2
- : CODE_FOR_iwmmxt_setwcgr3);
- arg0 = CALL_EXPR_ARG (exp, 0);
- op0 = expand_normal (arg0);
- mode0 = insn_data[icode].operand[0].mode;
- if (!(*insn_data[icode].operand[0].predicate) (op0, mode0))
- op0 = copy_to_mode_reg (mode0, op0);
- pat = GEN_FCN (icode) (op0);
- if (!pat)
- return 0;
- emit_insn (pat);
- return 0;
-
- case ARM_BUILTIN_GETWCGR0:
- case ARM_BUILTIN_GETWCGR1:
- case ARM_BUILTIN_GETWCGR2:
- case ARM_BUILTIN_GETWCGR3:
- icode = (fcode == ARM_BUILTIN_GETWCGR0 ? CODE_FOR_iwmmxt_getwcgr0
- : fcode == ARM_BUILTIN_GETWCGR1 ? CODE_FOR_iwmmxt_getwcgr1
- : fcode == ARM_BUILTIN_GETWCGR2 ? CODE_FOR_iwmmxt_getwcgr2
- : CODE_FOR_iwmmxt_getwcgr3);
- tmode = insn_data[icode].operand[0].mode;
- if (target == 0
- || GET_MODE (target) != tmode
- || !(*insn_data[icode].operand[0].predicate) (target, tmode))
- target = gen_reg_rtx (tmode);
- pat = GEN_FCN (icode) (target);
- if (!pat)
- return 0;
- emit_insn (pat);
- return target;
-
- case ARM_BUILTIN_WSHUFH:
- icode = CODE_FOR_iwmmxt_wshufh;
- arg0 = CALL_EXPR_ARG (exp, 0);
- arg1 = CALL_EXPR_ARG (exp, 1);
- op0 = expand_normal (arg0);
- op1 = expand_normal (arg1);
- tmode = insn_data[icode].operand[0].mode;
- mode1 = insn_data[icode].operand[1].mode;
- mode2 = insn_data[icode].operand[2].mode;
-
- if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
- op0 = copy_to_mode_reg (mode1, op0);
- if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
- {
- error ("mask must be an immediate");
- return const0_rtx;
- }
- selector = INTVAL (op1);
- if (selector < 0 || selector > 255)
- error ("the range of mask should be in 0 to 255");
- if (target == 0
- || GET_MODE (target) != tmode
- || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
- target = gen_reg_rtx (tmode);
- pat = GEN_FCN (icode) (target, op0, op1);
- if (! pat)
- return 0;
- emit_insn (pat);
- return target;
-
- case ARM_BUILTIN_WMADDS:
- return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmadds, exp, target);
- case ARM_BUILTIN_WMADDSX:
- return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddsx, exp, target);
- case ARM_BUILTIN_WMADDSN:
- return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddsn, exp, target);
- case ARM_BUILTIN_WMADDU:
- return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddu, exp, target);
- case ARM_BUILTIN_WMADDUX:
- return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddux, exp, target);
- case ARM_BUILTIN_WMADDUN:
- return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wmaddun, exp, target);
- case ARM_BUILTIN_WSADBZ:
- return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadbz, exp, target);
- case ARM_BUILTIN_WSADHZ:
- return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadhz, exp, target);
-
- /* Several three-argument builtins. */
- case ARM_BUILTIN_WMACS:
- case ARM_BUILTIN_WMACU:
- case ARM_BUILTIN_TMIA:
- case ARM_BUILTIN_TMIAPH:
- case ARM_BUILTIN_TMIATT:
- case ARM_BUILTIN_TMIATB:
- case ARM_BUILTIN_TMIABT:
- case ARM_BUILTIN_TMIABB:
- case ARM_BUILTIN_WQMIABB:
- case ARM_BUILTIN_WQMIABT:
- case ARM_BUILTIN_WQMIATB:
- case ARM_BUILTIN_WQMIATT:
- case ARM_BUILTIN_WQMIABBN:
- case ARM_BUILTIN_WQMIABTN:
- case ARM_BUILTIN_WQMIATBN:
- case ARM_BUILTIN_WQMIATTN:
- case ARM_BUILTIN_WMIABB:
- case ARM_BUILTIN_WMIABT:
- case ARM_BUILTIN_WMIATB:
- case ARM_BUILTIN_WMIATT:
- case ARM_BUILTIN_WMIABBN:
- case ARM_BUILTIN_WMIABTN:
- case ARM_BUILTIN_WMIATBN:
- case ARM_BUILTIN_WMIATTN:
- case ARM_BUILTIN_WMIAWBB:
- case ARM_BUILTIN_WMIAWBT:
- case ARM_BUILTIN_WMIAWTB:
- case ARM_BUILTIN_WMIAWTT:
- case ARM_BUILTIN_WMIAWBBN:
- case ARM_BUILTIN_WMIAWBTN:
- case ARM_BUILTIN_WMIAWTBN:
- case ARM_BUILTIN_WMIAWTTN:
- case ARM_BUILTIN_WSADB:
- case ARM_BUILTIN_WSADH:
- icode = (fcode == ARM_BUILTIN_WMACS ? CODE_FOR_iwmmxt_wmacs
- : fcode == ARM_BUILTIN_WMACU ? CODE_FOR_iwmmxt_wmacu
- : fcode == ARM_BUILTIN_TMIA ? CODE_FOR_iwmmxt_tmia
- : fcode == ARM_BUILTIN_TMIAPH ? CODE_FOR_iwmmxt_tmiaph
- : fcode == ARM_BUILTIN_TMIABB ? CODE_FOR_iwmmxt_tmiabb
- : fcode == ARM_BUILTIN_TMIABT ? CODE_FOR_iwmmxt_tmiabt
- : fcode == ARM_BUILTIN_TMIATB ? CODE_FOR_iwmmxt_tmiatb
- : fcode == ARM_BUILTIN_TMIATT ? CODE_FOR_iwmmxt_tmiatt
- : fcode == ARM_BUILTIN_WQMIABB ? CODE_FOR_iwmmxt_wqmiabb
- : fcode == ARM_BUILTIN_WQMIABT ? CODE_FOR_iwmmxt_wqmiabt
- : fcode == ARM_BUILTIN_WQMIATB ? CODE_FOR_iwmmxt_wqmiatb
- : fcode == ARM_BUILTIN_WQMIATT ? CODE_FOR_iwmmxt_wqmiatt
- : fcode == ARM_BUILTIN_WQMIABBN ? CODE_FOR_iwmmxt_wqmiabbn
- : fcode == ARM_BUILTIN_WQMIABTN ? CODE_FOR_iwmmxt_wqmiabtn
- : fcode == ARM_BUILTIN_WQMIATBN ? CODE_FOR_iwmmxt_wqmiatbn
- : fcode == ARM_BUILTIN_WQMIATTN ? CODE_FOR_iwmmxt_wqmiattn
- : fcode == ARM_BUILTIN_WMIABB ? CODE_FOR_iwmmxt_wmiabb
- : fcode == ARM_BUILTIN_WMIABT ? CODE_FOR_iwmmxt_wmiabt
- : fcode == ARM_BUILTIN_WMIATB ? CODE_FOR_iwmmxt_wmiatb
- : fcode == ARM_BUILTIN_WMIATT ? CODE_FOR_iwmmxt_wmiatt
- : fcode == ARM_BUILTIN_WMIABBN ? CODE_FOR_iwmmxt_wmiabbn
- : fcode == ARM_BUILTIN_WMIABTN ? CODE_FOR_iwmmxt_wmiabtn
- : fcode == ARM_BUILTIN_WMIATBN ? CODE_FOR_iwmmxt_wmiatbn
- : fcode == ARM_BUILTIN_WMIATTN ? CODE_FOR_iwmmxt_wmiattn
- : fcode == ARM_BUILTIN_WMIAWBB ? CODE_FOR_iwmmxt_wmiawbb
- : fcode == ARM_BUILTIN_WMIAWBT ? CODE_FOR_iwmmxt_wmiawbt
- : fcode == ARM_BUILTIN_WMIAWTB ? CODE_FOR_iwmmxt_wmiawtb
- : fcode == ARM_BUILTIN_WMIAWTT ? CODE_FOR_iwmmxt_wmiawtt
- : fcode == ARM_BUILTIN_WMIAWBBN ? CODE_FOR_iwmmxt_wmiawbbn
- : fcode == ARM_BUILTIN_WMIAWBTN ? CODE_FOR_iwmmxt_wmiawbtn
- : fcode == ARM_BUILTIN_WMIAWTBN ? CODE_FOR_iwmmxt_wmiawtbn
- : fcode == ARM_BUILTIN_WMIAWTTN ? CODE_FOR_iwmmxt_wmiawttn
- : fcode == ARM_BUILTIN_WSADB ? CODE_FOR_iwmmxt_wsadb
- : CODE_FOR_iwmmxt_wsadh);
- arg0 = CALL_EXPR_ARG (exp, 0);
- arg1 = CALL_EXPR_ARG (exp, 1);
- arg2 = CALL_EXPR_ARG (exp, 2);
- op0 = expand_normal (arg0);
- op1 = expand_normal (arg1);
- op2 = expand_normal (arg2);
- tmode = insn_data[icode].operand[0].mode;
- mode0 = insn_data[icode].operand[1].mode;
- mode1 = insn_data[icode].operand[2].mode;
- mode2 = insn_data[icode].operand[3].mode;
-
- if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
- op0 = copy_to_mode_reg (mode0, op0);
- if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
- op1 = copy_to_mode_reg (mode1, op1);
- if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
- op2 = copy_to_mode_reg (mode2, op2);
- if (target == 0
- || GET_MODE (target) != tmode
- || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
- target = gen_reg_rtx (tmode);
- pat = GEN_FCN (icode) (target, op0, op1, op2);
- if (! pat)
- return 0;
- emit_insn (pat);
- return target;
-
- case ARM_BUILTIN_WZERO:
- target = gen_reg_rtx (DImode);
- emit_insn (gen_iwmmxt_clrdi (target));
- return target;
-
- case ARM_BUILTIN_WSRLHI:
- case ARM_BUILTIN_WSRLWI:
- case ARM_BUILTIN_WSRLDI:
- case ARM_BUILTIN_WSLLHI:
- case ARM_BUILTIN_WSLLWI:
- case ARM_BUILTIN_WSLLDI:
- case ARM_BUILTIN_WSRAHI:
- case ARM_BUILTIN_WSRAWI:
- case ARM_BUILTIN_WSRADI:
- case ARM_BUILTIN_WRORHI:
- case ARM_BUILTIN_WRORWI:
- case ARM_BUILTIN_WRORDI:
- case ARM_BUILTIN_WSRLH:
- case ARM_BUILTIN_WSRLW:
- case ARM_BUILTIN_WSRLD:
- case ARM_BUILTIN_WSLLH:
- case ARM_BUILTIN_WSLLW:
- case ARM_BUILTIN_WSLLD:
- case ARM_BUILTIN_WSRAH:
- case ARM_BUILTIN_WSRAW:
- case ARM_BUILTIN_WSRAD:
- case ARM_BUILTIN_WRORH:
- case ARM_BUILTIN_WRORW:
- case ARM_BUILTIN_WRORD:
- icode = (fcode == ARM_BUILTIN_WSRLHI ? CODE_FOR_lshrv4hi3_iwmmxt
- : fcode == ARM_BUILTIN_WSRLWI ? CODE_FOR_lshrv2si3_iwmmxt
- : fcode == ARM_BUILTIN_WSRLDI ? CODE_FOR_lshrdi3_iwmmxt
- : fcode == ARM_BUILTIN_WSLLHI ? CODE_FOR_ashlv4hi3_iwmmxt
- : fcode == ARM_BUILTIN_WSLLWI ? CODE_FOR_ashlv2si3_iwmmxt
- : fcode == ARM_BUILTIN_WSLLDI ? CODE_FOR_ashldi3_iwmmxt
- : fcode == ARM_BUILTIN_WSRAHI ? CODE_FOR_ashrv4hi3_iwmmxt
- : fcode == ARM_BUILTIN_WSRAWI ? CODE_FOR_ashrv2si3_iwmmxt
- : fcode == ARM_BUILTIN_WSRADI ? CODE_FOR_ashrdi3_iwmmxt
- : fcode == ARM_BUILTIN_WRORHI ? CODE_FOR_rorv4hi3
- : fcode == ARM_BUILTIN_WRORWI ? CODE_FOR_rorv2si3
- : fcode == ARM_BUILTIN_WRORDI ? CODE_FOR_rordi3
- : fcode == ARM_BUILTIN_WSRLH ? CODE_FOR_lshrv4hi3_di
- : fcode == ARM_BUILTIN_WSRLW ? CODE_FOR_lshrv2si3_di
- : fcode == ARM_BUILTIN_WSRLD ? CODE_FOR_lshrdi3_di
- : fcode == ARM_BUILTIN_WSLLH ? CODE_FOR_ashlv4hi3_di
- : fcode == ARM_BUILTIN_WSLLW ? CODE_FOR_ashlv2si3_di
- : fcode == ARM_BUILTIN_WSLLD ? CODE_FOR_ashldi3_di
- : fcode == ARM_BUILTIN_WSRAH ? CODE_FOR_ashrv4hi3_di
- : fcode == ARM_BUILTIN_WSRAW ? CODE_FOR_ashrv2si3_di
- : fcode == ARM_BUILTIN_WSRAD ? CODE_FOR_ashrdi3_di
- : fcode == ARM_BUILTIN_WRORH ? CODE_FOR_rorv4hi3_di
- : fcode == ARM_BUILTIN_WRORW ? CODE_FOR_rorv2si3_di
- : fcode == ARM_BUILTIN_WRORD ? CODE_FOR_rordi3_di
- : CODE_FOR_nothing);
- arg1 = CALL_EXPR_ARG (exp, 1);
- op1 = expand_normal (arg1);
- if (GET_MODE (op1) == VOIDmode)
- {
- imm = INTVAL (op1);
- if ((fcode == ARM_BUILTIN_WRORWI || fcode == ARM_BUILTIN_WRORW)
- && (imm < 0 || imm > 32))
- {
- const char *builtin = (fcode == ARM_BUILTIN_WRORWI
- ? "_mm_rori_pi32" : "_mm_ror_pi32");
- error ("the range of count should be in 0 to 32; "
- "please check the intrinsic %qs in code", builtin);
- }
- else if ((fcode == ARM_BUILTIN_WRORHI || fcode == ARM_BUILTIN_WRORH)
- && (imm < 0 || imm > 16))
- {
- const char *builtin = (fcode == ARM_BUILTIN_WRORHI
- ? "_mm_rori_pi16" : "_mm_ror_pi16");
- error ("the range of count should be in 0 to 16; "
- "please check the intrinsic %qs in code", builtin);
- }
- else if ((fcode == ARM_BUILTIN_WRORDI || fcode == ARM_BUILTIN_WRORD)
- && (imm < 0 || imm > 64))
- {
- const char *builtin = (fcode == ARM_BUILTIN_WRORDI
- ? "_mm_rori_si64" : "_mm_ror_si64");
- error ("the range of count should be in 0 to 64; "
- "please check the intrinsic %qs in code", builtin);
- }
- else if (imm < 0)
- {
- const char *builtin;
- switch (fcode)
- {
- case ARM_BUILTIN_WSRLHI:
- builtin = "_mm_srli_pi16";
- break;
- case ARM_BUILTIN_WSRLWI:
- builtin = "_mm_srli_pi32";
- break;
- case ARM_BUILTIN_WSRLDI:
- builtin = "_mm_srli_si64";
- break;
- case ARM_BUILTIN_WSLLHI:
- builtin = "_mm_slli_pi16";
- break;
- case ARM_BUILTIN_WSLLWI:
- builtin = "_mm_slli_pi32";
- break;
- case ARM_BUILTIN_WSLLDI:
- builtin = "_mm_slli_si64";
- break;
- case ARM_BUILTIN_WSRAHI:
- builtin = "_mm_srai_pi16";
- break;
- case ARM_BUILTIN_WSRAWI:
- builtin = "_mm_srai_pi32";
- break;
- case ARM_BUILTIN_WSRADI:
- builtin = "_mm_srai_si64";
- break;
- case ARM_BUILTIN_WSRLH:
- builtin = "_mm_srl_pi16";
- break;
- case ARM_BUILTIN_WSRLW:
- builtin = "_mm_srl_pi32";
- break;
- case ARM_BUILTIN_WSRLD:
- builtin = "_mm_srl_si64";
- break;
- case ARM_BUILTIN_WSLLH:
- builtin = "_mm_sll_pi16";
- break;
- case ARM_BUILTIN_WSLLW:
- builtin = "_mm_sll_pi32";
- break;
- case ARM_BUILTIN_WSLLD:
- builtin = "_mm_sll_si64";
- break;
- case ARM_BUILTIN_WSRAH:
- builtin = "_mm_sra_pi16";
- break;
- case ARM_BUILTIN_WSRAW:
- builtin = "_mm_sra_si64";
- break;
- default:
- builtin = "_mm_sra_si64";
- break;
- }
- error ("the count should be no less than 0; "
- "please check the intrinsic %qs in code", builtin);
- }
- }
- return arm_expand_binop_builtin (icode, exp, target);
-
default:
break;
}
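All of the deleted cases above shared one expansion idiom: fetch the operand modes from insn_data, force each operand into a register when the insn's predicate rejects it, make sure the target register is usable, then emit the generated pattern. A condensed sketch of that idiom for a two-operand builtin (expand_example_binop is a hypothetical name, not from the patch):

    static rtx
    expand_example_binop (enum insn_code icode, rtx op0, rtx op1, rtx target)
    {
      machine_mode tmode = insn_data[icode].operand[0].mode;
      machine_mode mode0 = insn_data[icode].operand[1].mode;
      machine_mode mode1 = insn_data[icode].operand[2].mode;

      /* Force operands into registers when the insn's predicate
         rejects the expanded form.  */
      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
        op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
        op1 = copy_to_mode_reg (mode1, op1);

      /* Reuse TARGET only if it has the right mode and passes the
         output predicate; otherwise grab a fresh pseudo.  */
      if (target == 0
          || GET_MODE (target) != tmode
          || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
        target = gen_reg_rtx (tmode);

      rtx pat = GEN_FCN (icode) (target, op0, op1);
      if (! pat)
        return 0;
      emit_insn (pat);
      return target;
    }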
diff --git a/gcc/config/arm/arm-c.cc b/gcc/config/arm/arm-c.cc
index 15e4080..d257e62 100644
--- a/gcc/config/arm/arm-c.cc
+++ b/gcc/config/arm/arm-c.cc
@@ -373,13 +373,6 @@ arm_cpu_builtins (struct cpp_reader* pfile)
builtin_define (arm_arch_name);
if (arm_arch_xscale)
builtin_define ("__XSCALE__");
- if (arm_arch_iwmmxt)
- {
- builtin_define ("__IWMMXT__");
- builtin_define ("__ARM_WMMX");
- }
- if (arm_arch_iwmmxt2)
- builtin_define ("__IWMMXT2__");
/* ARMv6KZ was originally identified as the misspelled __ARM_ARCH_6ZK__. To
preserve the existing behavior, the misspelled feature macro must still be
defined. */
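User code that tested the removed predefines keeps compiling; its iWMMXt branch simply becomes dead. An illustrative guard (not from the patch):

    #ifdef __IWMMXT__
      /* Wireless MMX intrinsics: no longer selected, since the compiler
         never predefines the macro after this change.  */
    #else
      /* Portable fallback: now always taken.  */
    #endif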
diff --git a/gcc/config/arm/arm-cpus.in b/gcc/config/arm/arm-cpus.in
index 1939d55..7f5a8c6 100644
--- a/gcc/config/arm/arm-cpus.in
+++ b/gcc/config/arm/arm-cpus.in
@@ -102,12 +102,6 @@ define feature armv8
# ARMv8 CRC32 instructions.
define feature crc32
-# XScale v2 (Wireless MMX).
-define feature iwmmxt
-
-# XScale Wireless MMX2.
-define feature iwmmxt2
-
# Architecture rel 8.1.
define feature armv8_1
@@ -778,18 +772,19 @@ begin arch armv9-a
option bf16 add bf16 FP_ARMv8 DOTPROD
end arch armv9-a
+# We no longer support the iwmmxt{,2} extensions, so treat these like xscale.
begin arch iwmmxt
- tune for iwmmxt
+ tune for xscale
tune flags LDSCHED STRONG XSCALE
base 5TE
- isa ARMv5te xscale iwmmxt
+ isa ARMv5te xscale
end arch iwmmxt
begin arch iwmmxt2
- tune for iwmmxt2
+ tune for xscale
tune flags LDSCHED STRONG XSCALE
base 5TE
- isa ARMv5te xscale iwmmxt iwmmxt2
+ isa ARMv5te xscale
end arch iwmmxt2
# CPU entries
@@ -924,23 +919,12 @@ end cpu arm10e
begin cpu xscale
tune flags LDSCHED XSCALE
+ alias iwmmxt iwmmxt2
architecture armv5te
isa xscale
costs xscale
end cpu xscale
-begin cpu iwmmxt
- tune flags LDSCHED XSCALE
- architecture iwmmxt
- costs xscale
-end cpu iwmmxt
-
-begin cpu iwmmxt2
- tune flags LDSCHED XSCALE
- architecture iwmmxt2
- costs xscale
-end cpu iwmmxt2
-
begin cpu fa606te
tune flags LDSCHED
architecture armv5te
diff --git a/gcc/config/arm/arm-generic.md b/gcc/config/arm/arm-generic.md
index c270056..a8af0e6 100644
--- a/gcc/config/arm/arm-generic.md
+++ b/gcc/config/arm/arm-generic.md
@@ -96,14 +96,14 @@
(and (eq_attr "generic_sched" "yes")
(and (eq_attr "ldsched" "yes")
(and (eq_attr "type" "load_byte,load_4")
- (eq_attr "tune" "xscale,iwmmxt,iwmmxt2"))))
+ (eq_attr "tune" "xscale"))))
"core")
(define_insn_reservation "load_ldsched" 2
(and (eq_attr "generic_sched" "yes")
(and (eq_attr "ldsched" "yes")
(and (eq_attr "type" "load_byte,load_4")
- (eq_attr "tune" "!xscale,iwmmxt,iwmmxt2"))))
+ (eq_attr "tune" "!xscale"))))
"core")
(define_insn_reservation "load_or_store" 2
diff --git a/gcc/config/arm/arm-opts.h b/gcc/config/arm/arm-opts.h
index 06a1939..5c543bf 100644
--- a/gcc/config/arm/arm-opts.h
+++ b/gcc/config/arm/arm-opts.h
@@ -46,7 +46,6 @@ enum arm_abi_type
ARM_ABI_APCS,
ARM_ABI_ATPCS,
ARM_ABI_AAPCS,
- ARM_ABI_IWMMXT,
ARM_ABI_AAPCS_LINUX
};
diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h
index 254c731..ff7e765 100644
--- a/gcc/config/arm/arm-protos.h
+++ b/gcc/config/arm/arm-protos.h
@@ -190,8 +190,6 @@ extern void arm_output_multireg_pop (rtx *, bool, rtx, bool, bool);
extern void arm_set_return_address (rtx, rtx);
extern int arm_eliminable_register (rtx);
extern const char *arm_output_shift(rtx *, int);
-extern const char *arm_output_iwmmxt_shift_immediate (const char *, rtx *, bool);
-extern const char *arm_output_iwmmxt_tinsr (rtx *);
extern unsigned int arm_sync_loop_insns (rtx , rtx *);
extern int arm_attr_length_push_multi(rtx, rtx);
extern int arm_attr_length_pop_multi(rtx *, bool, bool);
@@ -475,12 +473,6 @@ extern int arm_ld_sched;
/* Nonzero if this chip is a StrongARM. */
extern int arm_tune_strongarm;
-/* Nonzero if this chip supports Intel Wireless MMX technology. */
-extern int arm_arch_iwmmxt;
-
-/* Nonzero if this chip supports Intel Wireless MMX2 technology. */
-extern int arm_arch_iwmmxt2;
-
/* Nonzero if this chip is an XScale. */
extern int arm_arch_xscale;
diff --git a/gcc/config/arm/arm-tables.opt b/gcc/config/arm/arm-tables.opt
index db7767a..544de84 100644
--- a/gcc/config/arm/arm-tables.opt
+++ b/gcc/config/arm/arm-tables.opt
@@ -67,12 +67,6 @@ EnumValue
Enum(processor_type) String(xscale) Value( TARGET_CPU_xscale)
EnumValue
-Enum(processor_type) String(iwmmxt) Value( TARGET_CPU_iwmmxt)
-
-EnumValue
-Enum(processor_type) String(iwmmxt2) Value( TARGET_CPU_iwmmxt2)
-
-EnumValue
Enum(processor_type) String(fa606te) Value( TARGET_CPU_fa606te)
EnumValue
diff --git a/gcc/config/arm/arm-tune.md b/gcc/config/arm/arm-tune.md
index a04d1ee..20b5f93 100644
--- a/gcc/config/arm/arm-tune.md
+++ b/gcc/config/arm/arm-tune.md
@@ -25,31 +25,30 @@
fa526,fa626,arm7tdmi,
arm710t,arm9,arm9tdmi,
arm920t,arm10tdmi,arm9e,
- arm10e,xscale,iwmmxt,
- iwmmxt2,fa606te,fa626te,
- fmp626,fa726te,arm926ejs,
- arm1026ejs,arm1136js,arm1136jfs,
- arm1176jzs,arm1176jzfs,mpcorenovfp,
- mpcore,arm1156t2s,arm1156t2fs,
- cortexm1,cortexm0,cortexm0plus,
- cortexm1smallmultiply,cortexm0smallmultiply,cortexm0plussmallmultiply,
- genericv7a,cortexa5,cortexa7,
- cortexa8,cortexa9,cortexa12,
- cortexa15,cortexa17,cortexr4,
- cortexr4f,cortexr5,cortexr7,
- cortexr8,cortexm7,cortexm4,
- cortexm3,marvell_pj4,cortexa15cortexa7,
- cortexa17cortexa7,cortexa32,cortexa35,
- cortexa53,cortexa57,cortexa72,
- cortexa73,exynosm1,xgene1,
- cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,
- cortexa73cortexa53,cortexa55,cortexa75,
- cortexa76,cortexa76ae,cortexa77,
- cortexa78,cortexa78ae,cortexa78c,
- cortexa710,cortexx1,cortexx1c,
- neoversen1,cortexa75cortexa55,cortexa76cortexa55,
- neoversev1,neoversen2,cortexm23,
- cortexm33,cortexm35p,cortexm52,
- cortexm55,starmc1,cortexm85,
- cortexr52,cortexr52plus"
+ arm10e,xscale,fa606te,
+ fa626te,fmp626,fa726te,
+ arm926ejs,arm1026ejs,arm1136js,
+ arm1136jfs,arm1176jzs,arm1176jzfs,
+ mpcorenovfp,mpcore,arm1156t2s,
+ arm1156t2fs,cortexm1,cortexm0,
+ cortexm0plus,cortexm1smallmultiply,cortexm0smallmultiply,
+ cortexm0plussmallmultiply,genericv7a,cortexa5,
+ cortexa7,cortexa8,cortexa9,
+ cortexa12,cortexa15,cortexa17,
+ cortexr4,cortexr4f,cortexr5,
+ cortexr7,cortexr8,cortexm7,
+ cortexm4,cortexm3,marvell_pj4,
+ cortexa15cortexa7,cortexa17cortexa7,cortexa32,
+ cortexa35,cortexa53,cortexa57,
+ cortexa72,cortexa73,exynosm1,
+ xgene1,cortexa57cortexa53,cortexa72cortexa53,
+ cortexa73cortexa35,cortexa73cortexa53,cortexa55,
+ cortexa75,cortexa76,cortexa76ae,
+ cortexa77,cortexa78,cortexa78ae,
+ cortexa78c,cortexa710,cortexx1,
+ cortexx1c,neoversen1,cortexa75cortexa55,
+ cortexa76cortexa55,neoversev1,neoversen2,
+ cortexm23,cortexm33,cortexm35p,
+ cortexm52,cortexm55,starmc1,
+ cortexm85,cortexr52,cortexr52plus"
(const (symbol_ref "((enum attr_tune) arm_tune)")))
diff --git a/gcc/config/arm/arm.cc b/gcc/config/arm/arm.cc
index 670f487..8b951f3 100644
--- a/gcc/config/arm/arm.cc
+++ b/gcc/config/arm/arm.cc
@@ -289,7 +289,8 @@ static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
static bool arm_builtin_support_vector_misalignment (machine_mode mode,
const_tree type,
int misalignment,
- bool is_packed);
+ bool is_packed,
+ bool is_gather_scatter);
static void arm_conditional_register_usage (void);
static enum flt_eval_method arm_excess_precision (enum excess_precision_type);
static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
@@ -948,12 +949,6 @@ int arm_ld_sched = 0;
/* Nonzero if this chip is a StrongARM. */
int arm_tune_strongarm = 0;
-/* Nonzero if this chip supports Intel Wireless MMX technology. */
-int arm_arch_iwmmxt = 0;
-
-/* Nonzero if this chip supports Intel Wireless MMX2 technology. */
-int arm_arch_iwmmxt2 = 0;
-
/* Nonzero if this chip is an XScale. */
int arm_arch_xscale = 0;
@@ -2970,11 +2965,6 @@ arm_option_check_internal (struct gcc_options *opts)
{
int flags = opts->x_target_flags;
- /* iWMMXt and NEON are incompatible. */
- if (TARGET_IWMMXT
- && bitmap_bit_p (arm_active_target.isa, isa_bit_neon))
- error ("iWMMXt and NEON are incompatible");
-
/* Make sure that the processor choice does not conflict with any of the
other command line choices. */
if (TARGET_ARM_P (flags)
@@ -2997,10 +2987,6 @@ arm_option_check_internal (struct gcc_options *opts)
warning (0, "%<-g%> with %<-mno-apcs-frame%> may not give sensible "
"debugging");
- /* iWMMXt unsupported under Thumb mode. */
- if (TARGET_THUMB_P (flags) && TARGET_IWMMXT)
- error ("iWMMXt unsupported under Thumb mode");
-
if (TARGET_HARD_TP && TARGET_THUMB1_P (flags))
error ("cannot use %<-mtp=cp15%> with 16-bit Thumb");
@@ -3928,8 +3914,6 @@ arm_option_reconfigure_globals (void)
arm_arch_thumb1 = bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
arm_arch_thumb2 = bitmap_bit_p (arm_active_target.isa, isa_bit_thumb2);
arm_arch_xscale = bitmap_bit_p (arm_active_target.isa, isa_bit_xscale);
- arm_arch_iwmmxt = bitmap_bit_p (arm_active_target.isa, isa_bit_iwmmxt);
- arm_arch_iwmmxt2 = bitmap_bit_p (arm_active_target.isa, isa_bit_iwmmxt2);
arm_arch_thumb_hwdiv = bitmap_bit_p (arm_active_target.isa, isa_bit_tdiv);
arm_arch_arm_hwdiv = bitmap_bit_p (arm_active_target.isa, isa_bit_adiv);
arm_arch_crc = bitmap_bit_p (arm_active_target.isa, isa_bit_crc32);
@@ -3997,12 +3981,6 @@ arm_options_perform_arch_sanity_checks (void)
if (arm_arch5t)
target_flags &= ~MASK_INTERWORK;
- if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
- error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
-
- if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
- error ("iwmmxt abi requires an iwmmxt capable cpu");
-
/* BPABI targets use linker tricks to allow interworking on cores
without thumb support. */
if (TARGET_INTERWORK
@@ -4043,9 +4021,7 @@ arm_options_perform_arch_sanity_checks (void)
if (TARGET_AAPCS_BASED)
{
- if (arm_abi == ARM_ABI_IWMMXT)
- arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
- else if (TARGET_HARD_FLOAT_ABI)
+ if (TARGET_HARD_FLOAT_ABI)
{
arm_pcs_default = ARM_PCS_AAPCS_VFP;
if (!bitmap_bit_p (arm_active_target.isa, isa_bit_vfpv2)
@@ -4555,11 +4531,6 @@ use_return_insn (int iscond, rtx sibling)
if (reg_needs_saving_p (regno))
return 0;
- if (TARGET_REALLY_IWMMXT)
- for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
- if (reg_needs_saving_p (regno))
- return 0;
-
return 1;
}
@@ -6048,9 +6019,6 @@ arm_libcall_value_1 (machine_mode mode)
{
if (TARGET_AAPCS_BASED)
return aapcs_libcall_value (mode);
- else if (TARGET_IWMMXT_ABI
- && arm_vector_mode_supported_p (mode))
- return gen_rtx_REG (mode, FIRST_IWMMXT_REGNUM);
else
return gen_rtx_REG (mode, ARG_REGISTER (1));
}
@@ -6083,9 +6051,7 @@ arm_function_value_regno_p (const unsigned int regno)
|| (TARGET_32BIT
&& TARGET_AAPCS_BASED
&& TARGET_HARD_FLOAT
- && regno == FIRST_VFP_REGNUM)
- || (TARGET_IWMMXT_ABI
- && regno == FIRST_IWMMXT_REGNUM))
+ && regno == FIRST_VFP_REGNUM))
return true;
return false;
@@ -6102,8 +6068,6 @@ arm_apply_result_size (void)
{
if (TARGET_HARD_FLOAT_ABI)
size += 32;
- if (TARGET_IWMMXT_ABI)
- size += 8;
}
return size;
@@ -6265,7 +6229,6 @@ const struct pcs_attribute_arg
#if 0
/* We could recognize these, but changes would be needed elsewhere
* to implement them. */
- {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
{"atpcs", ARM_PCS_ATPCS},
{"apcs", ARM_PCS_APCS},
#endif
@@ -7195,26 +7158,12 @@ arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
/* On the ARM, the offset starts at 0. */
pcum->nregs = 0;
- pcum->iwmmxt_nregs = 0;
pcum->can_split = true;
/* Varargs vectors are treated the same as long long.
named_count avoids having to change the way arm handles 'named' */
pcum->named_count = 0;
pcum->nargs = 0;
-
- if (TARGET_REALLY_IWMMXT && fntype)
- {
- tree fn_arg;
-
- for (fn_arg = TYPE_ARG_TYPES (fntype);
- fn_arg;
- fn_arg = TREE_CHAIN (fn_arg))
- pcum->named_count += 1;
-
- if (! pcum->named_count)
- pcum->named_count = INT_MAX;
- }
}
/* Return 2 if double word alignment is required for argument passing,
@@ -7308,22 +7257,6 @@ arm_function_arg (cumulative_args_t pcum_v, const function_arg_info &arg)
return pcum->aapcs_reg;
}
- /* Varargs vectors are treated the same as long long.
- named_count avoids having to change the way arm handles 'named' */
- if (TARGET_IWMMXT_ABI
- && arm_vector_mode_supported_p (arg.mode)
- && pcum->named_count > pcum->nargs + 1)
- {
- if (pcum->iwmmxt_nregs <= 9)
- return gen_rtx_REG (arg.mode,
- pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
- else
- {
- pcum->can_split = false;
- return NULL_RTX;
- }
- }
-
/* Put doubleword aligned quantities in even register pairs. */
if ((pcum->nregs & 1) && ARM_DOUBLEWORD_ALIGN)
{
@@ -7383,9 +7316,6 @@ arm_arg_partial_bytes (cumulative_args_t pcum_v, const function_arg_info &arg)
return pcum->aapcs_partial;
}
- if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (arg.mode))
- return 0;
-
if (NUM_ARG_REGS > nregs
&& (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (arg.mode, arg.type))
&& pcum->can_split)
@@ -7422,12 +7352,7 @@ arm_function_arg_advance (cumulative_args_t pcum_v,
else
{
pcum->nargs += 1;
- if (arm_vector_mode_supported_p (arg.mode)
- && pcum->named_count > pcum->nargs
- && TARGET_IWMMXT_ABI)
- pcum->iwmmxt_nregs += 1;
- else
- pcum->nregs += ARM_NUM_REGS2 (arg.mode, arg.type);
+ pcum->nregs += ARM_NUM_REGS2 (arg.mode, arg.type);
}
}
@@ -8149,8 +8074,7 @@ require_pic_register (rtx pic_reg, bool compute_now)
else
arm_load_pic_register (0UL, pic_reg);
- seq = get_insns ();
- end_sequence ();
+ seq = end_sequence ();
for (insn = seq; insn; insn = NEXT_INSN (insn))
if (INSN_P (insn))
@@ -8906,12 +8830,6 @@ arm_legitimate_index_p (machine_mode mode, rtx index, RTX_CODE outer,
&& INTVAL (index) > -1024
&& (INTVAL (index) & 3) == 0);
- if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
- return (code == CONST_INT
- && INTVAL (index) < 1024
- && INTVAL (index) > -1024
- && (INTVAL (index) & 3) == 0);
-
if (GET_MODE_SIZE (mode) <= 4
&& ! (arm_arch4
&& (mode == HImode
@@ -8991,17 +8909,6 @@ thumb2_legitimate_index_p (machine_mode mode, rtx index, int strict_p)
&& INTVAL (index) > -256
&& (INTVAL (index) & 3) == 0);
- if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
- {
- /* For DImode assume values will usually live in core regs
- and only allow LDRD addressing modes. */
- if (!TARGET_LDRD || mode != DImode)
- return (code == CONST_INT
- && INTVAL (index) < 1024
- && INTVAL (index) > -1024
- && (INTVAL (index) & 3) == 0);
- }
-
/* For quad modes, we restrict the constant offset to be slightly less
than what the instruction format permits. We do this because for
quad mode moves, we will actually decompose them into two separate
@@ -9372,10 +9279,7 @@ arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
LCT_PURE, /* LCT_CONST? */
Pmode, reg, Pmode);
- rtx_insn *insns = get_insns ();
- end_sequence ();
-
- return insns;
+ return end_sequence ();
}
static rtx
@@ -12463,11 +12367,6 @@ arm_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
if ((IS_VFP_CLASS (from) && !IS_VFP_CLASS (to))
|| (!IS_VFP_CLASS (from) && IS_VFP_CLASS (to)))
return 15;
- else if ((from == IWMMXT_REGS && to != IWMMXT_REGS)
- || (from != IWMMXT_REGS && to == IWMMXT_REGS))
- return 4;
- else if (from == IWMMXT_GR_REGS || to == IWMMXT_GR_REGS)
- return 20;
else
return 2;
}
@@ -14993,8 +14892,6 @@ arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
if (!multiple_operation_profitable_p (false, count, 0))
{
- rtx seq;
-
start_sequence ();
for (i = 0; i < count; i++)
@@ -15003,10 +14900,7 @@ arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
if (wback_offset != 0)
emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
- seq = get_insns ();
- end_sequence ();
-
- return seq;
+ return end_sequence ();
}
result = gen_rtx_PARALLEL (VOIDmode,
@@ -15044,8 +14938,6 @@ arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
if (!multiple_operation_profitable_p (false, count, 0))
{
- rtx seq;
-
start_sequence ();
for (i = 0; i < count; i++)
@@ -15054,10 +14946,7 @@ arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
if (wback_offset != 0)
emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
- seq = get_insns ();
- end_sequence ();
-
- return seq;
+ return end_sequence ();
}
result = gen_rtx_PARALLEL (VOIDmode,
@@ -16211,14 +16100,16 @@ arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
case UNGT:
case UNGE:
case UNEQ:
- case LTGT:
return CCFPmode;
case LT:
case LE:
case GT:
case GE:
- return CCFPEmode;
+ case LTGT:
+ return (flag_finite_math_only
+ ? CCFPmode
+ : CCFPEmode);
default:
gcc_unreachable ();
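The mode choice here encodes IEEE semantics: ordered relations (now including LTGT) must use the signalling compare so a NaN operand raises the invalid-operand exception, while -ffinite-math-only promises no NaNs and lets the quiet compare be used throughout. For example (illustrative, not from the patch):

    /* With default options this ordered compare selects CCFPEmode
       (signalling on NaN); under -ffinite-math-only it selects the
       quiet CCFPmode instead.  */
    int lt (double a, double b) { return a < b; }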
@@ -17581,8 +17472,7 @@ struct minipool_node
rtx value;
/* The mode of value. */
machine_mode mode;
- /* The size of the value. With iWMMXt enabled
- sizes > 4 also imply an alignment of 8-bytes. */
+ /* The size of the value. */
int fix_size;
};
@@ -18942,8 +18832,7 @@ cmse_clear_registers (sbitmap to_clear_bitmap, uint32_t *padding_bits_to_clear,
XVECEXP (par, 0, k++) = set;
emit_use (reg);
}
- use_seq = get_insns ();
- end_sequence ();
+ use_seq = end_sequence ();
emit_insn_after (use_seq, emit_insn (par));
}
@@ -18988,8 +18877,7 @@ cmse_clear_registers (sbitmap to_clear_bitmap, uint32_t *padding_bits_to_clear,
rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
XVECEXP (par, 0, j) = clobber;
- use_seq = get_insns ();
- end_sequence ();
+ use_seq = end_sequence ();
emit_insn_after (use_seq, emit_insn (par));
}
@@ -19095,7 +18983,8 @@ cmse_nonsecure_call_inline_register_clear (void)
call = SET_SRC (call);
/* Check if it is a cmse_nonsecure_call. */
- unspec = XEXP (call, 0);
+ unspec = XVECEXP (pat, 0, 2);
+
if (GET_CODE (unspec) != UNSPEC
|| XINT (unspec, 1) != UNSPEC_NONSECURE_MEM)
continue;
@@ -19122,7 +19011,7 @@ cmse_nonsecure_call_inline_register_clear (void)
/* Make sure the register used to hold the function address is not
cleared. */
- address = RTVEC_ELT (XVEC (unspec, 0), 0);
+ address = XEXP (call, 0);
gcc_assert (MEM_P (address));
gcc_assert (REG_P (XEXP (address, 0)));
address_regnum = REGNO (XEXP (address, 0));
@@ -19230,8 +19119,7 @@ cmse_nonsecure_call_inline_register_clear (void)
cmse_clear_registers (to_clear_bitmap, padding_bits_to_clear,
NUM_ARG_REGS, ip_reg, clearing_reg);
- seq = get_insns ();
- end_sequence ();
+ seq = end_sequence ();
emit_insn_before (seq, insn);
/* The AAPCS requires the callee to widen integral types narrower
@@ -20245,9 +20133,7 @@ output_move_double (rtx *operands, bool emit, int *count)
}
else
{
- /* Use a single insn if we can.
- FIXME: IWMMXT allows offsets larger than ldrd can
- handle, fix these up with a pair of ldr. */
+ /* Use a single insn if we can. */
if (can_ldrd
&& (TARGET_THUMB2
|| !CONST_INT_P (otherops[2])
@@ -20272,9 +20158,7 @@ output_move_double (rtx *operands, bool emit, int *count)
}
else
{
- /* Use a single insn if we can.
- FIXME: IWMMXT allows offsets larger than ldrd can handle,
- fix these up with a pair of ldr. */
+ /* Use a single insn if we can. */
if (can_ldrd
&& (TARGET_THUMB2
|| !CONST_INT_P (otherops[2])
@@ -20512,8 +20396,6 @@ output_move_double (rtx *operands, bool emit, int *count)
otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
- /* IWMMXT allows offsets larger than strd can handle,
- fix these up with a pair of str. */
if (!TARGET_THUMB2
&& CONST_INT_P (otherops[2])
&& (INTVAL(otherops[2]) <= -256
@@ -21450,34 +21332,6 @@ arm_compute_save_core_reg_mask (void)
if (cfun->machine->lr_save_eliminated)
save_reg_mask &= ~ (1 << LR_REGNUM);
- if (TARGET_REALLY_IWMMXT
- && ((bit_count (save_reg_mask)
- + ARM_NUM_INTS (crtl->args.pretend_args_size +
- arm_compute_static_chain_stack_bytes())
- ) % 2) != 0)
- {
- /* The total number of registers that are going to be pushed
- onto the stack is odd. We need to ensure that the stack
- is 64-bit aligned before we start to save iWMMXt registers,
- and also before we start to create locals. (A local variable
- might be a double or long long which we will load/store using
- an iWMMXt instruction). Therefore we need to push another
- ARM register, so that the stack will be 64-bit aligned. We
- try to avoid using the arg registers (r0 -r3) as they might be
- used to pass values in a tail call. */
- for (reg = 4; reg <= 12; reg++)
- if ((save_reg_mask & (1 << reg)) == 0)
- break;
-
- if (reg <= 12)
- save_reg_mask |= (1 << reg);
- else
- {
- cfun->machine->sibcall_blocked = 1;
- save_reg_mask |= (1 << 3);
- }
- }
-
/* We may need to push an additional register for use initializing the
PIC base register. */
if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
@@ -21685,19 +21539,17 @@ output_return_instruction (rtx operand, bool really_return, bool reverse,
if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
{
- /* There are three possible reasons for the IP register
- being saved. 1) a stack frame was created, in which case
- IP contains the old stack pointer, or 2) an ISR routine
- corrupted it, or 3) it was saved to align the stack on
- iWMMXt. In case 1, restore IP into SP, otherwise just
- restore IP. */
+ /* There are two possible reasons for the IP register being saved.
+ 1) a stack frame was created, in which case IP contains the old
+ stack pointer, or 2) an ISR routine corrupted it. In case 1,
+ restore IP into SP, otherwise just restore IP. */
if (frame_pointer_needed)
{
live_regs_mask &= ~ (1 << IP_REGNUM);
live_regs_mask |= (1 << SP_REGNUM);
}
else
- gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
+ gcc_assert (IS_INTERRUPT (func_type));
}
/* On some ARM architectures it is faster to use LDR rather than
@@ -23149,8 +23001,6 @@ arm_compute_frame_layout (void)
if (TARGET_32BIT)
{
- unsigned int regno;
-
offsets->saved_regs_mask = arm_compute_save_core_reg_mask ();
core_saved = bit_count (offsets->saved_regs_mask) * 4;
saved = core_saved;
@@ -23159,16 +23009,6 @@ arm_compute_frame_layout (void)
preserve that condition at any subroutine call. We also require the
soft frame pointer to be doubleword aligned. */
- if (TARGET_REALLY_IWMMXT)
- {
- /* Check for the call-saved iWMMXt registers. */
- for (regno = FIRST_IWMMXT_REGNUM;
- regno <= LAST_IWMMXT_REGNUM;
- regno++)
- if (reg_needs_saving_p (regno))
- saved += 8;
- }
-
func_type = arm_current_func_type ();
/* Space for saved VFP registers. */
if (! IS_VOLATILE (func_type)
@@ -23384,18 +23224,6 @@ arm_save_coproc_regs(void)
int saved_size = 0;
unsigned reg;
unsigned start_reg;
- rtx insn;
-
- if (TARGET_REALLY_IWMMXT)
- for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
- if (reg_needs_saving_p (reg))
- {
- insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
- insn = gen_rtx_MEM (V2SImode, insn);
- insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
- RTX_FRAME_RELATED_P (insn) = 1;
- saved_size += 8;
- }
if (TARGET_VFP_BASE)
{
@@ -24554,42 +24382,9 @@ arm_print_operand (FILE *stream, rtx x, int code)
return;
case 'U':
- if (!REG_P (x)
- || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
- || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
- /* Bad value for wCG register number. */
- {
- output_operand_lossage ("invalid operand for code '%c'", code);
- return;
- }
-
- else
- fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
- return;
-
- /* Print an iWMMXt control register name. */
case 'w':
- if (!CONST_INT_P (x)
- || INTVAL (x) < 0
- || INTVAL (x) >= 16)
- /* Bad value for wC register number. */
- {
- output_operand_lossage ("invalid operand for code '%c'", code);
- return;
- }
-
- else
- {
- static const char * wc_reg_names [16] =
- {
- "wCID", "wCon", "wCSSF", "wCASF",
- "wC4", "wC5", "wC6", "wC7",
- "wCGR0", "wCGR1", "wCGR2", "wCGR3",
- "wC12", "wC13", "wC14", "wC15"
- };
-
- fputs (wc_reg_names [INTVAL (x)], stream);
- }
+ /* Former iWMMXT support, removed after GCC-15. */
+ output_operand_lossage ("obsolete iWMMXT format code '%c'", code);
return;
/* Print the high single-precision register of a VFP double-precision
@@ -25924,15 +25719,6 @@ arm_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
return false;
}
- if (TARGET_REALLY_IWMMXT)
- {
- if (IS_IWMMXT_GR_REGNUM (regno))
- return mode == SImode;
-
- if (IS_IWMMXT_REGNUM (regno))
- return VALID_IWMMXT_REG_MODE (mode);
- }
-
/* We allow almost any value to be stored in the general registers.
Restrict doubleword quantities to even register pairs in ARM state
so that we can use ldrd. The same restriction applies for MVE
@@ -26038,12 +25824,6 @@ arm_regno_class (int regno)
return VFP_HI_REGS;
}
- if (IS_IWMMXT_REGNUM (regno))
- return IWMMXT_REGS;
-
- if (IS_IWMMXT_GR_REGNUM (regno))
- return IWMMXT_GR_REGS;
-
return NO_REGS;
}
@@ -27961,27 +27741,6 @@ arm_expand_epilogue_apcs_frame (bool really_return)
gen_rtx_REG (SImode, IP_REGNUM));
}
- if (TARGET_IWMMXT)
- {
- /* The frame pointer is guaranteed to be non-double-word aligned, as
- it is set to double-word-aligned old_stack_pointer - 4. */
- rtx_insn *insn;
- int lrm_count = (num_regs % 2) ? (num_regs + 2) : (num_regs + 1);
-
- for (i = LAST_IWMMXT_REGNUM; i >= FIRST_IWMMXT_REGNUM; i--)
- if (reg_needs_saving_p (i))
- {
- rtx addr = gen_frame_mem (V2SImode,
- plus_constant (Pmode, hard_frame_pointer_rtx,
- - lrm_count * 4));
- insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
- REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
- gen_rtx_REG (V2SImode, i),
- NULL_RTX);
- lrm_count += 2;
- }
- }
-
/* saved_regs_mask should contain IP which contains old stack pointer
at the time of activation creation. Since SP and IP are adjacent registers,
we can restore the value directly into SP. */
@@ -28194,23 +27953,6 @@ arm_expand_epilogue (bool really_return)
stack_pointer_rtx);
}
- if (TARGET_IWMMXT)
- for (i = FIRST_IWMMXT_REGNUM; i <= LAST_IWMMXT_REGNUM; i++)
- if (reg_needs_saving_p (i))
- {
- rtx_insn *insn;
- rtx addr = gen_rtx_MEM (V2SImode,
- gen_rtx_POST_INC (SImode,
- stack_pointer_rtx));
- set_mem_alias_set (addr, get_frame_alias_set ());
- insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
- REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
- gen_rtx_REG (V2SImode, i),
- NULL_RTX);
- arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
- stack_pointer_rtx, stack_pointer_rtx);
- }
-
if (saved_regs_mask)
{
rtx insn;
@@ -29851,7 +29593,7 @@ arm_vector_mode_supported_p (machine_mode mode)
|| mode == V8BFmode))
return true;
- if ((TARGET_NEON || TARGET_IWMMXT)
+ if (TARGET_NEON
&& ((mode == V2SImode)
|| (mode == V4HImode)
|| (mode == V8QImode)))
@@ -29943,19 +29685,6 @@ arm_preferred_simd_mode (scalar_mode mode)
default:;
}
- if (TARGET_REALLY_IWMMXT)
- switch (mode)
- {
- case E_SImode:
- return V2SImode;
- case E_HImode:
- return V4HImode;
- case E_QImode:
- return V8QImode;
-
- default:;
- }
-
if (TARGET_HAVE_MVE)
switch (mode)
{
@@ -30037,12 +29766,6 @@ arm_debugger_regno (unsigned int regno)
return 256 + (regno - FIRST_VFP_REGNUM) / 2;
}
- if (IS_IWMMXT_GR_REGNUM (regno))
- return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
-
- if (IS_IWMMXT_REGNUM (regno))
- return 112 + regno - FIRST_IWMMXT_REGNUM;
-
if (IS_PAC_REGNUM (regno))
return DWARF_PAC_REGNUM;
@@ -30629,95 +30352,6 @@ arm_output_shift(rtx * operands, int set_flags)
return "";
}
-/* Output assembly for a WMMX immediate shift instruction. */
-const char *
-arm_output_iwmmxt_shift_immediate (const char *insn_name, rtx *operands, bool wror_or_wsra)
-{
- int shift = INTVAL (operands[2]);
- char templ[50];
- machine_mode opmode = GET_MODE (operands[0]);
-
- gcc_assert (shift >= 0);
-
- /* If the shift value in the register versions is > 63 (for D qualifier),
- 31 (for W qualifier) or 15 (for H qualifier). */
- if (((opmode == V4HImode) && (shift > 15))
- || ((opmode == V2SImode) && (shift > 31))
- || ((opmode == DImode) && (shift > 63)))
- {
- if (wror_or_wsra)
- {
- sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
- output_asm_insn (templ, operands);
- if (opmode == DImode)
- {
- sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, 32);
- output_asm_insn (templ, operands);
- }
- }
- else
- {
- /* The destination register will contain all zeros. */
- sprintf (templ, "wzero\t%%0");
- output_asm_insn (templ, operands);
- }
- return "";
- }
-
- if ((opmode == DImode) && (shift > 32))
- {
- sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
- output_asm_insn (templ, operands);
- sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, shift - 32);
- output_asm_insn (templ, operands);
- }
- else
- {
- sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, shift);
- output_asm_insn (templ, operands);
- }
- return "";
-}
-
-/* Output assembly for a WMMX tinsr instruction. */
-const char *
-arm_output_iwmmxt_tinsr (rtx *operands)
-{
- int mask = INTVAL (operands[3]);
- int i;
- char templ[50];
- int units = mode_nunits[GET_MODE (operands[0])];
- gcc_assert ((mask & (mask - 1)) == 0);
- for (i = 0; i < units; ++i)
- {
- if ((mask & 0x01) == 1)
- {
- break;
- }
- mask >>= 1;
- }
- gcc_assert (i < units);
- {
- switch (GET_MODE (operands[0]))
- {
- case E_V8QImode:
- sprintf (templ, "tinsrb%%?\t%%0, %%2, #%d", i);
- break;
- case E_V4HImode:
- sprintf (templ, "tinsrh%%?\t%%0, %%2, #%d", i);
- break;
- case E_V2SImode:
- sprintf (templ, "tinsrw%%?\t%%0, %%2, #%d", i);
- break;
- default:
- gcc_unreachable ();
- break;
- }
- output_asm_insn (templ, operands);
- }
- return "";
-}
-
/* Output an arm casesi dispatch sequence. Used by arm_casesi_internal insn.
Responsible for the handling of switch statements in arm. */
const char *
@@ -31029,12 +30663,16 @@ arm_vector_alignment_reachable (const_tree type, bool is_packed)
static bool
arm_builtin_support_vector_misalignment (machine_mode mode,
const_tree type, int misalignment,
- bool is_packed)
+ bool is_packed,
+ bool is_gather_scatter)
{
if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
{
HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);
+ if (is_gather_scatter)
+ return true;
+
if (is_packed)
return align == 1;
@@ -31051,7 +30689,8 @@ arm_builtin_support_vector_misalignment (machine_mode mode,
}
return default_builtin_support_vector_misalignment (mode, type, misalignment,
- is_packed);
+ is_packed,
+ is_gather_scatter);
}
static void
@@ -31090,26 +30729,6 @@ arm_conditional_register_usage (void)
fixed_regs[VPR_REGNUM] = 0;
}
- if (TARGET_REALLY_IWMMXT && !TARGET_GENERAL_REGS_ONLY)
- {
- regno = FIRST_IWMMXT_GR_REGNUM;
- /* The 2002/10/09 revision of the XScale ABI has wCG0
- and wCG1 as call-preserved registers. The 2002/11/21
- revision changed this so that all wCG registers are
- scratch registers. */
- for (regno = FIRST_IWMMXT_GR_REGNUM;
- regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
- fixed_regs[regno] = 0;
- /* The XScale ABI has wR0 - wR9 as scratch registers,
- the rest as call-preserved registers. */
- for (regno = FIRST_IWMMXT_REGNUM;
- regno <= LAST_IWMMXT_REGNUM; ++ regno)
- {
- fixed_regs[regno] = 0;
- call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
- }
- }
-
if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
{
fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
@@ -35959,8 +35578,7 @@ arm_attempt_dlstp_transform (rtx label)
emit_insn (PATTERN (insn));
}
}
- seq = get_insns ();
- end_sequence ();
+ seq = end_sequence ();
/* Re-write the entire BB contents with the transformed
sequence. */
diff --git a/gcc/config/arm/arm.h b/gcc/config/arm/arm.h
index 8472b75..2e9d678 100644
--- a/gcc/config/arm/arm.h
+++ b/gcc/config/arm/arm.h
@@ -137,13 +137,6 @@ emission of floating point pcs attributes. */
#define TARGET_MAYBE_HARD_FLOAT (arm_float_abi != ARM_FLOAT_ABI_SOFT)
/* Use hardware floating point calling convention. */
#define TARGET_HARD_FLOAT_ABI (arm_float_abi == ARM_FLOAT_ABI_HARD)
-#define TARGET_IWMMXT (arm_arch_iwmmxt)
-#define TARGET_IWMMXT2 (arm_arch_iwmmxt2)
-#define TARGET_REALLY_IWMMXT (TARGET_IWMMXT && TARGET_32BIT \
- && !TARGET_GENERAL_REGS_ONLY)
-#define TARGET_REALLY_IWMMXT2 (TARGET_IWMMXT2 && TARGET_32BIT \
- && !TARGET_GENERAL_REGS_ONLY)
-#define TARGET_IWMMXT_ABI (TARGET_32BIT && arm_abi == ARM_ABI_IWMMXT)
#define TARGET_ARM (! TARGET_THUMB)
#define TARGET_EITHER 1 /* (TARGET_ARM | TARGET_THUMB) */
#define TARGET_BACKTRACE (crtl->is_leaf \
@@ -526,12 +519,6 @@ extern int arm_ld_sched;
/* Nonzero if this chip is a StrongARM. */
extern int arm_tune_strongarm;
-/* Nonzero if this chip supports Intel XScale with Wireless MMX technology. */
-extern int arm_arch_iwmmxt;
-
-/* Nonzero if this chip supports Intel Wireless MMX2 technology. */
-extern int arm_arch_iwmmxt2;
-
/* Nonzero if this chip is an XScale. */
extern int arm_arch_xscale;
@@ -855,10 +842,6 @@ extern const int arm_arch_cde_coproc_bits[];
1,1,1,1,1,1,1,1, \
1,1,1,1,1,1,1,1, \
1,1,1,1,1,1,1,1, \
- /* IWMMXT regs. */ \
- 1,1,1,1,1,1,1,1, \
- 1,1,1,1,1,1,1,1, \
- 1,1,1,1, \
/* Specials. */ \
1,1,1,1,1,1,1,1 \
}
@@ -885,10 +868,6 @@ extern const int arm_arch_cde_coproc_bits[];
1,1,1,1,1,1,1,1, \
1,1,1,1,1,1,1,1, \
1,1,1,1,1,1,1,1, \
- /* IWMMXT regs. */ \
- 1,1,1,1,1,1,1,1, \
- 1,1,1,1,1,1,1,1, \
- 1,1,1,1, \
/* Specials. */ \
1,1,1,1,1,1,1,1 \
}
@@ -1010,23 +989,11 @@ extern const int arm_arch_cde_coproc_bits[];
/* Register to use for pushing function arguments. */
#define STACK_POINTER_REGNUM SP_REGNUM
-#define FIRST_IWMMXT_REGNUM (LAST_HI_VFP_REGNUM + 1)
-#define LAST_IWMMXT_REGNUM (FIRST_IWMMXT_REGNUM + 15)
-
-/* Need to sync with WCGR in iwmmxt.md. */
-#define FIRST_IWMMXT_GR_REGNUM (LAST_IWMMXT_REGNUM + 1)
-#define LAST_IWMMXT_GR_REGNUM (FIRST_IWMMXT_GR_REGNUM + 3)
-
-#define IS_IWMMXT_REGNUM(REGNUM) \
- (((REGNUM) >= FIRST_IWMMXT_REGNUM) && ((REGNUM) <= LAST_IWMMXT_REGNUM))
-#define IS_IWMMXT_GR_REGNUM(REGNUM) \
- (((REGNUM) >= FIRST_IWMMXT_GR_REGNUM) && ((REGNUM) <= LAST_IWMMXT_GR_REGNUM))
-
/* Base register for access to local variables of the function. */
-#define FRAME_POINTER_REGNUM 102
+#define FRAME_POINTER_REGNUM (CC_REGNUM + 2)
/* Base register for access to arguments of the function. */
-#define ARG_POINTER_REGNUM 103
+#define ARG_POINTER_REGNUM (FRAME_POINTER_REGNUM + 1)
#define FIRST_VFP_REGNUM 16
#define D7_VFP_REGNUM (FIRST_VFP_REGNUM + 15)
@@ -1067,9 +1034,8 @@ extern const int arm_arch_cde_coproc_bits[];
/* The number of hard registers is 16 ARM + 1 CC + 1 SFP + 1 AFP
+ 1 APSRQ + 1 APSRGE + 1 VPR + 1 Pseudo register to save PAC. */
-/* Intel Wireless MMX Technology registers add 16 + 4 more. */
/* VFP (VFP3) adds 32 (64) + 1 VFPCC. */
-#define FIRST_PSEUDO_REGISTER 108
+#define FIRST_PSEUDO_REGISTER 88
#define DWARF_PAC_REGNUM 143
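
Removing iWMMXt deletes 16 wR data registers plus 4 wCGR registers, so every special register number drops by exactly 20 (CC 100 to 80, the soft frame pointer 102 to 82, and so on), and FIRST_PSEUDO_REGISTER falls from 108 to 88. A quick check of that arithmetic against the values in these hunks:

/* Sanity-check the renumbering implied by this patch: the VFP bank
   still ends at 79, and the specials shift down by 20.  */
#include <assert.h>

enum
{
  CC_REGNUM = 80,                                /* was 100 */
  VFPCC_REGNUM = 81,                             /* was 101 */
  FRAME_POINTER_REGNUM = CC_REGNUM + 2,          /* 82, was 102 */
  ARG_POINTER_REGNUM = FRAME_POINTER_REGNUM + 1, /* 83, was 103 */
  VPR_REGNUM = 86,                               /* was 106 */
  RA_AUTH_CODE = 87,                             /* was 107 */
  FIRST_PSEUDO_REGISTER = 88                     /* was 108 */
};

int main (void)
{
  /* 20 == 16 wR registers + 4 wCGR registers removed.  */
  assert (100 - CC_REGNUM == 20);
  assert (108 - FIRST_PSEUDO_REGISTER == 20);
  assert (RA_AUTH_CODE + 1 == FIRST_PSEUDO_REGISTER);
  return 0;
}
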
@@ -1086,9 +1052,6 @@ extern const int arm_arch_cde_coproc_bits[];
#define SUBTARGET_FRAME_POINTER_REQUIRED 0
#endif
-#define VALID_IWMMXT_REG_MODE(MODE) \
- (arm_vector_mode_supported_p (MODE) || (MODE) == DImode)
-
/* Modes valid for Neon D registers. */
#define VALID_NEON_DREG_MODE(MODE) \
((MODE) == V2SImode || (MODE) == V4HImode || (MODE) == V8QImode \
@@ -1168,9 +1131,9 @@ extern const int arm_arch_cde_coproc_bits[];
/* The conditions under which vector modes are supported for general
arithmetic by any vector extension. */
-#define ARM_HAVE_V8QI_ARITH (ARM_HAVE_NEON_V8QI_ARITH || TARGET_REALLY_IWMMXT)
-#define ARM_HAVE_V4HI_ARITH (ARM_HAVE_NEON_V4HI_ARITH || TARGET_REALLY_IWMMXT)
-#define ARM_HAVE_V2SI_ARITH (ARM_HAVE_NEON_V2SI_ARITH || TARGET_REALLY_IWMMXT)
+#define ARM_HAVE_V8QI_ARITH (ARM_HAVE_NEON_V8QI_ARITH)
+#define ARM_HAVE_V4HI_ARITH (ARM_HAVE_NEON_V4HI_ARITH)
+#define ARM_HAVE_V2SI_ARITH (ARM_HAVE_NEON_V2SI_ARITH)
#define ARM_HAVE_V16QI_ARITH (ARM_HAVE_NEON_V16QI_ARITH || TARGET_HAVE_MVE)
#define ARM_HAVE_V8HI_ARITH (ARM_HAVE_NEON_V8HI_ARITH || TARGET_HAVE_MVE)
@@ -1204,9 +1167,9 @@ extern const int arm_arch_cde_coproc_bits[];
/* The conditions under which vector modes are supported by load/store
instructions by any vector extension. */
-#define ARM_HAVE_V8QI_LDST (ARM_HAVE_NEON_V8QI_LDST || TARGET_REALLY_IWMMXT)
-#define ARM_HAVE_V4HI_LDST (ARM_HAVE_NEON_V4HI_LDST || TARGET_REALLY_IWMMXT)
-#define ARM_HAVE_V2SI_LDST (ARM_HAVE_NEON_V2SI_LDST || TARGET_REALLY_IWMMXT)
+#define ARM_HAVE_V8QI_LDST (ARM_HAVE_NEON_V8QI_LDST)
+#define ARM_HAVE_V4HI_LDST (ARM_HAVE_NEON_V4HI_LDST)
+#define ARM_HAVE_V2SI_LDST (ARM_HAVE_NEON_V2SI_LDST)
#define ARM_HAVE_V16QI_LDST (ARM_HAVE_NEON_V16QI_LDST || TARGET_HAVE_MVE)
#define ARM_HAVE_V8HI_LDST (ARM_HAVE_NEON_V8HI_LDST || TARGET_HAVE_MVE)
@@ -1238,8 +1201,6 @@ extern int arm_regs_in_sequence[];
function. */
#define VREG(X) (FIRST_VFP_REGNUM + (X))
-#define WREG(X) (FIRST_IWMMXT_REGNUM + (X))
-#define WGREG(X) (FIRST_IWMMXT_GR_REGNUM + (X))
#define REG_ALLOC_ORDER \
{ \
@@ -1265,12 +1226,6 @@ extern int arm_regs_in_sequence[];
VREG(20), VREG(21), VREG(22), VREG(23), \
VREG(24), VREG(25), VREG(26), VREG(27), \
VREG(28), VREG(29), VREG(30), VREG(31), \
- /* IWMMX registers. */ \
- WREG(0), WREG(1), WREG(2), WREG(3), \
- WREG(4), WREG(5), WREG(6), WREG(7), \
- WREG(8), WREG(9), WREG(10), WREG(11), \
- WREG(12), WREG(13), WREG(14), WREG(15), \
- WGREG(0), WGREG(1), WGREG(2), WGREG(3), \
/* Registers not for general use. */ \
CC_REGNUM, VFPCC_REGNUM, \
FRAME_POINTER_REGNUM, ARG_POINTER_REGNUM, \
@@ -1315,8 +1270,6 @@ enum reg_class
VFP_LO_REGS,
VFP_HI_REGS,
VFP_REGS,
- IWMMXT_REGS,
- IWMMXT_GR_REGS,
CC_REG,
VFPCC_REG,
SFP_REG,
@@ -1346,8 +1299,6 @@ enum reg_class
"VFP_LO_REGS", \
"VFP_HI_REGS", \
"VFP_REGS", \
- "IWMMXT_REGS", \
- "IWMMXT_GR_REGS", \
"CC_REG", \
"VFPCC_REG", \
"SFP_REG", \
@@ -1363,29 +1314,27 @@ enum reg_class
of length N_REG_CLASSES. */
#define REG_CLASS_CONTENTS \
{ \
- { 0x00000000, 0x00000000, 0x00000000, 0x00000000 }, /* NO_REGS */ \
- { 0x000000FF, 0x00000000, 0x00000000, 0x00000000 }, /* LO_REGS */ \
- { 0x00002000, 0x00000000, 0x00000000, 0x00000000 }, /* STACK_REG */ \
- { 0x000020FF, 0x00000000, 0x00000000, 0x00000000 }, /* BASE_REGS */ \
- { 0x00005F00, 0x00000000, 0x00000000, 0x00000000 }, /* HI_REGS */ \
- { 0x0000100F, 0x00000000, 0x00000000, 0x00000000 }, /* CALLER_SAVE_REGS */ \
- { 0x00005555, 0x00000000, 0x00000000, 0x00000000 }, /* EVEN_REGS. */ \
- { 0x00005FFF, 0x00000000, 0x00000000, 0x00000000 }, /* GENERAL_REGS */ \
- { 0x00007FFF, 0x00000000, 0x00000000, 0x00000000 }, /* CORE_REGS */ \
- { 0xFFFF0000, 0x00000000, 0x00000000, 0x00000000 }, /* VFP_D0_D7_REGS */ \
- { 0xFFFF0000, 0x0000FFFF, 0x00000000, 0x00000000 }, /* VFP_LO_REGS */ \
- { 0x00000000, 0xFFFF0000, 0x0000FFFF, 0x00000000 }, /* VFP_HI_REGS */ \
- { 0xFFFF0000, 0xFFFFFFFF, 0x0000FFFF, 0x00000000 }, /* VFP_REGS */ \
- { 0x00000000, 0x00000000, 0xFFFF0000, 0x00000000 }, /* IWMMXT_REGS */ \
- { 0x00000000, 0x00000000, 0x00000000, 0x0000000F }, /* IWMMXT_GR_REGS */ \
- { 0x00000000, 0x00000000, 0x00000000, 0x00000010 }, /* CC_REG */ \
- { 0x00000000, 0x00000000, 0x00000000, 0x00000020 }, /* VFPCC_REG */ \
- { 0x00000000, 0x00000000, 0x00000000, 0x00000040 }, /* SFP_REG */ \
- { 0x00000000, 0x00000000, 0x00000000, 0x00000080 }, /* AFP_REG */ \
- { 0x00000000, 0x00000000, 0x00000000, 0x00000400 }, /* VPR_REG. */ \
- { 0x00000000, 0x00000000, 0x00000000, 0x00000800 }, /* PAC_REG. */ \
- { 0x00005FFF, 0x00000000, 0x00000000, 0x00000400 }, /* GENERAL_AND_VPR_REGS. */ \
- { 0xFFFF7FFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x0000040F } /* ALL_REGS. */ \
+ { 0x00000000, 0x00000000, 0x00000000 }, /* NO_REGS */ \
+ { 0x000000FF, 0x00000000, 0x00000000 }, /* LO_REGS */ \
+ { 0x00002000, 0x00000000, 0x00000000 }, /* STACK_REG */ \
+ { 0x000020FF, 0x00000000, 0x00000000 }, /* BASE_REGS */ \
+ { 0x00005F00, 0x00000000, 0x00000000 }, /* HI_REGS */ \
+ { 0x0000100F, 0x00000000, 0x00000000 }, /* CALLER_SAVE_REGS */ \
+ { 0x00005555, 0x00000000, 0x00000000 }, /* EVEN_REGS. */ \
+ { 0x00005FFF, 0x00000000, 0x00000000 }, /* GENERAL_REGS */ \
+ { 0x00007FFF, 0x00000000, 0x00000000 }, /* CORE_REGS */ \
+ { 0xFFFF0000, 0x00000000, 0x00000000 }, /* VFP_D0_D7_REGS */ \
+ { 0xFFFF0000, 0x0000FFFF, 0x00000000 }, /* VFP_LO_REGS */ \
+ { 0x00000000, 0xFFFF0000, 0x0000FFFF }, /* VFP_HI_REGS */ \
+ { 0xFFFF0000, 0xFFFFFFFF, 0x0000FFFF }, /* VFP_REGS */ \
+ { 0x00000000, 0x00000000, 0x00010000 }, /* CC_REG */ \
+ { 0x00000000, 0x00000000, 0x00020000 }, /* VFPCC_REG */ \
+ { 0x00000000, 0x00000000, 0x00040000 }, /* SFP_REG */ \
+ { 0x00000000, 0x00000000, 0x00080000 }, /* AFP_REG */ \
+ { 0x00000000, 0x00000000, 0x00400000 }, /* VPR_REG. */ \
+ { 0x00000000, 0x00000000, 0x00800000 }, /* PAC_REG. */ \
+ { 0x00005FFF, 0x00000000, 0x00400000 }, /* GENERAL_AND_VPR_REGS. */ \
+ { 0xFFFF7FFF, 0xFFFFFFFF, 0x0040FFFF } /* ALL_REGS. */ \
}
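
With FIRST_PSEUDO_REGISTER now 88, each class bitmap fits in three 32-bit words instead of four: register N lives at bit N % 32 of word N / 32. A quick check of that mapping against the new table values, for example CC_REGNUM 80 landing in word 2 as 0x00010000:

/* Verify the 3-word REG_CLASS_CONTENTS encoding against this hunk.  */
#include <assert.h>

static unsigned mask_of (unsigned regno) { return 1u << (regno % 32); }
static unsigned word_of (unsigned regno) { return regno / 32; }

int main (void)
{
  assert (word_of (80) == 2 && mask_of (80) == 0x00010000); /* CC_REG */
  assert (word_of (81) == 2 && mask_of (81) == 0x00020000); /* VFPCC_REG */
  assert (word_of (86) == 2 && mask_of (86) == 0x00400000); /* VPR_REG */
  assert (word_of (87) == 2 && mask_of (87) == 0x00800000); /* PAC_REG */
  return 0;
}
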
#define FP_SYSREGS \
@@ -1460,39 +1409,34 @@ extern const char *fp_sysreg_names[NB_FP_SYSREGS];
/* Return the register class of a scratch register needed to copy IN into
or out of a register in CLASS in MODE. If it can be done directly,
NO_REGS is returned. */
-#define SECONDARY_OUTPUT_RELOAD_CLASS(CLASS, MODE, X) \
- /* Restrict which direct reloads are allowed for VFP/iWMMXt regs. */ \
- ((TARGET_HARD_FLOAT && IS_VFP_CLASS (CLASS)) \
- ? coproc_secondary_reload_class (MODE, X, FALSE) \
- : (TARGET_IWMMXT && (CLASS) == IWMMXT_REGS) \
- ? coproc_secondary_reload_class (MODE, X, TRUE) \
- : TARGET_32BIT \
- ? (((MODE) == HImode && ! arm_arch4 && true_regnum (X) == -1) \
- ? GENERAL_REGS : NO_REGS) \
- : THUMB_SECONDARY_OUTPUT_RELOAD_CLASS (CLASS, MODE, X))
+#define SECONDARY_OUTPUT_RELOAD_CLASS(CLASS, MODE, X) \
+ /* Restrict which direct reloads are allowed for VFP regs. */ \
+ ((TARGET_HARD_FLOAT && IS_VFP_CLASS (CLASS)) \
+ ? coproc_secondary_reload_class (MODE, X, FALSE) \
+ : (TARGET_32BIT \
+ ? (((MODE) == HImode && ! arm_arch4 && true_regnum (X) == -1) \
+ ? GENERAL_REGS \
+ : NO_REGS) \
+ : THUMB_SECONDARY_OUTPUT_RELOAD_CLASS (CLASS, MODE, X)))
/* If we need to load shorts byte-at-a-time, then we need a scratch. */
-#define SECONDARY_INPUT_RELOAD_CLASS(CLASS, MODE, X) \
- /* Restrict which direct reloads are allowed for VFP/iWMMXt regs. */ \
- ((TARGET_HARD_FLOAT && IS_VFP_CLASS (CLASS)) \
- ? coproc_secondary_reload_class (MODE, X, FALSE) : \
- (TARGET_IWMMXT && (CLASS) == IWMMXT_REGS) ? \
- coproc_secondary_reload_class (MODE, X, TRUE) : \
- (TARGET_32BIT ? \
- (((CLASS) == IWMMXT_REGS || (CLASS) == IWMMXT_GR_REGS) \
- && CONSTANT_P (X)) \
- ? GENERAL_REGS : \
- (((MODE) == HImode && ! arm_arch4 \
- && (MEM_P (X) \
- || ((REG_P (X) || GET_CODE (X) == SUBREG) \
- && true_regnum (X) == -1))) \
- ? GENERAL_REGS : NO_REGS) \
- : THUMB_SECONDARY_INPUT_RELOAD_CLASS (CLASS, MODE, X)))
+#define SECONDARY_INPUT_RELOAD_CLASS(CLASS, MODE, X) \
+ /* Restrict which direct reloads are allowed for VFP regs. */ \
+ ((TARGET_HARD_FLOAT && IS_VFP_CLASS (CLASS)) \
+ ? coproc_secondary_reload_class (MODE, X, FALSE) \
+ : (TARGET_32BIT \
+ ? (((MODE) == HImode \
+ && ! arm_arch4 \
+ && (MEM_P (X) \
+ || ((REG_P (X) || GET_CODE (X) == SUBREG) \
+ && true_regnum (X) == -1))) \
+ ? GENERAL_REGS \
+ : NO_REGS) \
+ : THUMB_SECONDARY_INPUT_RELOAD_CLASS (CLASS, MODE, X)))
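
With the iWMMXt arms deleted, both reload macros reduce to the same shape: VFP classes defer to coproc_secondary_reload_class, 32-bit targets need a GENERAL_REGS scratch only for pre-ARMv4 HImode reloads (shorts loaded byte-at-a-time), and Thumb-1 keeps its own handler. A hedged restatement of that decision tree with illustrative stand-in names:

/* Hedged sketch of the simplified reload-class logic; the enum and
   predicate names are stand-ins, not GCC's.  */
#include <stdbool.h>

enum reload_class { NO_REGS, GENERAL_REGS, VFP_SCRATCH, THUMB1_CHOICE };

static enum reload_class
secondary_reload (bool hard_float_vfp_class, bool target_32bit,
                  bool himode_needs_scratch)
{
  if (hard_float_vfp_class)
    return VFP_SCRATCH;     /* coproc_secondary_reload_class (...) */
  if (target_32bit)
    /* Pre-ARMv4 HImode accesses go byte-at-a-time and need a core
       register as scratch; everything else reloads directly.  */
    return himode_needs_scratch ? GENERAL_REGS : NO_REGS;
  return THUMB1_CHOICE;     /* THUMB_SECONDARY_*_RELOAD_CLASS */
}

int main (void)
{
  return secondary_reload (false, true, true) == GENERAL_REGS ? 0 : 1;
}
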
/* Return the maximum number of consecutive registers
needed to represent mode MODE in a register of class CLASS.
- ARM regs are UNITS_PER_WORD bits.
- FIXME: Is this true for iWMMX? */
+ ARM regs are UNITS_PER_WORD bits. */
#define CLASS_MAX_NREGS(CLASS, MODE) \
(CLASS == VPR_REG) \
? CEIL (GET_MODE_SIZE (MODE), 2) \
@@ -1672,7 +1616,6 @@ enum arm_pcs
{
ARM_PCS_AAPCS, /* Base standard AAPCS. */
ARM_PCS_AAPCS_VFP, /* Use VFP registers for floating point values. */
- ARM_PCS_AAPCS_IWMMXT, /* Use iWMMXT registers for vectors. */
/* This must be the last AAPCS variant. */
ARM_PCS_AAPCS_LOCAL, /* Private call within this compilation unit. */
ARM_PCS_ATPCS, /* ATPCS. */
@@ -1690,8 +1633,6 @@ typedef struct
{
/* This is the number of registers of arguments scanned so far. */
int nregs;
- /* This is the number of iWMMXt register arguments scanned so far. */
- int iwmmxt_nregs;
int named_count;
int nargs;
/* Which procedure call variant to use for this call. */
@@ -1739,9 +1680,7 @@ typedef struct
#define FUNCTION_ARG_REGNO_P(REGNO) \
(IN_RANGE ((REGNO), 0, 3) \
|| (TARGET_AAPCS_BASED && TARGET_HARD_FLOAT \
- && IN_RANGE ((REGNO), FIRST_VFP_REGNUM, FIRST_VFP_REGNUM + 15)) \
- || (TARGET_IWMMXT_ABI \
- && IN_RANGE ((REGNO), FIRST_IWMMXT_REGNUM, FIRST_IWMMXT_REGNUM + 9)))
+ && IN_RANGE ((REGNO), FIRST_VFP_REGNUM, FIRST_VFP_REGNUM + 15)))
/* If your target environment doesn't prefix user functions with an
@@ -2257,7 +2196,11 @@ extern int making_const_table;
#define SELECT_CC_MODE(OP, X, Y) arm_select_cc_mode (OP, X, Y)
-#define REVERSIBLE_CC_MODE(MODE) 1
+/* Floating-point modes cannot be reversed unless we don't care about
+ NaNs. */
+#define REVERSIBLE_CC_MODE(MODE) \
+ (flag_finite_math_only \
+ || !((MODE) == CCFPmode || (MODE) == CCFPEmode))
#define REVERSE_CONDITION(CODE,MODE) \
(((MODE) == CCFPmode || (MODE) == CCFPEmode) \
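
The new REVERSIBLE_CC_MODE refuses to reverse floating-point condition modes unless -ffinite-math-only is in effect, because with NaNs negating a comparison is not the same as testing the opposite one: both (a < b) and (a >= b) are false when an operand is unordered. A small demonstration:

/* Why CCFP modes are not reversible under default FP semantics.  */
#include <assert.h>
#include <math.h>

int main (void)
{
  double a = NAN, b = 1.0;
  assert (!(a < b));   /* unordered: the comparison is false */
  assert (!(a >= b));  /* the "reversed" test is also false */
  /* So !(a < b) is true while (a >= b) is false: reversing the
     condition code would change the result.  */
  return 0;
}
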
diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md
index 597ef67..422ae54 100644
--- a/gcc/config/arm/arm.md
+++ b/gcc/config/arm/arm.md
@@ -37,12 +37,12 @@
(LR_REGNUM 14) ; Return address register
(PC_REGNUM 15) ; Program counter
(LAST_ARM_REGNUM 15) ;
- (CC_REGNUM 100) ; Condition code pseudo register
- (VFPCC_REGNUM 101) ; VFP Condition code pseudo register
- (APSRQ_REGNUM 104) ; Q bit pseudo register
- (APSRGE_REGNUM 105) ; GE bits pseudo register
- (VPR_REGNUM 106) ; Vector Predication Register - MVE register.
- (RA_AUTH_CODE 107) ; Pseudo register to save PAC.
+ (CC_REGNUM 80) ; Condition code pseudo register
+ (VFPCC_REGNUM 81) ; VFP Condition code pseudo register
+ (APSRQ_REGNUM 84) ; Q bit pseudo register
+ (APSRGE_REGNUM 85) ; GE bits pseudo register
+ (VPR_REGNUM 86) ; Vector Predication Register - MVE register.
+ (RA_AUTH_CODE 87) ; Pseudo register to save PAC.
]
)
;; 3rd operand to select_dominance_cc_mode
@@ -149,7 +149,7 @@
; This attribute is used to compute attribute "enabled",
; use type "any" to enable an alternative in all cases.
(define_attr "arch" "any, a, t, 32, t1, t2, v6,nov6, v6t2, \
- v8mb, fix_vlldm, iwmmxt, iwmmxt2, armv6_or_vfpv3, \
+ v8mb, fix_vlldm, armv6_or_vfpv3, \
neon, mve"
(const_string "any"))
@@ -197,10 +197,6 @@
(match_test "fix_vlldm"))
(const_string "yes")
- (and (eq_attr "arch" "iwmmxt2")
- (match_test "TARGET_REALLY_IWMMXT2"))
- (const_string "yes")
-
(and (eq_attr "arch" "armv6_or_vfpv3")
(match_test "arm_arch6 || TARGET_VFP3"))
(const_string "yes")
@@ -362,18 +358,7 @@
alus_ext, alus_imm, alus_sreg,\
alus_shift_imm, alus_shift_reg, bfm, csel, rev, logic_imm, logic_reg,\
logic_shift_imm, logic_shift_reg, logics_imm, logics_reg,\
- logics_shift_imm, logics_shift_reg, extend, shift_imm, float, fcsel,\
- wmmx_wor, wmmx_wxor, wmmx_wand, wmmx_wandn, wmmx_wmov, wmmx_tmcrr,\
- wmmx_tmrrc, wmmx_wldr, wmmx_wstr, wmmx_tmcr, wmmx_tmrc, wmmx_wadd,\
- wmmx_wsub, wmmx_wmul, wmmx_wmac, wmmx_wavg2, wmmx_tinsr, wmmx_textrm,\
- wmmx_wshufh, wmmx_wcmpeq, wmmx_wcmpgt, wmmx_wmax, wmmx_wmin, wmmx_wpack,\
- wmmx_wunpckih, wmmx_wunpckil, wmmx_wunpckeh, wmmx_wunpckel, wmmx_wror,\
- wmmx_wsra, wmmx_wsrl, wmmx_wsll, wmmx_wmadd, wmmx_tmia, wmmx_tmiaph,\
- wmmx_tmiaxy, wmmx_tbcst, wmmx_tmovmsk, wmmx_wacc, wmmx_waligni,\
- wmmx_walignr, wmmx_tandc, wmmx_textrc, wmmx_torc, wmmx_torvsc, wmmx_wsad,\
- wmmx_wabs, wmmx_wabsdiff, wmmx_waddsubhx, wmmx_wsubaddhx, wmmx_wavg4,\
- wmmx_wmulw, wmmx_wqmulm, wmmx_wqmulwm, wmmx_waddbhus, wmmx_wqmiaxy,\
- wmmx_wmiaxy, wmmx_wmiawxy, wmmx_wmerge")
+ logics_shift_imm, logics_shift_reg, extend, shift_imm, float, fcsel")
(const_string "single")
(const_string "multi")))
@@ -435,7 +420,6 @@
(const_string "yes")
(const_string "no"))))
-(include "marvell-f-iwmmxt.md")
(include "arm-generic.md")
(include "arm926ejs.md")
(include "arm1020e.md")
@@ -2893,14 +2877,12 @@
;; Split DImode and, ior, xor operations. Simply perform the logical
;; operation on the upper and lower halves of the registers.
;; This is needed for atomic operations in arm_split_atomic_op.
-;; Avoid splitting IWMMXT instructions.
(define_split
[(set (match_operand:DI 0 "s_register_operand" "")
(match_operator:DI 6 "logical_binary_operator"
[(match_operand:DI 1 "s_register_operand" "")
(match_operand:DI 2 "s_register_operand" "")]))]
- "TARGET_32BIT && reload_completed
- && ! IS_IWMMXT_REGNUM (REGNO (operands[0]))"
+ "TARGET_32BIT && reload_completed"
[(set (match_dup 0) (match_op_dup:SI 6 [(match_dup 1) (match_dup 2)]))
(set (match_dup 3) (match_op_dup:SI 6 [(match_dup 4) (match_dup 5)]))]
"
@@ -6345,7 +6327,6 @@
"TARGET_32BIT
&& !(TARGET_HARD_FLOAT)
&& !(TARGET_HAVE_MVE || TARGET_HAVE_MVE_FLOAT)
- && !TARGET_IWMMXT
&& ( register_operand (operands[0], DImode)
|| register_operand (operands[1], DImode))"
"*
@@ -6554,7 +6535,7 @@
(define_insn "*arm_movsi_insn"
[(set (match_operand:SI 0 "nonimmediate_operand" "=rk,r,r,r,rk,m")
(match_operand:SI 1 "general_operand" "rk, I,K,j,mi,rk"))]
- "TARGET_ARM && !TARGET_IWMMXT && !TARGET_HARD_FLOAT
+ "TARGET_ARM && !TARGET_HARD_FLOAT
&& ( register_operand (operands[0], SImode)
|| register_operand (operands[1], SImode))"
"@
@@ -8642,7 +8623,7 @@
if (detect_cmse_nonsecure_call (addr))
{
pat = gen_nonsecure_call_internal (operands[0], operands[1],
- operands[2]);
+ operands[2], const0_rtx);
emit_call_insn (pat);
}
else
@@ -8684,10 +8665,10 @@
(clobber (reg:SI LR_REGNUM))])])
(define_expand "nonsecure_call_internal"
- [(parallel [(call (unspec:SI [(match_operand 0 "memory_operand")]
- UNSPEC_NONSECURE_MEM)
+ [(parallel [(call (match_operand 0 "memory_operand")
(match_operand 1 "general_operand"))
(use (match_operand 2 "" ""))
+ (unspec:SI [(match_operand 3)] UNSPEC_NONSECURE_MEM)
(clobber (reg:SI LR_REGNUM))])]
"use_cmse"
{
@@ -8764,7 +8745,8 @@
if (detect_cmse_nonsecure_call (addr))
{
pat = gen_nonsecure_call_value_internal (operands[0], operands[1],
- operands[2], operands[3]);
+ operands[2], operands[3],
+ const0_rtx);
emit_call_insn (pat);
}
else
@@ -8798,10 +8780,10 @@
(define_expand "nonsecure_call_value_internal"
[(parallel [(set (match_operand 0 "" "")
- (call (unspec:SI [(match_operand 1 "memory_operand")]
- UNSPEC_NONSECURE_MEM)
+ (call (match_operand 1 "memory_operand")
(match_operand 2 "general_operand")))
(use (match_operand 3 "" ""))
+ (unspec:SI [(match_operand 4)] UNSPEC_NONSECURE_MEM)
(clobber (reg:SI LR_REGNUM))])]
"use_cmse"
"
@@ -13044,7 +13026,7 @@
"arm_coproc_builtin_available (VUNSPEC_<MCRR>)"
{
arm_const_bounds (operands[0], 0, 16);
- arm_const_bounds (operands[1], 0, 8);
+ arm_const_bounds (operands[1], 0, 16);
arm_const_bounds (operands[3], 0, (1 << 5));
return "<mcrr>\\tp%c0, %1, %Q2, %R2, CR%c3";
}
@@ -13059,7 +13041,7 @@
"arm_coproc_builtin_available (VUNSPEC_<MRRC>)"
{
arm_const_bounds (operands[1], 0, 16);
- arm_const_bounds (operands[2], 0, 8);
+ arm_const_bounds (operands[2], 0, 16);
arm_const_bounds (operands[3], 0, (1 << 5));
return "<mrrc>\\tp%c1, %2, %Q0, %R0, CR%c3";
}
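
Both hunks relax the bound on the opc1 immediate of the mcrr/mrrc builtins from 8 to 16, matching the four-bit opc1 field in those encodings. A hedged sketch of a half-open range check of the kind these call sites suggest (the real arm_const_bounds also emits a diagnostic on failure):

/* Hedged sketch of a half-open bounds check like arm_const_bounds:
   after this change the opc1 value must satisfy 0 <= n < 16.  */
#include <stdbool.h>

static bool
const_in_bounds (long value, long low, long high)
{
  return value >= low && value < high;
}

int main (void)
{
  /* 12 was rejected under the old [0, 8) bound, accepted now.  */
  return const_in_bounds (12, 0, 16) ? 0 : 1;
}
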
@@ -13123,10 +13105,8 @@
[(set_attr "conds" "unconditional")
(set_attr "type" "nop")])
-;; Vector bits common to IWMMXT, Neon and MVE
+;; Vector bits common to Neon and MVE
(include "vec-common.md")
-;; Load the Intel Wireless Multimedia Extension patterns
-(include "iwmmxt.md")
;; Load the VFP co-processor patterns
(include "vfp.md")
;; Thumb-1 patterns
diff --git a/gcc/config/arm/arm.opt b/gcc/config/arm/arm.opt
index 042cb54..d5eeeae 100644
--- a/gcc/config/arm/arm.opt
+++ b/gcc/config/arm/arm.opt
@@ -58,9 +58,6 @@ EnumValue
Enum(arm_abi_type) String(aapcs) Value(ARM_ABI_AAPCS)
EnumValue
-Enum(arm_abi_type) String(iwmmxt) Value(ARM_ABI_IWMMXT)
-
-EnumValue
Enum(arm_abi_type) String(aapcs-linux) Value(ARM_ABI_AAPCS_LINUX)
mabort-on-noreturn
diff --git a/gcc/config/arm/arm_neon.h b/gcc/config/arm/arm_neon.h
index cba50de..105385f 100644
--- a/gcc/config/arm/arm_neon.h
+++ b/gcc/config/arm/arm_neon.h
@@ -20938,11 +20938,6 @@ vbfdotq_lane_f32 (float32x4_t __r, bfloat16x8_t __a, bfloat16x4_t __b,
return __builtin_neon_vbfdot_lanev4bfv4sf (__r, __a, __b, __index);
}
-#pragma GCC pop_options
-
-#pragma GCC push_options
-#pragma GCC target ("arch=armv8.2-a+bf16")
-
typedef struct bfloat16x4x2_t
{
bfloat16x4_t val[2];
diff --git a/gcc/config/arm/constraints.md b/gcc/config/arm/constraints.md
index 9f1a37a..24743a8 100644
--- a/gcc/config/arm/constraints.md
+++ b/gcc/config/arm/constraints.md
@@ -19,11 +19,12 @@
;; <http://www.gnu.org/licenses/>.
;; The following register constraints have been used:
-;; - in ARM/Thumb-2 state: t, w, x, y, z
+;; - in ARM/Thumb-2 state: t, w, x
;; - in Thumb state: h, b
;; - in both states: l, c, k, q, Cs, Ts, US
;; In ARM state, 'l' is an alias for 'r'
;; 'f' and 'v' were previously used for FPA and MAVERICK registers.
+;; 'y' and 'z' were previously used for iWMMXt registers (removed after gcc-15).
;; The following normal constraints have been used:
;; in ARM/Thumb-2 state: G, I, j, J, K, L, M
@@ -39,7 +40,7 @@
;; in all states: Pg
;; The following memory constraints have been used:
-;; in ARM/Thumb-2 state: Uh, Ut, Uv, Uy, Un, Um, Us, Uo, Up, Uf, Ux, Ul, Uz
+;; in ARM/Thumb-2 state: Uh, Ut, Uv, Un, Um, Us, Uo, Up, Uf, Ux, Ul, Uz
;; in ARM state: Uq
;; in Thumb state: Uu, Uw
;; in all states: Q
@@ -112,13 +113,6 @@
(define_register_constraint "x" "TARGET_32BIT ? VFP_D0_D7_REGS : NO_REGS"
"The VFP registers @code{d0}-@code{d7}.")
-(define_register_constraint "y" "TARGET_REALLY_IWMMXT ? IWMMXT_REGS : NO_REGS"
- "The Intel iWMMX co-processor registers.")
-
-(define_register_constraint "z"
- "TARGET_REALLY_IWMMXT ? IWMMXT_GR_REGS : NO_REGS"
- "The Intel iWMMX GR registers.")
-
(define_register_constraint "l" "TARGET_THUMB ? LO_REGS : GENERAL_REGS"
"In Thumb state the core registers @code{r0}-@code{r7}.")
@@ -478,12 +472,6 @@
? arm_coproc_mem_operand_no_writeback (op)
: neon_vector_mem_operand (op, 2, true)")))
-(define_memory_constraint "Uy"
- "@internal
- In ARM/Thumb-2 state a valid iWMMX load/store address."
- (and (match_code "mem")
- (match_test "TARGET_32BIT && arm_coproc_mem_operand (op, TRUE)")))
-
(define_memory_constraint "Un"
"@internal
In ARM/Thumb-2 state a valid address for Neon doubleword vector
diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md
index 743fe48..0c163ed 100644
--- a/gcc/config/arm/iterators.md
+++ b/gcc/config/arm/iterators.md
@@ -59,30 +59,25 @@
;; A list of modes which the VFP unit can handle
(define_mode_iterator SDF [(SF "") (DF "TARGET_VFP_DOUBLE")])
-;; Integer element sizes implemented by IWMMXT.
-(define_mode_iterator VMMX [V2SI V4HI V8QI])
-
-(define_mode_iterator VMMX2 [V4HI V2SI])
-
;; Integer element sizes for shifts.
(define_mode_iterator VSHFT [V4HI V2SI DI])
-;; Integer and float modes supported by Neon and IWMMXT.
+;; Integer and float modes supported by Neon.
(define_mode_iterator VALL [V2DI V2SI V4HI V8QI V2SF V4SI V8HI V16QI V4SF])
-;; Integer and float modes supported by Neon, IWMMXT and MVE.
+;; Integer and float modes supported by Neon and MVE.
(define_mode_iterator VNIM1 [V16QI V8HI V4SI V4SF V2DI])
-;; Integer and float modes supported by Neon and IWMMXT but not MVE.
+;; Integer and float modes supported by Neon but not MVE.
(define_mode_iterator VNINOTM1 [V2SI V4HI V8QI V2SF])
-;; Integer and float modes supported by Neon and IWMMXT, except V2DI.
+;; Integer and float modes supported by Neon, except V2DI.
(define_mode_iterator VALLW [V2SI V4HI V8QI V2SF V4SI V8HI V16QI V4SF])
-;; Integer modes supported by Neon and IWMMXT
+;; Integer modes supported by Neon
(define_mode_iterator VINT [V2DI V2SI V4HI V8QI V4SI V8HI V16QI])
-;; Integer modes supported by Neon and IWMMXT, except V2DI
+;; Integer modes supported by Neon, except V2DI
(define_mode_iterator VINTW [V2SI V4HI V8QI V4SI V8HI V16QI])
;; Double-width vector modes, on which we support arithmetic (no HF!)
@@ -1644,9 +1639,6 @@
;; distinguishes between 16-bit Thumb and 32-bit Thumb/ARM.
(define_mode_attr arch [(CC_Z "32") (SI "t1")])
-;; Determine element size suffix from vector mode.
-(define_mode_attr MMX_char [(V8QI "b") (V4HI "h") (V2SI "w") (DI "d")])
-
;; vtbl<n> suffix for NEON vector modes.
(define_mode_attr VTAB_n [(TI "2") (EI "3") (OI "4")])
diff --git a/gcc/config/arm/iwmmxt.md b/gcc/config/arm/iwmmxt.md
deleted file mode 100644
index 0aa5dcd..0000000
--- a/gcc/config/arm/iwmmxt.md
+++ /dev/null
@@ -1,1766 +0,0 @@
-;; Patterns for the Intel Wireless MMX technology architecture.
-;; Copyright (C) 2003-2025 Free Software Foundation, Inc.
-;; Contributed by Red Hat.
-
-;; This file is part of GCC.
-
-;; GCC is free software; you can redistribute it and/or modify it under
-;; the terms of the GNU General Public License as published by the Free
-;; Software Foundation; either version 3, or (at your option) any later
-;; version.
-
-;; GCC is distributed in the hope that it will be useful, but WITHOUT
-;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
-;; License for more details.
-
-;; You should have received a copy of the GNU General Public License
-;; along with GCC; see the file COPYING3. If not see
-;; <http://www.gnu.org/licenses/>.
-
-;; Register numbers. Need to sync with FIRST_IWMMXT_GR_REGNUM in arm.h
-(define_constants
- [(WCGR0 96)
- (WCGR1 97)
- (WCGR2 98)
- (WCGR3 99)
- ]
-)
-
-(define_insn "tbcstv8qi"
- [(set (match_operand:V8QI 0 "register_operand" "=y")
- (vec_duplicate:V8QI (match_operand:QI 1 "s_register_operand" "r")))]
- "TARGET_REALLY_IWMMXT"
- "tbcstb%?\\t%0, %1"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_tbcst")]
-)
-
-(define_insn "tbcstv4hi"
- [(set (match_operand:V4HI 0 "register_operand" "=y")
- (vec_duplicate:V4HI (match_operand:HI 1 "s_register_operand" "r")))]
- "TARGET_REALLY_IWMMXT"
- "tbcsth%?\\t%0, %1"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_tbcst")]
-)
-
-(define_insn "tbcstv2si"
- [(set (match_operand:V2SI 0 "register_operand" "=y")
- (vec_duplicate:V2SI (match_operand:SI 1 "s_register_operand" "r")))]
- "TARGET_REALLY_IWMMXT"
- "tbcstw%?\\t%0, %1"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_tbcst")]
-)
-
-(define_insn "iwmmxt_iordi3"
- [(set (match_operand:DI 0 "register_operand" "=y")
- (ior:DI (match_operand:DI 1 "register_operand" "%y")
- (match_operand:DI 2 "register_operand" "y")))]
- "TARGET_REALLY_IWMMXT"
- "wor%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "length" "4")
- (set_attr "type" "wmmx_wor")]
-)
-
-(define_insn "iwmmxt_xordi3"
- [(set (match_operand:DI 0 "register_operand" "=y")
- (xor:DI (match_operand:DI 1 "register_operand" "%y")
- (match_operand:DI 2 "register_operand" "y")))]
- "TARGET_REALLY_IWMMXT"
- "wxor%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "length" "4")
- (set_attr "type" "wmmx_wxor")]
-)
-
-(define_insn "iwmmxt_anddi3"
- [(set (match_operand:DI 0 "register_operand" "=y")
- (and:DI (match_operand:DI 1 "register_operand" "%y")
- (match_operand:DI 2 "register_operand" "y")))]
- "TARGET_REALLY_IWMMXT"
- "wand%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "length" "4")
- (set_attr "type" "wmmx_wand")]
-)
-
-(define_insn "iwmmxt_nanddi3"
- [(set (match_operand:DI 0 "register_operand" "=y")
- (and:DI (match_operand:DI 1 "register_operand" "y")
- (not:DI (match_operand:DI 2 "register_operand" "y"))))]
- "TARGET_REALLY_IWMMXT"
- "wandn%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wandn")]
-)
-
-(define_insn "*iwmmxt_arm_movdi"
- [(set (match_operand:DI 0 "nonimmediate_di_operand" "=r, r, r, r, m,y,y,r, y,Uy,*w, r,*w,*w, *Uv")
- (match_operand:DI 1 "di_operand" "rDa,Db,Dc,mi,r,y,r,y,Uy,y, r,*w,*w,*Uvi,*w"))]
- "TARGET_REALLY_IWMMXT
- && ( register_operand (operands[0], DImode)
- || register_operand (operands[1], DImode))"
- "*
- switch (which_alternative)
- {
- case 0:
- case 1:
- case 2:
- return \"#\";
- case 3: case 4:
- return output_move_double (operands, true, NULL);
- case 5:
- return \"wmov%?\\t%0,%1\";
- case 6:
- return \"tmcrr%?\\t%0,%Q1,%R1\";
- case 7:
- return \"tmrrc%?\\t%Q0,%R0,%1\";
- case 8:
- return \"wldrd%?\\t%0,%1\";
- case 9:
- return \"wstrd%?\\t%1,%0\";
- case 10:
- return \"fmdrr%?\\t%P0, %Q1, %R1\\t%@ int\";
- case 11:
- return \"fmrrd%?\\t%Q0, %R0, %P1\\t%@ int\";
- case 12:
- if (TARGET_VFP_SINGLE)
- return \"fcpys%?\\t%0, %1\\t%@ int\;fcpys%?\\t%p0, %p1\\t%@ int\";
- else
- return \"fcpyd%?\\t%P0, %P1\\t%@ int\";
- case 13: case 14:
- return output_move_vfp (operands);
- default:
- gcc_unreachable ();
- }
- "
- [(set (attr "length") (cond [(eq_attr "alternative" "0,3,4") (const_int 8)
- (eq_attr "alternative" "1") (const_int 12)
- (eq_attr "alternative" "2") (const_int 16)
- (eq_attr "alternative" "12")
- (if_then_else
- (eq (symbol_ref "TARGET_VFP_SINGLE") (const_int 1))
- (const_int 8)
- (const_int 4))]
- (const_int 4)))
- (set_attr "type" "*,*,*,load_8,store_8,*,*,*,*,*,f_mcrr,f_mrrc,\
- ffarithd,f_loadd,f_stored")
- (set_attr "arm_pool_range" "*,*,*,1020,*,*,*,*,*,*,*,*,*,1020,*")
- (set_attr "arm_neg_pool_range" "*,*,*,1008,*,*,*,*,*,*,*,*,*,1008,*")]
-)
-
-(define_insn "*iwmmxt_movsi_insn"
- [(set (match_operand:SI 0 "nonimmediate_operand" "=rk,r,r,r,rk, m,z,r,?z,?Uy,*t, r,*t,*t ,*Uv")
- (match_operand:SI 1 "general_operand" " rk,I,K,j,mi,rk,r,z,Uy, z, r,*t,*t,*Uvi, *t"))]
- "TARGET_REALLY_IWMMXT
- && ( register_operand (operands[0], SImode)
- || register_operand (operands[1], SImode))"
- "*
- switch (which_alternative)
- {
- case 0: return \"mov\\t%0, %1\";
- case 1: return \"mov\\t%0, %1\";
- case 2: return \"mvn\\t%0, #%B1\";
- case 3: return \"movw\\t%0, %1\";
- case 4: return \"ldr\\t%0, %1\";
- case 5: return \"str\\t%1, %0\";
- case 6: return \"tmcr\\t%0, %1\";
- case 7: return \"tmrc\\t%0, %1\";
- case 8: return arm_output_load_gr (operands);
- case 9: return \"wstrw\\t%1, %0\";
- case 10:return \"fmsr\\t%0, %1\";
- case 11:return \"fmrs\\t%0, %1\";
- case 12:return \"fcpys\\t%0, %1\\t%@ int\";
- case 13: case 14:
- return output_move_vfp (operands);
- default:
- gcc_unreachable ();
- }"
- [(set_attr "type" "*,*,*,*,load_4,store_4,*,*,*,*,f_mcr,f_mrc,\
- fmov,f_loads,f_stores")
- (set_attr "length" "*,*,*,*,*, *,*,*, 16, *,*,*,*,*,*")
- (set_attr "pool_range" "*,*,*,*,4096, *,*,*,1024, *,*,*,*,1020,*")
- (set_attr "neg_pool_range" "*,*,*,*,4084, *,*,*, *, 1012,*,*,*,1008,*")
- ;; Note - the "predicable" attribute is not allowed to have alternatives.
- ;; Since the wSTRw wCx instruction is not predicable, we cannot support
- ;; predicating any of the alternatives in this template. Instead,
- ;; we do the predication ourselves, in cond_iwmmxt_movsi_insn.
- (set_attr "predicable" "no")
- ;; Also - we have to pretend that these insns clobber the condition code
- ;; bits as otherwise arm_final_prescan_insn() will try to conditionalize
- ;; them.
- (set_attr "conds" "clob")]
-)
-
-;; Because iwmmxt_movsi_insn is not predicable, we provide the
-;; cond_exec version explicitly, with appropriate constraints.
-
-(define_insn "*cond_iwmmxt_movsi_insn"
- [(cond_exec
- (match_operator 2 "arm_comparison_operator"
- [(match_operand 3 "cc_register" "")
- (const_int 0)])
- (set (match_operand:SI 0 "nonimmediate_operand" "=r,r,r, m,z,r")
- (match_operand:SI 1 "general_operand" "rI,K,mi,r,r,z")))]
- "TARGET_REALLY_IWMMXT
- && ( register_operand (operands[0], SImode)
- || register_operand (operands[1], SImode))"
- "*
- switch (which_alternative)
- {
- case 0: return \"mov%?\\t%0, %1\";
- case 1: return \"mvn%?\\t%0, #%B1\";
- case 2: return \"ldr%?\\t%0, %1\";
- case 3: return \"str%?\\t%1, %0\";
- case 4: return \"tmcr%?\\t%0, %1\";
- default: return \"tmrc%?\\t%0, %1\";
- }"
- [(set_attr "type" "*,*,load_4,store_4,*,*")
- (set_attr "pool_range" "*,*,4096, *,*,*")
- (set_attr "neg_pool_range" "*,*,4084, *,*,*")]
-)
-
-(define_insn "mov<mode>_internal"
- [(set (match_operand:VMMX 0 "nonimmediate_operand" "=y,m,y,?r,?y,?r,?r,?m")
- (match_operand:VMMX 1 "general_operand" "y,y,mi,y,r,r,mi,r"))]
- "TARGET_REALLY_IWMMXT"
- "*
- switch (which_alternative)
- {
- case 0: return \"wmov%?\\t%0, %1\";
- case 1: return \"wstrd%?\\t%1, %0\";
- case 2: return \"wldrd%?\\t%0, %1\";
- case 3: return \"tmrrc%?\\t%Q0, %R0, %1\";
- case 4: return \"tmcrr%?\\t%0, %Q1, %R1\";
- case 5: return \"#\";
- default: return output_move_double (operands, true, NULL);
- }"
- [(set_attr "predicable" "yes")
- (set_attr "length" "4, 4, 4,4,4,8, 8,8")
- (set_attr "type" "wmmx_wmov,wmmx_wstr,wmmx_wldr,wmmx_tmrrc,wmmx_tmcrr,*,load_4,store_4")
- (set_attr "pool_range" "*, *, 256,*,*,*, 256,*")
- (set_attr "neg_pool_range" "*, *, 244,*,*,*, 244,*")]
-)
-
-(define_expand "iwmmxt_setwcgr0"
- [(set (reg:SI WCGR0)
- (match_operand:SI 0 "register_operand"))]
- "TARGET_REALLY_IWMMXT"
- {}
-)
-
-(define_expand "iwmmxt_setwcgr1"
- [(set (reg:SI WCGR1)
- (match_operand:SI 0 "register_operand"))]
- "TARGET_REALLY_IWMMXT"
- {}
-)
-
-(define_expand "iwmmxt_setwcgr2"
- [(set (reg:SI WCGR2)
- (match_operand:SI 0 "register_operand"))]
- "TARGET_REALLY_IWMMXT"
- {}
-)
-
-(define_expand "iwmmxt_setwcgr3"
- [(set (reg:SI WCGR3)
- (match_operand:SI 0 "register_operand"))]
- "TARGET_REALLY_IWMMXT"
- {}
-)
-
-(define_expand "iwmmxt_getwcgr0"
- [(set (match_operand:SI 0 "register_operand")
- (reg:SI WCGR0))]
- "TARGET_REALLY_IWMMXT"
- {}
-)
-
-(define_expand "iwmmxt_getwcgr1"
- [(set (match_operand:SI 0 "register_operand")
- (reg:SI WCGR1))]
- "TARGET_REALLY_IWMMXT"
- {}
-)
-
-(define_expand "iwmmxt_getwcgr2"
- [(set (match_operand:SI 0 "register_operand")
- (reg:SI WCGR2))]
- "TARGET_REALLY_IWMMXT"
- {}
-)
-
-(define_expand "iwmmxt_getwcgr3"
- [(set (match_operand:SI 0 "register_operand")
- (reg:SI WCGR3))]
- "TARGET_REALLY_IWMMXT"
- {}
-)
-
-(define_insn "*and<mode>3_iwmmxt"
- [(set (match_operand:VMMX 0 "register_operand" "=y")
- (and:VMMX (match_operand:VMMX 1 "register_operand" "y")
- (match_operand:VMMX 2 "register_operand" "y")))]
- "TARGET_REALLY_IWMMXT"
- "wand\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wand")]
-)
-
-(define_insn "*ior<mode>3_iwmmxt"
- [(set (match_operand:VMMX 0 "register_operand" "=y")
- (ior:VMMX (match_operand:VMMX 1 "register_operand" "y")
- (match_operand:VMMX 2 "register_operand" "y")))]
- "TARGET_REALLY_IWMMXT"
- "wor\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wor")]
-)
-
-(define_insn "*xor<mode>3_iwmmxt"
- [(set (match_operand:VMMX 0 "register_operand" "=y")
- (xor:VMMX (match_operand:VMMX 1 "register_operand" "y")
- (match_operand:VMMX 2 "register_operand" "y")))]
- "TARGET_REALLY_IWMMXT"
- "wxor\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wxor")]
-)
-
-
-;; Vector add/subtract
-
-(define_insn "*add<mode>3_iwmmxt"
- [(set (match_operand:VMMX 0 "register_operand" "=y")
- (plus:VMMX (match_operand:VMMX 1 "register_operand" "y")
- (match_operand:VMMX 2 "register_operand" "y")))]
- "TARGET_REALLY_IWMMXT"
- "wadd<MMX_char>%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wadd")]
-)
-
-(define_insn "ssaddv8qi3"
- [(set (match_operand:V8QI 0 "register_operand" "=y")
- (ss_plus:V8QI (match_operand:V8QI 1 "register_operand" "y")
- (match_operand:V8QI 2 "register_operand" "y")))]
- "TARGET_REALLY_IWMMXT"
- "waddbss%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wadd")]
-)
-
-(define_insn "ssaddv4hi3"
- [(set (match_operand:V4HI 0 "register_operand" "=y")
- (ss_plus:V4HI (match_operand:V4HI 1 "register_operand" "y")
- (match_operand:V4HI 2 "register_operand" "y")))]
- "TARGET_REALLY_IWMMXT"
- "waddhss%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wadd")]
-)
-
-(define_insn "ssaddv2si3"
- [(set (match_operand:V2SI 0 "register_operand" "=y")
- (ss_plus:V2SI (match_operand:V2SI 1 "register_operand" "y")
- (match_operand:V2SI 2 "register_operand" "y")))]
- "TARGET_REALLY_IWMMXT"
- "waddwss%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wadd")]
-)
-
-(define_insn "usaddv8qi3"
- [(set (match_operand:V8QI 0 "register_operand" "=y")
- (us_plus:V8QI (match_operand:V8QI 1 "register_operand" "y")
- (match_operand:V8QI 2 "register_operand" "y")))]
- "TARGET_REALLY_IWMMXT"
- "waddbus%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wadd")]
-)
-
-(define_insn "usaddv4hi3"
- [(set (match_operand:V4HI 0 "register_operand" "=y")
- (us_plus:V4HI (match_operand:V4HI 1 "register_operand" "y")
- (match_operand:V4HI 2 "register_operand" "y")))]
- "TARGET_REALLY_IWMMXT"
- "waddhus%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wadd")]
-)
-
-(define_insn "usaddv2si3"
- [(set (match_operand:V2SI 0 "register_operand" "=y")
- (us_plus:V2SI (match_operand:V2SI 1 "register_operand" "y")
- (match_operand:V2SI 2 "register_operand" "y")))]
- "TARGET_REALLY_IWMMXT"
- "waddwus%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wadd")]
-)
-
-(define_insn "*sub<mode>3_iwmmxt"
- [(set (match_operand:VMMX 0 "register_operand" "=y")
- (minus:VMMX (match_operand:VMMX 1 "register_operand" "y")
- (match_operand:VMMX 2 "register_operand" "y")))]
- "TARGET_REALLY_IWMMXT"
- "wsub<MMX_char>%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wsub")]
-)
-
-(define_insn "sssubv8qi3"
- [(set (match_operand:V8QI 0 "register_operand" "=y")
- (ss_minus:V8QI (match_operand:V8QI 1 "register_operand" "y")
- (match_operand:V8QI 2 "register_operand" "y")))]
- "TARGET_REALLY_IWMMXT"
- "wsubbss%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wsub")]
-)
-
-(define_insn "sssubv4hi3"
- [(set (match_operand:V4HI 0 "register_operand" "=y")
- (ss_minus:V4HI (match_operand:V4HI 1 "register_operand" "y")
- (match_operand:V4HI 2 "register_operand" "y")))]
- "TARGET_REALLY_IWMMXT"
- "wsubhss%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wsub")]
-)
-
-(define_insn "sssubv2si3"
- [(set (match_operand:V2SI 0 "register_operand" "=y")
- (ss_minus:V2SI (match_operand:V2SI 1 "register_operand" "y")
- (match_operand:V2SI 2 "register_operand" "y")))]
- "TARGET_REALLY_IWMMXT"
- "wsubwss%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wsub")]
-)
-
-(define_insn "ussubv8qi3"
- [(set (match_operand:V8QI 0 "register_operand" "=y")
- (us_minus:V8QI (match_operand:V8QI 1 "register_operand" "y")
- (match_operand:V8QI 2 "register_operand" "y")))]
- "TARGET_REALLY_IWMMXT"
- "wsubbus%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wsub")]
-)
-
-(define_insn "ussubv4hi3"
- [(set (match_operand:V4HI 0 "register_operand" "=y")
- (us_minus:V4HI (match_operand:V4HI 1 "register_operand" "y")
- (match_operand:V4HI 2 "register_operand" "y")))]
- "TARGET_REALLY_IWMMXT"
- "wsubhus%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wsub")]
-)
-
-(define_insn "ussubv2si3"
- [(set (match_operand:V2SI 0 "register_operand" "=y")
- (us_minus:V2SI (match_operand:V2SI 1 "register_operand" "y")
- (match_operand:V2SI 2 "register_operand" "y")))]
- "TARGET_REALLY_IWMMXT"
- "wsubwus%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wsub")]
-)
-
-(define_insn "*mulv4hi3_iwmmxt"
- [(set (match_operand:V4HI 0 "register_operand" "=y")
- (mult:V4HI (match_operand:V4HI 1 "register_operand" "y")
- (match_operand:V4HI 2 "register_operand" "y")))]
- "TARGET_REALLY_IWMMXT"
- "wmulul%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wmul")]
-)
-
-(define_insn "smulv4hi3_highpart"
- [(set (match_operand:V4HI 0 "register_operand" "=y")
- (truncate:V4HI
- (lshiftrt:V4SI
- (mult:V4SI (sign_extend:V4SI (match_operand:V4HI 1 "register_operand" "y"))
- (sign_extend:V4SI (match_operand:V4HI 2 "register_operand" "y")))
- (const_int 16))))]
- "TARGET_REALLY_IWMMXT"
- "wmulsm%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wmul")]
-)
-
-(define_insn "umulv4hi3_highpart"
- [(set (match_operand:V4HI 0 "register_operand" "=y")
- (truncate:V4HI
- (lshiftrt:V4SI
- (mult:V4SI (zero_extend:V4SI (match_operand:V4HI 1 "register_operand" "y"))
- (zero_extend:V4SI (match_operand:V4HI 2 "register_operand" "y")))
- (const_int 16))))]
- "TARGET_REALLY_IWMMXT"
- "wmulum%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wmul")]
-)
-
-(define_insn "iwmmxt_wmacs"
- [(set (match_operand:DI 0 "register_operand" "=y")
- (unspec:DI [(match_operand:DI 1 "register_operand" "0")
- (match_operand:V4HI 2 "register_operand" "y")
- (match_operand:V4HI 3 "register_operand" "y")] UNSPEC_WMACS))]
- "TARGET_REALLY_IWMMXT"
- "wmacs%?\\t%0, %2, %3"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wmac")]
-)
-
-(define_insn "iwmmxt_wmacsz"
- [(set (match_operand:DI 0 "register_operand" "=y")
- (unspec:DI [(match_operand:V4HI 1 "register_operand" "y")
- (match_operand:V4HI 2 "register_operand" "y")] UNSPEC_WMACSZ))]
- "TARGET_REALLY_IWMMXT"
- "wmacsz%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wmac")]
-)
-
-(define_insn "iwmmxt_wmacu"
- [(set (match_operand:DI 0 "register_operand" "=y")
- (unspec:DI [(match_operand:DI 1 "register_operand" "0")
- (match_operand:V4HI 2 "register_operand" "y")
- (match_operand:V4HI 3 "register_operand" "y")] UNSPEC_WMACU))]
- "TARGET_REALLY_IWMMXT"
- "wmacu%?\\t%0, %2, %3"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wmac")]
-)
-
-(define_insn "iwmmxt_wmacuz"
- [(set (match_operand:DI 0 "register_operand" "=y")
- (unspec:DI [(match_operand:V4HI 1 "register_operand" "y")
- (match_operand:V4HI 2 "register_operand" "y")] UNSPEC_WMACUZ))]
- "TARGET_REALLY_IWMMXT"
- "wmacuz%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wmac")]
-)
-
-;; Same as xordi3, but don't show input operands so that we don't think
-;; they are live.
-(define_insn "iwmmxt_clrdi"
- [(set (match_operand:DI 0 "register_operand" "=y")
- (unspec:DI [(const_int 0)] UNSPEC_CLRDI))]
- "TARGET_REALLY_IWMMXT"
- "wxor%?\\t%0, %0, %0"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wxor")]
-)
-
-;; Seems like cse likes to generate these, so we have to support them.
-
-(define_insn "iwmmxt_clrv8qi"
- [(set (match_operand:V8QI 0 "s_register_operand" "=y")
- (const_vector:V8QI [(const_int 0) (const_int 0)
- (const_int 0) (const_int 0)
- (const_int 0) (const_int 0)
- (const_int 0) (const_int 0)]))]
- "TARGET_REALLY_IWMMXT"
- "wxor%?\\t%0, %0, %0"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wxor")]
-)
-
-(define_insn "iwmmxt_clrv4hi"
- [(set (match_operand:V4HI 0 "s_register_operand" "=y")
- (const_vector:V4HI [(const_int 0) (const_int 0)
- (const_int 0) (const_int 0)]))]
- "TARGET_REALLY_IWMMXT"
- "wxor%?\\t%0, %0, %0"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wxor")]
-)
-
-(define_insn "iwmmxt_clrv2si"
- [(set (match_operand:V2SI 0 "register_operand" "=y")
- (const_vector:V2SI [(const_int 0) (const_int 0)]))]
- "TARGET_REALLY_IWMMXT"
- "wxor%?\\t%0, %0, %0"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wxor")]
-)
-
-;; Unsigned averages/sum of absolute differences
-
-(define_insn "iwmmxt_uavgrndv8qi3"
- [(set (match_operand:V8QI 0 "register_operand" "=y")
- (truncate:V8QI
- (lshiftrt:V8HI
- (plus:V8HI
- (plus:V8HI (zero_extend:V8HI (match_operand:V8QI 1 "register_operand" "y"))
- (zero_extend:V8HI (match_operand:V8QI 2 "register_operand" "y")))
- (const_vector:V8HI [(const_int 1)
- (const_int 1)
- (const_int 1)
- (const_int 1)
- (const_int 1)
- (const_int 1)
- (const_int 1)
- (const_int 1)]))
- (const_int 1))))]
- "TARGET_REALLY_IWMMXT"
- "wavg2br%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wavg2")]
-)
-
-(define_insn "iwmmxt_uavgrndv4hi3"
- [(set (match_operand:V4HI 0 "register_operand" "=y")
- (truncate:V4HI
- (lshiftrt:V4SI
- (plus:V4SI
- (plus:V4SI (zero_extend:V4SI (match_operand:V4HI 1 "register_operand" "y"))
- (zero_extend:V4SI (match_operand:V4HI 2 "register_operand" "y")))
- (const_vector:V4SI [(const_int 1)
- (const_int 1)
- (const_int 1)
- (const_int 1)]))
- (const_int 1))))]
- "TARGET_REALLY_IWMMXT"
- "wavg2hr%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wavg2")]
-)
-
-(define_insn "iwmmxt_uavgv8qi3"
- [(set (match_operand:V8QI 0 "register_operand" "=y")
- (truncate:V8QI
- (lshiftrt:V8HI
- (plus:V8HI (zero_extend:V8HI (match_operand:V8QI 1 "register_operand" "y"))
- (zero_extend:V8HI (match_operand:V8QI 2 "register_operand" "y")))
- (const_int 1))))]
- "TARGET_REALLY_IWMMXT"
- "wavg2b%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wavg2")]
-)
-
-(define_insn "iwmmxt_uavgv4hi3"
- [(set (match_operand:V4HI 0 "register_operand" "=y")
- (truncate:V4HI
- (lshiftrt:V4SI
- (plus:V4SI (zero_extend:V4SI (match_operand:V4HI 1 "register_operand" "y"))
- (zero_extend:V4SI (match_operand:V4HI 2 "register_operand" "y")))
- (const_int 1))))]
- "TARGET_REALLY_IWMMXT"
- "wavg2h%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wavg2")]
-)
-
-;; Insert/extract/shuffle
-
-(define_insn "iwmmxt_tinsrb"
- [(set (match_operand:V8QI 0 "register_operand" "=y")
- (vec_merge:V8QI
- (vec_duplicate:V8QI
- (truncate:QI (match_operand:SI 2 "nonimmediate_operand" "r")))
- (match_operand:V8QI 1 "register_operand" "0")
- (match_operand:SI 3 "immediate_operand" "i")))]
- "TARGET_REALLY_IWMMXT"
- "*
- {
- return arm_output_iwmmxt_tinsr (operands);
- }
- "
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_tinsr")]
-)
-
-(define_insn "iwmmxt_tinsrh"
- [(set (match_operand:V4HI 0 "register_operand" "=y")
- (vec_merge:V4HI
- (vec_duplicate:V4HI
- (truncate:HI (match_operand:SI 2 "nonimmediate_operand" "r")))
- (match_operand:V4HI 1 "register_operand" "0")
- (match_operand:SI 3 "immediate_operand" "i")))]
- "TARGET_REALLY_IWMMXT"
- "*
- {
- return arm_output_iwmmxt_tinsr (operands);
- }
- "
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_tinsr")]
-)
-
-(define_insn "iwmmxt_tinsrw"
- [(set (match_operand:V2SI 0 "register_operand" "=y")
- (vec_merge:V2SI
- (vec_duplicate:V2SI
- (match_operand:SI 2 "nonimmediate_operand" "r"))
- (match_operand:V2SI 1 "register_operand" "0")
- (match_operand:SI 3 "immediate_operand" "i")))]
- "TARGET_REALLY_IWMMXT"
- "*
- {
- return arm_output_iwmmxt_tinsr (operands);
- }
- "
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_tinsr")]
-)
-
-(define_insn "iwmmxt_textrmub"
- [(set (match_operand:SI 0 "register_operand" "=r")
- (zero_extend:SI (vec_select:QI (match_operand:V8QI 1 "register_operand" "y")
- (parallel
- [(match_operand:SI 2 "immediate_operand" "i")]))))]
- "TARGET_REALLY_IWMMXT"
- "textrmub%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_textrm")]
-)
-
-(define_insn "iwmmxt_textrmsb"
- [(set (match_operand:SI 0 "register_operand" "=r")
- (sign_extend:SI (vec_select:QI (match_operand:V8QI 1 "register_operand" "y")
- (parallel
- [(match_operand:SI 2 "immediate_operand" "i")]))))]
- "TARGET_REALLY_IWMMXT"
- "textrmsb%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_textrm")]
-)
-
-(define_insn "iwmmxt_textrmuh"
- [(set (match_operand:SI 0 "register_operand" "=r")
- (zero_extend:SI (vec_select:HI (match_operand:V4HI 1 "register_operand" "y")
- (parallel
- [(match_operand:SI 2 "immediate_operand" "i")]))))]
- "TARGET_REALLY_IWMMXT"
- "textrmuh%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_textrm")]
-)
-
-(define_insn "iwmmxt_textrmsh"
- [(set (match_operand:SI 0 "register_operand" "=r")
- (sign_extend:SI (vec_select:HI (match_operand:V4HI 1 "register_operand" "y")
- (parallel
- [(match_operand:SI 2 "immediate_operand" "i")]))))]
- "TARGET_REALLY_IWMMXT"
- "textrmsh%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_textrm")]
-)
-
-;; There are signed/unsigned variants of this instruction, but they are
-;; pointless.
-(define_insn "iwmmxt_textrmw"
- [(set (match_operand:SI 0 "register_operand" "=r")
- (vec_select:SI (match_operand:V2SI 1 "register_operand" "y")
- (parallel [(match_operand:SI 2 "immediate_operand" "i")])))]
- "TARGET_REALLY_IWMMXT"
- "textrmsw%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_textrm")]
-)
-
-(define_insn "iwmmxt_wshufh"
- [(set (match_operand:V4HI 0 "register_operand" "=y")
- (unspec:V4HI [(match_operand:V4HI 1 "register_operand" "y")
- (match_operand:SI 2 "immediate_operand" "i")] UNSPEC_WSHUFH))]
- "TARGET_REALLY_IWMMXT"
- "wshufh%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wshufh")]
-)
-
-;; Mask-generating comparisons
-;;
-;; Note - you cannot use patterns like these here:
-;;
-;; (set (match:<vector>) (<comparator>:<vector> (match:<vector>) (match:<vector>)))
-;;
-;; Because GCC will assume that the truth value (1 or 0) is installed
-;; into the entire destination vector, (with the '1' going into the least
-;; significant element of the vector). This is not how these instructions
-;; behave.
-
-(define_insn "eqv8qi3"
- [(set (match_operand:V8QI 0 "register_operand" "=y")
- (unspec_volatile:V8QI [(match_operand:V8QI 1 "register_operand" "y")
- (match_operand:V8QI 2 "register_operand" "y")]
- VUNSPEC_WCMP_EQ))]
- "TARGET_REALLY_IWMMXT"
- "wcmpeqb%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wcmpeq")]
-)
-
-(define_insn "eqv4hi3"
- [(set (match_operand:V4HI 0 "register_operand" "=y")
- (unspec_volatile:V4HI [(match_operand:V4HI 1 "register_operand" "y")
- (match_operand:V4HI 2 "register_operand" "y")]
- VUNSPEC_WCMP_EQ))]
- "TARGET_REALLY_IWMMXT"
- "wcmpeqh%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wcmpeq")]
-)
-
-(define_insn "eqv2si3"
- [(set (match_operand:V2SI 0 "register_operand" "=y")
- (unspec_volatile:V2SI
- [(match_operand:V2SI 1 "register_operand" "y")
- (match_operand:V2SI 2 "register_operand" "y")]
- VUNSPEC_WCMP_EQ))]
- "TARGET_REALLY_IWMMXT"
- "wcmpeqw%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wcmpeq")]
-)
-
-(define_insn "gtuv8qi3"
- [(set (match_operand:V8QI 0 "register_operand" "=y")
- (unspec_volatile:V8QI [(match_operand:V8QI 1 "register_operand" "y")
- (match_operand:V8QI 2 "register_operand" "y")]
- VUNSPEC_WCMP_GTU))]
- "TARGET_REALLY_IWMMXT"
- "wcmpgtub%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wcmpgt")]
-)
-
-(define_insn "gtuv4hi3"
- [(set (match_operand:V4HI 0 "register_operand" "=y")
- (unspec_volatile:V4HI [(match_operand:V4HI 1 "register_operand" "y")
- (match_operand:V4HI 2 "register_operand" "y")]
- VUNSPEC_WCMP_GTU))]
- "TARGET_REALLY_IWMMXT"
- "wcmpgtuh%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wcmpgt")]
-)
-
-(define_insn "gtuv2si3"
- [(set (match_operand:V2SI 0 "register_operand" "=y")
- (unspec_volatile:V2SI [(match_operand:V2SI 1 "register_operand" "y")
- (match_operand:V2SI 2 "register_operand" "y")]
- VUNSPEC_WCMP_GTU))]
- "TARGET_REALLY_IWMMXT"
- "wcmpgtuw%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wcmpgt")]
-)
-
-(define_insn "gtv8qi3"
- [(set (match_operand:V8QI 0 "register_operand" "=y")
- (unspec_volatile:V8QI [(match_operand:V8QI 1 "register_operand" "y")
- (match_operand:V8QI 2 "register_operand" "y")]
- VUNSPEC_WCMP_GT))]
- "TARGET_REALLY_IWMMXT"
- "wcmpgtsb%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wcmpgt")]
-)
-
-(define_insn "gtv4hi3"
- [(set (match_operand:V4HI 0 "register_operand" "=y")
- (unspec_volatile:V4HI [(match_operand:V4HI 1 "register_operand" "y")
- (match_operand:V4HI 2 "register_operand" "y")]
- VUNSPEC_WCMP_GT))]
- "TARGET_REALLY_IWMMXT"
- "wcmpgtsh%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wcmpgt")]
-)
-
-(define_insn "gtv2si3"
- [(set (match_operand:V2SI 0 "register_operand" "=y")
- (unspec_volatile:V2SI [(match_operand:V2SI 1 "register_operand" "y")
- (match_operand:V2SI 2 "register_operand" "y")]
- VUNSPEC_WCMP_GT))]
- "TARGET_REALLY_IWMMXT"
- "wcmpgtsw%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wcmpgt")]
-)
-
-;; Max/min insns
-
-(define_insn "*smax<mode>3_iwmmxt"
- [(set (match_operand:VMMX 0 "register_operand" "=y")
- (smax:VMMX (match_operand:VMMX 1 "register_operand" "y")
- (match_operand:VMMX 2 "register_operand" "y")))]
- "TARGET_REALLY_IWMMXT"
- "wmaxs<MMX_char>%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wmax")]
-)
-
-(define_insn "*umax<mode>3_iwmmxt"
- [(set (match_operand:VMMX 0 "register_operand" "=y")
- (umax:VMMX (match_operand:VMMX 1 "register_operand" "y")
- (match_operand:VMMX 2 "register_operand" "y")))]
- "TARGET_REALLY_IWMMXT"
- "wmaxu<MMX_char>%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wmax")]
-)
-
-(define_insn "*smin<mode>3_iwmmxt"
- [(set (match_operand:VMMX 0 "register_operand" "=y")
- (smin:VMMX (match_operand:VMMX 1 "register_operand" "y")
- (match_operand:VMMX 2 "register_operand" "y")))]
- "TARGET_REALLY_IWMMXT"
- "wmins<MMX_char>%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wmin")]
-)
-
-(define_insn "*umin<mode>3_iwmmxt"
- [(set (match_operand:VMMX 0 "register_operand" "=y")
- (umin:VMMX (match_operand:VMMX 1 "register_operand" "y")
- (match_operand:VMMX 2 "register_operand" "y")))]
- "TARGET_REALLY_IWMMXT"
- "wminu<MMX_char>%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wmin")]
-)
-
-;; Pack/unpack insns.
-
-(define_insn "iwmmxt_wpackhss"
- [(set (match_operand:V8QI 0 "register_operand" "=y")
- (vec_concat:V8QI
- (ss_truncate:V4QI (match_operand:V4HI 1 "register_operand" "y"))
- (ss_truncate:V4QI (match_operand:V4HI 2 "register_operand" "y"))))]
- "TARGET_REALLY_IWMMXT"
- "wpackhss%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wpack")]
-)
-
-(define_insn "iwmmxt_wpackwss"
- [(set (match_operand:V4HI 0 "register_operand" "=y")
- (vec_concat:V4HI
- (ss_truncate:V2HI (match_operand:V2SI 1 "register_operand" "y"))
- (ss_truncate:V2HI (match_operand:V2SI 2 "register_operand" "y"))))]
- "TARGET_REALLY_IWMMXT"
- "wpackwss%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wpack")]
-)
-
-(define_insn "iwmmxt_wpackdss"
- [(set (match_operand:V2SI 0 "register_operand" "=y")
- (vec_concat:V2SI
- (ss_truncate:SI (match_operand:DI 1 "register_operand" "y"))
- (ss_truncate:SI (match_operand:DI 2 "register_operand" "y"))))]
- "TARGET_REALLY_IWMMXT"
- "wpackdss%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wpack")]
-)
-
-(define_insn "iwmmxt_wpackhus"
- [(set (match_operand:V8QI 0 "register_operand" "=y")
- (vec_concat:V8QI
- (us_truncate:V4QI (match_operand:V4HI 1 "register_operand" "y"))
- (us_truncate:V4QI (match_operand:V4HI 2 "register_operand" "y"))))]
- "TARGET_REALLY_IWMMXT"
- "wpackhus%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wpack")]
-)
-
-(define_insn "iwmmxt_wpackwus"
- [(set (match_operand:V4HI 0 "register_operand" "=y")
- (vec_concat:V4HI
- (us_truncate:V2HI (match_operand:V2SI 1 "register_operand" "y"))
- (us_truncate:V2HI (match_operand:V2SI 2 "register_operand" "y"))))]
- "TARGET_REALLY_IWMMXT"
- "wpackwus%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wpack")]
-)
-
-(define_insn "iwmmxt_wpackdus"
- [(set (match_operand:V2SI 0 "register_operand" "=y")
- (vec_concat:V2SI
- (us_truncate:SI (match_operand:DI 1 "register_operand" "y"))
- (us_truncate:SI (match_operand:DI 2 "register_operand" "y"))))]
- "TARGET_REALLY_IWMMXT"
- "wpackdus%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wpack")]
-)
-
-(define_insn "iwmmxt_wunpckihb"
- [(set (match_operand:V8QI 0 "register_operand" "=y")
- (vec_merge:V8QI
- (vec_select:V8QI (match_operand:V8QI 1 "register_operand" "y")
- (parallel [(const_int 4)
- (const_int 0)
- (const_int 5)
- (const_int 1)
- (const_int 6)
- (const_int 2)
- (const_int 7)
- (const_int 3)]))
- (vec_select:V8QI (match_operand:V8QI 2 "register_operand" "y")
- (parallel [(const_int 0)
- (const_int 4)
- (const_int 1)
- (const_int 5)
- (const_int 2)
- (const_int 6)
- (const_int 3)
- (const_int 7)]))
- (const_int 85)))]
- "TARGET_REALLY_IWMMXT"
- "wunpckihb%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wunpckih")]
-)
-
-(define_insn "iwmmxt_wunpckihh"
- [(set (match_operand:V4HI 0 "register_operand" "=y")
- (vec_merge:V4HI
- (vec_select:V4HI (match_operand:V4HI 1 "register_operand" "y")
- (parallel [(const_int 2)
- (const_int 0)
- (const_int 3)
- (const_int 1)]))
- (vec_select:V4HI (match_operand:V4HI 2 "register_operand" "y")
- (parallel [(const_int 0)
- (const_int 2)
- (const_int 1)
- (const_int 3)]))
- (const_int 5)))]
- "TARGET_REALLY_IWMMXT"
- "wunpckihh%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wunpckih")]
-)
-
-(define_insn "iwmmxt_wunpckihw"
- [(set (match_operand:V2SI 0 "register_operand" "=y")
- (vec_merge:V2SI
- (vec_select:V2SI (match_operand:V2SI 1 "register_operand" "y")
- (parallel [(const_int 1)
- (const_int 0)]))
- (vec_select:V2SI (match_operand:V2SI 2 "register_operand" "y")
- (parallel [(const_int 0)
- (const_int 1)]))
- (const_int 1)))]
- "TARGET_REALLY_IWMMXT"
- "wunpckihw%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wunpckih")]
-)
-
-(define_insn "iwmmxt_wunpckilb"
- [(set (match_operand:V8QI 0 "register_operand" "=y")
- (vec_merge:V8QI
- (vec_select:V8QI (match_operand:V8QI 1 "register_operand" "y")
- (parallel [(const_int 0)
- (const_int 4)
- (const_int 1)
- (const_int 5)
- (const_int 2)
- (const_int 6)
- (const_int 3)
- (const_int 7)]))
- (vec_select:V8QI (match_operand:V8QI 2 "register_operand" "y")
- (parallel [(const_int 4)
- (const_int 0)
- (const_int 5)
- (const_int 1)
- (const_int 6)
- (const_int 2)
- (const_int 7)
- (const_int 3)]))
- (const_int 85)))]
- "TARGET_REALLY_IWMMXT"
- "wunpckilb%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wunpckil")]
-)
-
-(define_insn "iwmmxt_wunpckilh"
- [(set (match_operand:V4HI 0 "register_operand" "=y")
- (vec_merge:V4HI
- (vec_select:V4HI (match_operand:V4HI 1 "register_operand" "y")
- (parallel [(const_int 0)
- (const_int 2)
- (const_int 1)
- (const_int 3)]))
- (vec_select:V4HI (match_operand:V4HI 2 "register_operand" "y")
- (parallel [(const_int 2)
- (const_int 0)
- (const_int 3)
- (const_int 1)]))
- (const_int 5)))]
- "TARGET_REALLY_IWMMXT"
- "wunpckilh%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wunpckil")]
-)
-
-(define_insn "iwmmxt_wunpckilw"
- [(set (match_operand:V2SI 0 "register_operand" "=y")
- (vec_merge:V2SI
- (vec_select:V2SI (match_operand:V2SI 1 "register_operand" "y")
- (parallel [(const_int 0)
- (const_int 1)]))
- (vec_select:V2SI (match_operand:V2SI 2 "register_operand" "y")
- (parallel [(const_int 1)
- (const_int 0)]))
- (const_int 1)))]
- "TARGET_REALLY_IWMMXT"
- "wunpckilw%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wunpckil")]
-)
-
-(define_insn "iwmmxt_wunpckehub"
- [(set (match_operand:V4HI 0 "register_operand" "=y")
- (vec_select:V4HI
- (zero_extend:V8HI (match_operand:V8QI 1 "register_operand" "y"))
- (parallel [(const_int 4) (const_int 5)
- (const_int 6) (const_int 7)])))]
- "TARGET_REALLY_IWMMXT"
- "wunpckehub%?\\t%0, %1"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wunpckeh")]
-)
-
-(define_insn "iwmmxt_wunpckehuh"
- [(set (match_operand:V2SI 0 "register_operand" "=y")
- (vec_select:V2SI
- (zero_extend:V4SI (match_operand:V4HI 1 "register_operand" "y"))
- (parallel [(const_int 2) (const_int 3)])))]
- "TARGET_REALLY_IWMMXT"
- "wunpckehuh%?\\t%0, %1"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wunpckeh")]
-)
-
-(define_insn "iwmmxt_wunpckehuw"
- [(set (match_operand:DI 0 "register_operand" "=y")
- (vec_select:DI
- (zero_extend:V2DI (match_operand:V2SI 1 "register_operand" "y"))
- (parallel [(const_int 1)])))]
- "TARGET_REALLY_IWMMXT"
- "wunpckehuw%?\\t%0, %1"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wunpckeh")]
-)
-
-(define_insn "iwmmxt_wunpckehsb"
- [(set (match_operand:V4HI 0 "register_operand" "=y")
- (vec_select:V4HI
- (sign_extend:V8HI (match_operand:V8QI 1 "register_operand" "y"))
- (parallel [(const_int 4) (const_int 5)
- (const_int 6) (const_int 7)])))]
- "TARGET_REALLY_IWMMXT"
- "wunpckehsb%?\\t%0, %1"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wunpckeh")]
-)
-
-(define_insn "iwmmxt_wunpckehsh"
- [(set (match_operand:V2SI 0 "register_operand" "=y")
- (vec_select:V2SI
- (sign_extend:V4SI (match_operand:V4HI 1 "register_operand" "y"))
- (parallel [(const_int 2) (const_int 3)])))]
- "TARGET_REALLY_IWMMXT"
- "wunpckehsh%?\\t%0, %1"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wunpckeh")]
-)
-
-(define_insn "iwmmxt_wunpckehsw"
- [(set (match_operand:DI 0 "register_operand" "=y")
- (vec_select:DI
- (sign_extend:V2DI (match_operand:V2SI 1 "register_operand" "y"))
- (parallel [(const_int 1)])))]
- "TARGET_REALLY_IWMMXT"
- "wunpckehsw%?\\t%0, %1"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wunpckeh")]
-)
-
-(define_insn "iwmmxt_wunpckelub"
- [(set (match_operand:V4HI 0 "register_operand" "=y")
- (vec_select:V4HI
- (zero_extend:V8HI (match_operand:V8QI 1 "register_operand" "y"))
- (parallel [(const_int 0) (const_int 1)
- (const_int 2) (const_int 3)])))]
- "TARGET_REALLY_IWMMXT"
- "wunpckelub%?\\t%0, %1"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wunpckel")]
-)
-
-(define_insn "iwmmxt_wunpckeluh"
- [(set (match_operand:V2SI 0 "register_operand" "=y")
- (vec_select:V2SI
- (zero_extend:V4SI (match_operand:V4HI 1 "register_operand" "y"))
- (parallel [(const_int 0) (const_int 1)])))]
- "TARGET_REALLY_IWMMXT"
- "wunpckeluh%?\\t%0, %1"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wunpckel")]
-)
-
-(define_insn "iwmmxt_wunpckeluw"
- [(set (match_operand:DI 0 "register_operand" "=y")
- (vec_select:DI
- (zero_extend:V2DI (match_operand:V2SI 1 "register_operand" "y"))
- (parallel [(const_int 0)])))]
- "TARGET_REALLY_IWMMXT"
- "wunpckeluw%?\\t%0, %1"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wunpckel")]
-)
-
-(define_insn "iwmmxt_wunpckelsb"
- [(set (match_operand:V4HI 0 "register_operand" "=y")
- (vec_select:V4HI
- (sign_extend:V8HI (match_operand:V8QI 1 "register_operand" "y"))
- (parallel [(const_int 0) (const_int 1)
- (const_int 2) (const_int 3)])))]
- "TARGET_REALLY_IWMMXT"
- "wunpckelsb%?\\t%0, %1"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wunpckel")]
-)
-
-(define_insn "iwmmxt_wunpckelsh"
- [(set (match_operand:V2SI 0 "register_operand" "=y")
- (vec_select:V2SI
- (sign_extend:V4SI (match_operand:V4HI 1 "register_operand" "y"))
- (parallel [(const_int 0) (const_int 1)])))]
- "TARGET_REALLY_IWMMXT"
- "wunpckelsh%?\\t%0, %1"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wunpckel")]
-)
-
-(define_insn "iwmmxt_wunpckelsw"
- [(set (match_operand:DI 0 "register_operand" "=y")
- (vec_select:DI
- (sign_extend:V2DI (match_operand:V2SI 1 "register_operand" "y"))
- (parallel [(const_int 0)])))]
- "TARGET_REALLY_IWMMXT"
- "wunpckelsw%?\\t%0, %1"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wunpckel")]
-)
-
-;; Shifts
-
-(define_insn "ror<mode>3"
- [(set (match_operand:VSHFT 0 "register_operand" "=y,y")
- (rotatert:VSHFT (match_operand:VSHFT 1 "register_operand" "y,y")
- (match_operand:SI 2 "imm_or_reg_operand" "z,i")))]
- "TARGET_REALLY_IWMMXT"
- "*
- switch (which_alternative)
- {
- case 0:
- return \"wror<MMX_char>g%?\\t%0, %1, %2\";
- case 1:
- return arm_output_iwmmxt_shift_immediate (\"wror<MMX_char>\", operands, true);
- default:
- gcc_unreachable ();
- }
- "
- [(set_attr "predicable" "yes")
- (set_attr "arch" "*, iwmmxt2")
- (set_attr "type" "wmmx_wror, wmmx_wror")]
-)
-
-(define_insn "ashr<mode>3_iwmmxt"
- [(set (match_operand:VSHFT 0 "register_operand" "=y,y")
- (ashiftrt:VSHFT (match_operand:VSHFT 1 "register_operand" "y,y")
- (match_operand:SI 2 "imm_or_reg_operand" "z,i")))]
- "TARGET_REALLY_IWMMXT"
- "*
- switch (which_alternative)
- {
- case 0:
- return \"wsra<MMX_char>g%?\\t%0, %1, %2\";
- case 1:
- return arm_output_iwmmxt_shift_immediate (\"wsra<MMX_char>\", operands, true);
- default:
- gcc_unreachable ();
- }
- "
- [(set_attr "predicable" "yes")
- (set_attr "arch" "*, iwmmxt2")
- (set_attr "type" "wmmx_wsra, wmmx_wsra")]
-)
-
-(define_insn "lshr<mode>3_iwmmxt"
- [(set (match_operand:VSHFT 0 "register_operand" "=y,y")
- (lshiftrt:VSHFT (match_operand:VSHFT 1 "register_operand" "y,y")
- (match_operand:SI 2 "imm_or_reg_operand" "z,i")))]
- "TARGET_REALLY_IWMMXT"
- "*
- switch (which_alternative)
- {
- case 0:
- return \"wsrl<MMX_char>g%?\\t%0, %1, %2\";
- case 1:
- return arm_output_iwmmxt_shift_immediate (\"wsrl<MMX_char>\", operands, false);
- default:
- gcc_unreachable ();
- }
- "
- [(set_attr "predicable" "yes")
- (set_attr "arch" "*, iwmmxt2")
- (set_attr "type" "wmmx_wsrl, wmmx_wsrl")]
-)
-
-(define_insn "ashl<mode>3_iwmmxt"
- [(set (match_operand:VSHFT 0 "register_operand" "=y,y")
- (ashift:VSHFT (match_operand:VSHFT 1 "register_operand" "y,y")
- (match_operand:SI 2 "imm_or_reg_operand" "z,i")))]
- "TARGET_REALLY_IWMMXT"
- "*
- switch (which_alternative)
- {
- case 0:
- return \"wsll<MMX_char>g%?\\t%0, %1, %2\";
- case 1:
- return arm_output_iwmmxt_shift_immediate (\"wsll<MMX_char>\", operands, false);
- default:
- gcc_unreachable ();
- }
- "
- [(set_attr "predicable" "yes")
- (set_attr "arch" "*, iwmmxt2")
- (set_attr "type" "wmmx_wsll, wmmx_wsll")]
-)
-
-(define_insn "ror<mode>3_di"
- [(set (match_operand:VSHFT 0 "register_operand" "=y,y")
- (rotatert:VSHFT (match_operand:VSHFT 1 "register_operand" "y,y")
- (match_operand:DI 2 "imm_or_reg_operand" "y,i")))]
- "TARGET_REALLY_IWMMXT"
- "*
- switch (which_alternative)
- {
- case 0:
- return \"wror<MMX_char>%?\\t%0, %1, %2\";
- case 1:
- return arm_output_iwmmxt_shift_immediate (\"wror<MMX_char>\", operands, true);
- default:
- gcc_unreachable ();
- }
- "
- [(set_attr "predicable" "yes")
- (set_attr "arch" "*, iwmmxt2")
- (set_attr "type" "wmmx_wror, wmmx_wror")]
-)
-
-(define_insn "ashr<mode>3_di"
- [(set (match_operand:VSHFT 0 "register_operand" "=y,y")
- (ashiftrt:VSHFT (match_operand:VSHFT 1 "register_operand" "y,y")
- (match_operand:DI 2 "imm_or_reg_operand" "y,i")))]
- "TARGET_REALLY_IWMMXT"
- "*
- switch (which_alternative)
- {
- case 0:
- return \"wsra<MMX_char>%?\\t%0, %1, %2\";
- case 1:
- return arm_output_iwmmxt_shift_immediate (\"wsra<MMX_char>\", operands, true);
- default:
- gcc_unreachable ();
- }
- "
- [(set_attr "predicable" "yes")
- (set_attr "arch" "*, iwmmxt2")
- (set_attr "type" "wmmx_wsra, wmmx_wsra")]
-)
-
-(define_insn "lshr<mode>3_di"
- [(set (match_operand:VSHFT 0 "register_operand" "=y,y")
- (lshiftrt:VSHFT (match_operand:VSHFT 1 "register_operand" "y,y")
- (match_operand:DI 2 "register_operand" "y,i")))]
- "TARGET_REALLY_IWMMXT"
- "*
- switch (which_alternative)
- {
- case 0:
- return \"wsrl<MMX_char>%?\\t%0, %1, %2\";
- case 1:
- return arm_output_iwmmxt_shift_immediate (\"wsrl<MMX_char>\", operands, false);
- default:
- gcc_unreachable ();
- }
- "
- [(set_attr "predicable" "yes")
- (set_attr "arch" "*, iwmmxt2")
- (set_attr "type" "wmmx_wsrl, wmmx_wsrl")]
-)
-
-(define_insn "ashl<mode>3_di"
- [(set (match_operand:VSHFT 0 "register_operand" "=y,y")
- (ashift:VSHFT (match_operand:VSHFT 1 "register_operand" "y,y")
- (match_operand:DI 2 "imm_or_reg_operand" "y,i")))]
- "TARGET_REALLY_IWMMXT"
- "*
- switch (which_alternative)
- {
- case 0:
- return \"wsll<MMX_char>%?\\t%0, %1, %2\";
- case 1:
- return arm_output_iwmmxt_shift_immediate (\"wsll<MMX_char>\", operands, false);
- default:
- gcc_unreachable ();
- }
- "
- [(set_attr "predicable" "yes")
- (set_attr "arch" "*, iwmmxt2")
- (set_attr "type" "wmmx_wsll, wmmx_wsll")]
-)
-
-(define_insn "iwmmxt_wmadds"
- [(set (match_operand:V2SI 0 "register_operand" "=y")
- (plus:V2SI
- (mult:V2SI
- (vec_select:V2SI (sign_extend:V4SI (match_operand:V4HI 1 "register_operand" "y"))
- (parallel [(const_int 1) (const_int 3)]))
- (vec_select:V2SI (sign_extend:V4SI (match_operand:V4HI 2 "register_operand" "y"))
- (parallel [(const_int 1) (const_int 3)])))
- (mult:V2SI
- (vec_select:V2SI (sign_extend:V4SI (match_dup 1))
- (parallel [(const_int 0) (const_int 2)]))
- (vec_select:V2SI (sign_extend:V4SI (match_dup 2))
- (parallel [(const_int 0) (const_int 2)])))))]
- "TARGET_REALLY_IWMMXT"
- "wmadds%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wmadd")]
-)
-
-(define_insn "iwmmxt_wmaddu"
- [(set (match_operand:V2SI 0 "register_operand" "=y")
- (plus:V2SI
- (mult:V2SI
- (vec_select:V2SI (zero_extend:V4SI (match_operand:V4HI 1 "register_operand" "y"))
- (parallel [(const_int 1) (const_int 3)]))
- (vec_select:V2SI (zero_extend:V4SI (match_operand:V4HI 2 "register_operand" "y"))
- (parallel [(const_int 1) (const_int 3)])))
- (mult:V2SI
- (vec_select:V2SI (zero_extend:V4SI (match_dup 1))
- (parallel [(const_int 0) (const_int 2)]))
- (vec_select:V2SI (zero_extend:V4SI (match_dup 2))
- (parallel [(const_int 0) (const_int 2)])))))]
- "TARGET_REALLY_IWMMXT"
- "wmaddu%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wmadd")]
-)
-
-(define_insn "iwmmxt_tmia"
- [(set (match_operand:DI 0 "register_operand" "=y")
- (plus:DI (match_operand:DI 1 "register_operand" "0")
- (mult:DI (sign_extend:DI
- (match_operand:SI 2 "register_operand" "r"))
- (sign_extend:DI
- (match_operand:SI 3 "register_operand" "r")))))]
- "TARGET_REALLY_IWMMXT"
- "tmia%?\\t%0, %2, %3"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_tmia")]
-)
-
-(define_insn "iwmmxt_tmiaph"
- [(set (match_operand:DI 0 "register_operand" "=y")
- (plus:DI (match_operand:DI 1 "register_operand" "0")
- (plus:DI
- (mult:DI (sign_extend:DI
- (truncate:HI (match_operand:SI 2 "register_operand" "r")))
- (sign_extend:DI
- (truncate:HI (match_operand:SI 3 "register_operand" "r"))))
- (mult:DI (sign_extend:DI
- (truncate:HI (ashiftrt:SI (match_dup 2) (const_int 16))))
- (sign_extend:DI
- (truncate:HI (ashiftrt:SI (match_dup 3) (const_int 16))))))))]
- "TARGET_REALLY_IWMMXT"
- "tmiaph%?\\t%0, %2, %3"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_tmiaph")]
-)
-
-(define_insn "iwmmxt_tmiabb"
- [(set (match_operand:DI 0 "register_operand" "=y")
- (plus:DI (match_operand:DI 1 "register_operand" "0")
- (mult:DI (sign_extend:DI
- (truncate:HI (match_operand:SI 2 "register_operand" "r")))
- (sign_extend:DI
- (truncate:HI (match_operand:SI 3 "register_operand" "r"))))))]
- "TARGET_REALLY_IWMMXT"
- "tmiabb%?\\t%0, %2, %3"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_tmiaxy")]
-)
-
-(define_insn "iwmmxt_tmiatb"
- [(set (match_operand:DI 0 "register_operand" "=y")
- (plus:DI (match_operand:DI 1 "register_operand" "0")
- (mult:DI (sign_extend:DI
- (truncate:HI
- (ashiftrt:SI
- (match_operand:SI 2 "register_operand" "r")
- (const_int 16))))
- (sign_extend:DI
- (truncate:HI
- (match_operand:SI 3 "register_operand" "r"))))))]
- "TARGET_REALLY_IWMMXT"
- "tmiatb%?\\t%0, %2, %3"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_tmiaxy")]
-)
-
-(define_insn "iwmmxt_tmiabt"
- [(set (match_operand:DI 0 "register_operand" "=y")
- (plus:DI (match_operand:DI 1 "register_operand" "0")
- (mult:DI (sign_extend:DI
- (truncate:HI
- (match_operand:SI 2 "register_operand" "r")))
- (sign_extend:DI
- (truncate:HI
- (ashiftrt:SI
- (match_operand:SI 3 "register_operand" "r")
- (const_int 16)))))))]
- "TARGET_REALLY_IWMMXT"
- "tmiabt%?\\t%0, %2, %3"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_tmiaxy")]
-)
-
-(define_insn "iwmmxt_tmiatt"
- [(set (match_operand:DI 0 "register_operand" "=y")
- (plus:DI (match_operand:DI 1 "register_operand" "0")
- (mult:DI (sign_extend:DI
- (truncate:HI
- (ashiftrt:SI
- (match_operand:SI 2 "register_operand" "r")
- (const_int 16))))
- (sign_extend:DI
- (truncate:HI
- (ashiftrt:SI
- (match_operand:SI 3 "register_operand" "r")
- (const_int 16)))))))]
- "TARGET_REALLY_IWMMXT"
- "tmiatt%?\\t%0, %2, %3"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_tmiaxy")]
-)
-
-(define_insn "iwmmxt_tmovmskb"
- [(set (match_operand:SI 0 "register_operand" "=r")
- (unspec:SI [(match_operand:V8QI 1 "register_operand" "y")] UNSPEC_TMOVMSK))]
- "TARGET_REALLY_IWMMXT"
- "tmovmskb%?\\t%0, %1"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_tmovmsk")]
-)
-
-(define_insn "iwmmxt_tmovmskh"
- [(set (match_operand:SI 0 "register_operand" "=r")
- (unspec:SI [(match_operand:V4HI 1 "register_operand" "y")] UNSPEC_TMOVMSK))]
- "TARGET_REALLY_IWMMXT"
- "tmovmskh%?\\t%0, %1"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_tmovmsk")]
-)
-
-(define_insn "iwmmxt_tmovmskw"
- [(set (match_operand:SI 0 "register_operand" "=r")
- (unspec:SI [(match_operand:V2SI 1 "register_operand" "y")] UNSPEC_TMOVMSK))]
- "TARGET_REALLY_IWMMXT"
- "tmovmskw%?\\t%0, %1"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_tmovmsk")]
-)
-
-(define_insn "iwmmxt_waccb"
- [(set (match_operand:DI 0 "register_operand" "=y")
- (unspec:DI [(match_operand:V8QI 1 "register_operand" "y")] UNSPEC_WACC))]
- "TARGET_REALLY_IWMMXT"
- "waccb%?\\t%0, %1"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wacc")]
-)
-
-(define_insn "iwmmxt_wacch"
- [(set (match_operand:DI 0 "register_operand" "=y")
- (unspec:DI [(match_operand:V4HI 1 "register_operand" "y")] UNSPEC_WACC))]
- "TARGET_REALLY_IWMMXT"
- "wacch%?\\t%0, %1"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wacc")]
-)
-
-(define_insn "iwmmxt_waccw"
- [(set (match_operand:DI 0 "register_operand" "=y")
- (unspec:DI [(match_operand:V2SI 1 "register_operand" "y")] UNSPEC_WACC))]
- "TARGET_REALLY_IWMMXT"
- "waccw%?\\t%0, %1"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wacc")]
-)
-
-;; Use an unspec here to prevent 8 * imm from being optimized by CSE.
-(define_insn "iwmmxt_waligni"
- [(set (match_operand:V8QI 0 "register_operand" "=y")
- (unspec:V8QI [(subreg:V8QI
- (ashiftrt:TI
- (subreg:TI (vec_concat:V16QI
- (match_operand:V8QI 1 "register_operand" "y")
- (match_operand:V8QI 2 "register_operand" "y")) 0)
- (mult:SI
- (match_operand:SI 3 "immediate_operand" "i")
- (const_int 8))) 0)] UNSPEC_WALIGNI))]
- "TARGET_REALLY_IWMMXT"
- "waligni%?\\t%0, %1, %2, %3"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_waligni")]
-)
-
-(define_insn "iwmmxt_walignr"
- [(set (match_operand:V8QI 0 "register_operand" "=y")
- (subreg:V8QI (ashiftrt:TI
- (subreg:TI (vec_concat:V16QI
- (match_operand:V8QI 1 "register_operand" "y")
- (match_operand:V8QI 2 "register_operand" "y")) 0)
- (mult:SI
- (zero_extract:SI (match_operand:SI 3 "register_operand" "z") (const_int 3) (const_int 0))
- (const_int 8))) 0))]
- "TARGET_REALLY_IWMMXT"
- "walignr%U3%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_walignr")]
-)
-
-(define_insn "iwmmxt_walignr0"
- [(set (match_operand:V8QI 0 "register_operand" "=y")
- (subreg:V8QI (ashiftrt:TI
- (subreg:TI (vec_concat:V16QI
- (match_operand:V8QI 1 "register_operand" "y")
- (match_operand:V8QI 2 "register_operand" "y")) 0)
- (mult:SI
- (zero_extract:SI (reg:SI WCGR0) (const_int 3) (const_int 0))
- (const_int 8))) 0))]
- "TARGET_REALLY_IWMMXT"
- "walignr0%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_walignr")]
-)
-
-(define_insn "iwmmxt_walignr1"
- [(set (match_operand:V8QI 0 "register_operand" "=y")
- (subreg:V8QI (ashiftrt:TI
- (subreg:TI (vec_concat:V16QI
- (match_operand:V8QI 1 "register_operand" "y")
- (match_operand:V8QI 2 "register_operand" "y")) 0)
- (mult:SI
- (zero_extract:SI (reg:SI WCGR1) (const_int 3) (const_int 0))
- (const_int 8))) 0))]
- "TARGET_REALLY_IWMMXT"
- "walignr1%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_walignr")]
-)
-
-(define_insn "iwmmxt_walignr2"
- [(set (match_operand:V8QI 0 "register_operand" "=y")
- (subreg:V8QI (ashiftrt:TI
- (subreg:TI (vec_concat:V16QI
- (match_operand:V8QI 1 "register_operand" "y")
- (match_operand:V8QI 2 "register_operand" "y")) 0)
- (mult:SI
- (zero_extract:SI (reg:SI WCGR2) (const_int 3) (const_int 0))
- (const_int 8))) 0))]
- "TARGET_REALLY_IWMMXT"
- "walignr2%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_walignr")]
-)
-
-(define_insn "iwmmxt_walignr3"
- [(set (match_operand:V8QI 0 "register_operand" "=y")
- (subreg:V8QI (ashiftrt:TI
- (subreg:TI (vec_concat:V16QI
- (match_operand:V8QI 1 "register_operand" "y")
- (match_operand:V8QI 2 "register_operand" "y")) 0)
- (mult:SI
- (zero_extract:SI (reg:SI WCGR3) (const_int 3) (const_int 0))
- (const_int 8))) 0))]
- "TARGET_REALLY_IWMMXT"
- "walignr3%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_walignr")]
-)
-
-(define_insn "iwmmxt_wsadb"
- [(set (match_operand:V2SI 0 "register_operand" "=y")
- (unspec:V2SI [
- (match_operand:V2SI 1 "register_operand" "0")
- (match_operand:V8QI 2 "register_operand" "y")
- (match_operand:V8QI 3 "register_operand" "y")] UNSPEC_WSAD))]
- "TARGET_REALLY_IWMMXT"
- "wsadb%?\\t%0, %2, %3"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wsad")]
-)
-
-(define_insn "iwmmxt_wsadh"
- [(set (match_operand:V2SI 0 "register_operand" "=y")
- (unspec:V2SI [
- (match_operand:V2SI 1 "register_operand" "0")
- (match_operand:V4HI 2 "register_operand" "y")
- (match_operand:V4HI 3 "register_operand" "y")] UNSPEC_WSAD))]
- "TARGET_REALLY_IWMMXT"
- "wsadh%?\\t%0, %2, %3"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wsad")]
-)
-
-(define_insn "iwmmxt_wsadbz"
- [(set (match_operand:V2SI 0 "register_operand" "=y")
- (unspec:V2SI [(match_operand:V8QI 1 "register_operand" "y")
- (match_operand:V8QI 2 "register_operand" "y")] UNSPEC_WSADZ))]
- "TARGET_REALLY_IWMMXT"
- "wsadbz%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wsad")]
-)
-
-(define_insn "iwmmxt_wsadhz"
- [(set (match_operand:V2SI 0 "register_operand" "=y")
- (unspec:V2SI [(match_operand:V4HI 1 "register_operand" "y")
- (match_operand:V4HI 2 "register_operand" "y")] UNSPEC_WSADZ))]
- "TARGET_REALLY_IWMMXT"
- "wsadhz%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wsad")]
-)
-
-(include "iwmmxt2.md")
diff --git a/gcc/config/arm/iwmmxt2.md b/gcc/config/arm/iwmmxt2.md
deleted file mode 100644
index 74cd148..0000000
--- a/gcc/config/arm/iwmmxt2.md
+++ /dev/null
@@ -1,903 +0,0 @@
-;; Patterns for the Intel Wireless MMX technology architecture.
-;; Copyright (C) 2011-2025 Free Software Foundation, Inc.
-;; Written by Marvell, Inc.
-;;
-;; This file is part of GCC.
-;;
-;; GCC is free software; you can redistribute it and/or modify it
-;; under the terms of the GNU General Public License as published
-;; by the Free Software Foundation; either version 3, or (at your
-;; option) any later version.
-
-;; GCC is distributed in the hope that it will be useful, but WITHOUT
-;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
-;; License for more details.
-
-;; You should have received a copy of the GNU General Public License
-;; along with GCC; see the file COPYING3. If not see
-;; <http://www.gnu.org/licenses/>.
-
-(define_insn "iwmmxt_wabs<mode>3"
- [(set (match_operand:VMMX 0 "register_operand" "=y")
- (unspec:VMMX [(match_operand:VMMX 1 "register_operand" "y")] UNSPEC_WABS))]
- "TARGET_REALLY_IWMMXT"
- "wabs<MMX_char>%?\\t%0, %1"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wabs")]
-)
-
-(define_insn "iwmmxt_wabsdiffb"
- [(set (match_operand:V8QI 0 "register_operand" "=y")
- (truncate:V8QI
- (abs:V8HI
- (minus:V8HI
- (zero_extend:V8HI (match_operand:V8QI 1 "register_operand" "y"))
- (zero_extend:V8HI (match_operand:V8QI 2 "register_operand" "y"))))))]
- "TARGET_REALLY_IWMMXT"
- "wabsdiffb%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wabsdiff")]
-)
-
-(define_insn "iwmmxt_wabsdiffh"
- [(set (match_operand:V4HI 0 "register_operand" "=y")
- (truncate:V4HI
- (abs:V4SI
- (minus:V4SI
- (zero_extend:V4SI (match_operand:V4HI 1 "register_operand" "y"))
- (zero_extend:V4SI (match_operand:V4HI 2 "register_operand" "y"))))))]
- "TARGET_REALLY_IWMMXT"
- "wabsdiffh%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wabsdiff")]
-)
-
-(define_insn "iwmmxt_wabsdiffw"
- [(set (match_operand:V2SI 0 "register_operand" "=y")
- (truncate:V2SI
- (abs:V2DI
- (minus:V2DI
- (zero_extend:V2DI (match_operand:V2SI 1 "register_operand" "y"))
- (zero_extend:V2DI (match_operand:V2SI 2 "register_operand" "y"))))))]
- "TARGET_REALLY_IWMMXT"
- "wabsdiffw%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wabsdiff")]
-)
-
-(define_insn "iwmmxt_waddsubhx"
- [(set (match_operand:V4HI 0 "register_operand" "=y")
- (vec_merge:V4HI
- (ss_minus:V4HI
- (match_operand:V4HI 1 "register_operand" "y")
- (vec_select:V4HI (match_operand:V4HI 2 "register_operand" "y")
- (parallel [(const_int 1) (const_int 0) (const_int 3) (const_int 2)])))
- (ss_plus:V4HI
- (match_dup 1)
- (vec_select:V4HI (match_dup 2)
- (parallel [(const_int 1) (const_int 0) (const_int 3) (const_int 2)])))
- (const_int 10)))]
- "TARGET_REALLY_IWMMXT"
- "waddsubhx%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_waddsubhx")]
-)
-
-(define_insn "iwmmxt_wsubaddhx"
- [(set (match_operand:V4HI 0 "register_operand" "=y")
- (vec_merge:V4HI
- (ss_plus:V4HI
- (match_operand:V4HI 1 "register_operand" "y")
- (vec_select:V4HI (match_operand:V4HI 2 "register_operand" "y")
- (parallel [(const_int 1) (const_int 0) (const_int 3) (const_int 2)])))
- (ss_minus:V4HI
- (match_dup 1)
- (vec_select:V4HI (match_dup 2)
- (parallel [(const_int 1) (const_int 0) (const_int 3) (const_int 2)])))
- (const_int 10)))]
- "TARGET_REALLY_IWMMXT"
- "wsubaddhx%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wsubaddhx")]
-)
-
-(define_insn "addc<mode>3"
- [(set (match_operand:VMMX2 0 "register_operand" "=y")
- (unspec:VMMX2
- [(plus:VMMX2
- (match_operand:VMMX2 1 "register_operand" "y")
- (match_operand:VMMX2 2 "register_operand" "y"))] UNSPEC_WADDC))]
- "TARGET_REALLY_IWMMXT"
- "wadd<MMX_char>c%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wadd")]
-)
-
-(define_insn "iwmmxt_avg4"
-[(set (match_operand:V8QI 0 "register_operand" "=y")
- (truncate:V8QI
- (vec_select:V8HI
- (vec_merge:V8HI
- (lshiftrt:V8HI
- (plus:V8HI
- (plus:V8HI
- (plus:V8HI
- (plus:V8HI
- (zero_extend:V8HI (match_operand:V8QI 1 "register_operand" "y"))
- (zero_extend:V8HI (match_operand:V8QI 2 "register_operand" "y")))
- (vec_select:V8HI (zero_extend:V8HI (match_dup 1))
- (parallel [(const_int 7) (const_int 0) (const_int 1) (const_int 2)
- (const_int 3) (const_int 4) (const_int 5) (const_int 6)])))
- (vec_select:V8HI (zero_extend:V8HI (match_dup 2))
- (parallel [(const_int 7) (const_int 0) (const_int 1) (const_int 2)
- (const_int 3) (const_int 4) (const_int 5) (const_int 6)])))
- (const_vector:V8HI [(const_int 1) (const_int 1) (const_int 1) (const_int 1)
- (const_int 1) (const_int 1) (const_int 1) (const_int 1)]))
- (const_int 2))
- (const_vector:V8HI [(const_int 0) (const_int 0) (const_int 0) (const_int 0)
- (const_int 0) (const_int 0) (const_int 0) (const_int 0)])
- (const_int 254))
- (parallel [(const_int 1) (const_int 2) (const_int 3) (const_int 4)
- (const_int 5) (const_int 6) (const_int 7) (const_int 0)]))))]
- "TARGET_REALLY_IWMMXT"
- "wavg4%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wavg4")]
-)
-
-(define_insn "iwmmxt_avg4r"
- [(set (match_operand:V8QI 0 "register_operand" "=y")
- (truncate:V8QI
- (vec_select:V8HI
- (vec_merge:V8HI
- (lshiftrt:V8HI
- (plus:V8HI
- (plus:V8HI
- (plus:V8HI
- (plus:V8HI
- (zero_extend:V8HI (match_operand:V8QI 1 "register_operand" "y"))
- (zero_extend:V8HI (match_operand:V8QI 2 "register_operand" "y")))
- (vec_select:V8HI (zero_extend:V8HI (match_dup 1))
- (parallel [(const_int 7) (const_int 0) (const_int 1) (const_int 2)
- (const_int 3) (const_int 4) (const_int 5) (const_int 6)])))
- (vec_select:V8HI (zero_extend:V8HI (match_dup 2))
- (parallel [(const_int 7) (const_int 0) (const_int 1) (const_int 2)
- (const_int 3) (const_int 4) (const_int 5) (const_int 6)])))
- (const_vector:V8HI [(const_int 2) (const_int 2) (const_int 2) (const_int 2)
- (const_int 2) (const_int 2) (const_int 2) (const_int 2)]))
- (const_int 2))
- (const_vector:V8HI [(const_int 0) (const_int 0) (const_int 0) (const_int 0)
- (const_int 0) (const_int 0) (const_int 0) (const_int 0)])
- (const_int 254))
- (parallel [(const_int 1) (const_int 2) (const_int 3) (const_int 4)
- (const_int 5) (const_int 6) (const_int 7) (const_int 0)]))))]
- "TARGET_REALLY_IWMMXT"
- "wavg4r%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wavg4")]
-)
-
-(define_insn "iwmmxt_wmaddsx"
- [(set (match_operand:V2SI 0 "register_operand" "=y")
- (plus:V2SI
- (mult:V2SI
- (vec_select:V2SI (sign_extend:V4SI (match_operand:V4HI 1 "register_operand" "y"))
- (parallel [(const_int 1) (const_int 3)]))
- (vec_select:V2SI (sign_extend:V4SI (match_operand:V4HI 2 "register_operand" "y"))
- (parallel [(const_int 0) (const_int 2)])))
- (mult:V2SI
- (vec_select:V2SI (sign_extend:V4SI (match_dup 1))
- (parallel [(const_int 0) (const_int 2)]))
- (vec_select:V2SI (sign_extend:V4SI (match_dup 2))
- (parallel [(const_int 1) (const_int 3)])))))]
- "TARGET_REALLY_IWMMXT"
- "wmaddsx%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wmadd")]
-)
-
-(define_insn "iwmmxt_wmaddux"
- [(set (match_operand:V2SI 0 "register_operand" "=y")
- (plus:V2SI
- (mult:V2SI
- (vec_select:V2SI (zero_extend:V4SI (match_operand:V4HI 1 "register_operand" "y"))
- (parallel [(const_int 1) (const_int 3)]))
- (vec_select:V2SI (zero_extend:V4SI (match_operand:V4HI 2 "register_operand" "y"))
- (parallel [(const_int 0) (const_int 2)])))
- (mult:V2SI
- (vec_select:V2SI (zero_extend:V4SI (match_dup 1))
- (parallel [(const_int 0) (const_int 2)]))
- (vec_select:V2SI (zero_extend:V4SI (match_dup 2))
- (parallel [(const_int 1) (const_int 3)])))))]
- "TARGET_REALLY_IWMMXT"
- "wmaddux%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wmadd")]
-)
-
-(define_insn "iwmmxt_wmaddsn"
- [(set (match_operand:V2SI 0 "register_operand" "=y")
- (minus:V2SI
- (mult:V2SI
- (vec_select:V2SI (sign_extend:V4SI (match_operand:V4HI 1 "register_operand" "y"))
- (parallel [(const_int 0) (const_int 2)]))
- (vec_select:V2SI (sign_extend:V4SI (match_operand:V4HI 2 "register_operand" "y"))
- (parallel [(const_int 0) (const_int 2)])))
- (mult:V2SI
- (vec_select:V2SI (sign_extend:V4SI (match_dup 1))
- (parallel [(const_int 1) (const_int 3)]))
- (vec_select:V2SI (sign_extend:V4SI (match_dup 2))
- (parallel [(const_int 1) (const_int 3)])))))]
- "TARGET_REALLY_IWMMXT"
- "wmaddsn%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wmadd")]
-)
-
-(define_insn "iwmmxt_wmaddun"
- [(set (match_operand:V2SI 0 "register_operand" "=y")
- (minus:V2SI
- (mult:V2SI
- (vec_select:V2SI (zero_extend:V4SI (match_operand:V4HI 1 "register_operand" "y"))
- (parallel [(const_int 0) (const_int 2)]))
- (vec_select:V2SI (zero_extend:V4SI (match_operand:V4HI 2 "register_operand" "y"))
- (parallel [(const_int 0) (const_int 2)])))
- (mult:V2SI
- (vec_select:V2SI (zero_extend:V4SI (match_dup 1))
- (parallel [(const_int 1) (const_int 3)]))
- (vec_select:V2SI (zero_extend:V4SI (match_dup 2))
- (parallel [(const_int 1) (const_int 3)])))))]
- "TARGET_REALLY_IWMMXT"
- "wmaddun%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wmadd")]
-)
-
-(define_insn "iwmmxt_wmulwsm"
- [(set (match_operand:V2SI 0 "register_operand" "=y")
- (truncate:V2SI
- (ashiftrt:V2DI
- (mult:V2DI
- (sign_extend:V2DI (match_operand:V2SI 1 "register_operand" "y"))
- (sign_extend:V2DI (match_operand:V2SI 2 "register_operand" "y")))
- (const_int 32))))]
- "TARGET_REALLY_IWMMXT"
- "wmulwsm%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wmulw")]
-)
-
-(define_insn "iwmmxt_wmulwum"
- [(set (match_operand:V2SI 0 "register_operand" "=y")
- (truncate:V2SI
- (lshiftrt:V2DI
- (mult:V2DI
- (zero_extend:V2DI (match_operand:V2SI 1 "register_operand" "y"))
- (zero_extend:V2DI (match_operand:V2SI 2 "register_operand" "y")))
- (const_int 32))))]
- "TARGET_REALLY_IWMMXT"
- "wmulwum%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wmulw")]
-)
-
-(define_insn "iwmmxt_wmulsmr"
- [(set (match_operand:V4HI 0 "register_operand" "=y")
- (truncate:V4HI
- (ashiftrt:V4SI
- (plus:V4SI
- (mult:V4SI
- (sign_extend:V4SI (match_operand:V4HI 1 "register_operand" "y"))
- (sign_extend:V4SI (match_operand:V4HI 2 "register_operand" "y")))
- (const_vector:V4SI [(const_int 32768) (const_int 32768)
- (const_int 32768)
- (const_int 32768)]))
- (const_int 16))))]
- "TARGET_REALLY_IWMMXT"
- "wmulsmr%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wmul")]
-)
-
-(define_insn "iwmmxt_wmulumr"
- [(set (match_operand:V4HI 0 "register_operand" "=y")
- (truncate:V4HI
- (lshiftrt:V4SI
- (plus:V4SI
- (mult:V4SI
- (zero_extend:V4SI (match_operand:V4HI 1 "register_operand" "y"))
- (zero_extend:V4SI (match_operand:V4HI 2 "register_operand" "y")))
- (const_vector:V4SI [(const_int 32768)
- (const_int 32768)
- (const_int 32768)
- (const_int 32768)]))
- (const_int 16))))]
- "TARGET_REALLY_IWMMXT"
- "wmulumr%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wmul")]
-)
-
-(define_insn "iwmmxt_wmulwsmr"
- [(set (match_operand:V2SI 0 "register_operand" "=y")
- (truncate:V2SI
- (ashiftrt:V2DI
- (plus:V2DI
- (mult:V2DI
- (sign_extend:V2DI (match_operand:V2SI 1 "register_operand" "y"))
- (sign_extend:V2DI (match_operand:V2SI 2 "register_operand" "y")))
- (const_vector:V2DI [(const_int 2147483648)
- (const_int 2147483648)]))
- (const_int 32))))]
- "TARGET_REALLY_IWMMXT"
- "wmulwsmr%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wmul")]
-)
-
-(define_insn "iwmmxt_wmulwumr"
- [(set (match_operand:V2SI 0 "register_operand" "=y")
- (truncate:V2SI
- (lshiftrt:V2DI
- (plus:V2DI
- (mult:V2DI
- (zero_extend:V2DI (match_operand:V2SI 1 "register_operand" "y"))
- (zero_extend:V2DI (match_operand:V2SI 2 "register_operand" "y")))
- (const_vector:V2DI [(const_int 2147483648)
- (const_int 2147483648)]))
- (const_int 32))))]
- "TARGET_REALLY_IWMMXT"
- "wmulwumr%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wmulw")]
-)
-
-(define_insn "iwmmxt_wmulwl"
- [(set (match_operand:V2SI 0 "register_operand" "=y")
- (mult:V2SI
- (match_operand:V2SI 1 "register_operand" "y")
- (match_operand:V2SI 2 "register_operand" "y")))]
- "TARGET_REALLY_IWMMXT"
- "wmulwl%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wmulw")]
-)
-
-(define_insn "iwmmxt_wqmulm"
- [(set (match_operand:V4HI 0 "register_operand" "=y")
- (unspec:V4HI [(match_operand:V4HI 1 "register_operand" "y")
- (match_operand:V4HI 2 "register_operand" "y")] UNSPEC_WQMULM))]
- "TARGET_REALLY_IWMMXT"
- "wqmulm%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wqmulm")]
-)
-
-(define_insn "iwmmxt_wqmulwm"
- [(set (match_operand:V2SI 0 "register_operand" "=y")
- (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "y")
- (match_operand:V2SI 2 "register_operand" "y")] UNSPEC_WQMULWM))]
- "TARGET_REALLY_IWMMXT"
- "wqmulwm%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wqmulwm")]
-)
-
-(define_insn "iwmmxt_wqmulmr"
- [(set (match_operand:V4HI 0 "register_operand" "=y")
- (unspec:V4HI [(match_operand:V4HI 1 "register_operand" "y")
- (match_operand:V4HI 2 "register_operand" "y")] UNSPEC_WQMULMR))]
- "TARGET_REALLY_IWMMXT"
- "wqmulmr%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wqmulm")]
-)
-
-(define_insn "iwmmxt_wqmulwmr"
- [(set (match_operand:V2SI 0 "register_operand" "=y")
- (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "y")
- (match_operand:V2SI 2 "register_operand" "y")] UNSPEC_WQMULWMR))]
- "TARGET_REALLY_IWMMXT"
- "wqmulwmr%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wqmulwm")]
-)
-
-(define_insn "iwmmxt_waddbhusm"
- [(set (match_operand:V8QI 0 "register_operand" "=y")
- (vec_concat:V8QI
- (const_vector:V4QI [(const_int 0) (const_int 0) (const_int 0) (const_int 0)])
- (us_truncate:V4QI
- (ss_plus:V4HI
- (match_operand:V4HI 1 "register_operand" "y")
- (zero_extend:V4HI
- (vec_select:V4QI (match_operand:V8QI 2 "register_operand" "y")
- (parallel [(const_int 4) (const_int 5) (const_int 6) (const_int 7)])))))))]
- "TARGET_REALLY_IWMMXT"
- "waddbhusm%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_waddbhus")]
-)
-
-(define_insn "iwmmxt_waddbhusl"
- [(set (match_operand:V8QI 0 "register_operand" "=y")
- (vec_concat:V8QI
- (us_truncate:V4QI
- (ss_plus:V4HI
- (match_operand:V4HI 1 "register_operand" "y")
- (zero_extend:V4HI
- (vec_select:V4QI (match_operand:V8QI 2 "register_operand" "y")
- (parallel [(const_int 0) (const_int 1) (const_int 2) (const_int 3)])))))
- (const_vector:V4QI [(const_int 0) (const_int 0) (const_int 0) (const_int 0)])))]
- "TARGET_REALLY_IWMMXT"
- "waddbhusl%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_waddbhus")]
-)
-
-(define_insn "iwmmxt_wqmiabb"
- [(set (match_operand:V2SI 0 "register_operand" "=y")
- (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "0")
- (zero_extract:V4HI (match_operand:V4HI 2 "register_operand" "y") (const_int 16) (const_int 0))
- (zero_extract:V4HI (match_dup 2) (const_int 16) (const_int 32))
- (zero_extract:V4HI (match_operand:V4HI 3 "register_operand" "y") (const_int 16) (const_int 0))
- (zero_extract:V4HI (match_dup 3) (const_int 16) (const_int 32))] UNSPEC_WQMIAxy))]
- "TARGET_REALLY_IWMMXT"
- "wqmiabb%?\\t%0, %2, %3"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wqmiaxy")]
-)
-
-(define_insn "iwmmxt_wqmiabt"
- [(set (match_operand:V2SI 0 "register_operand" "=y")
- (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "0")
- (zero_extract:V4HI (match_operand:V4HI 2 "register_operand" "y") (const_int 16) (const_int 0))
- (zero_extract:V4HI (match_dup 2) (const_int 16) (const_int 32))
- (zero_extract:V4HI (match_operand:V4HI 3 "register_operand" "y") (const_int 16) (const_int 16))
- (zero_extract:V4HI (match_dup 3) (const_int 16) (const_int 48))] UNSPEC_WQMIAxy))]
- "TARGET_REALLY_IWMMXT"
- "wqmiabt%?\\t%0, %2, %3"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wqmiaxy")]
-)
-
-(define_insn "iwmmxt_wqmiatb"
- [(set (match_operand:V2SI 0 "register_operand" "=y")
- (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "0")
- (zero_extract:V4HI (match_operand:V4HI 2 "register_operand" "y") (const_int 16) (const_int 16))
- (zero_extract:V4HI (match_dup 2) (const_int 16) (const_int 48))
- (zero_extract:V4HI (match_operand:V4HI 3 "register_operand" "y") (const_int 16) (const_int 0))
- (zero_extract:V4HI (match_dup 3) (const_int 16) (const_int 32))] UNSPEC_WQMIAxy))]
- "TARGET_REALLY_IWMMXT"
- "wqmiatb%?\\t%0, %2, %3"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wqmiaxy")]
-)
-
-(define_insn "iwmmxt_wqmiatt"
- [(set (match_operand:V2SI 0 "register_operand" "=y")
- (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "0")
- (zero_extract:V4HI (match_operand:V4HI 2 "register_operand" "y") (const_int 16) (const_int 16))
- (zero_extract:V4HI (match_dup 2) (const_int 16) (const_int 48))
- (zero_extract:V4HI (match_operand:V4HI 3 "register_operand" "y") (const_int 16) (const_int 16))
- (zero_extract:V4HI (match_dup 3) (const_int 16) (const_int 48))] UNSPEC_WQMIAxy))]
- "TARGET_REALLY_IWMMXT"
- "wqmiatt%?\\t%0, %2, %3"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wqmiaxy")]
-)
-
-(define_insn "iwmmxt_wqmiabbn"
- [(set (match_operand:V2SI 0 "register_operand" "=y")
- (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "0")
- (zero_extract:V4HI (match_operand:V4HI 2 "register_operand" "y") (const_int 16) (const_int 0))
- (zero_extract:V4HI (match_dup 2) (const_int 16) (const_int 32))
- (zero_extract:V4HI (match_operand:V4HI 3 "register_operand" "y") (const_int 16) (const_int 0))
- (zero_extract:V4HI (match_dup 3) (const_int 16) (const_int 32))] UNSPEC_WQMIAxyn))]
- "TARGET_REALLY_IWMMXT"
- "wqmiabbn%?\\t%0, %2, %3"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wqmiaxy")]
-)
-
-(define_insn "iwmmxt_wqmiabtn"
- [(set (match_operand:V2SI 0 "register_operand" "=y")
- (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "0")
- (zero_extract:V4HI (match_operand:V4HI 2 "register_operand" "y") (const_int 16) (const_int 0))
- (zero_extract:V4HI (match_dup 2) (const_int 16) (const_int 32))
- (zero_extract:V4HI (match_operand:V4HI 3 "register_operand" "y") (const_int 16) (const_int 16))
- (zero_extract:V4HI (match_dup 3) (const_int 16) (const_int 48))] UNSPEC_WQMIAxyn))]
- "TARGET_REALLY_IWMMXT"
- "wqmiabtn%?\\t%0, %2, %3"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wqmiaxy")]
-)
-
-(define_insn "iwmmxt_wqmiatbn"
- [(set (match_operand:V2SI 0 "register_operand" "=y")
- (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "0")
- (zero_extract:V4HI (match_operand:V4HI 2 "register_operand" "y") (const_int 16) (const_int 16))
- (zero_extract:V4HI (match_dup 2) (const_int 16) (const_int 48))
- (zero_extract:V4HI (match_operand:V4HI 3 "register_operand" "y") (const_int 16) (const_int 0))
- (zero_extract:V4HI (match_dup 3) (const_int 16) (const_int 32))] UNSPEC_WQMIAxyn))]
- "TARGET_REALLY_IWMMXT"
- "wqmiatbn%?\\t%0, %2, %3"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wqmiaxy")]
-)
-
-(define_insn "iwmmxt_wqmiattn"
- [(set (match_operand:V2SI 0 "register_operand" "=y")
- (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "0")
- (zero_extract:V4HI (match_operand:V4HI 2 "register_operand" "y") (const_int 16) (const_int 16))
- (zero_extract:V4HI (match_dup 2) (const_int 16) (const_int 48))
- (zero_extract:V4HI (match_operand:V4HI 3 "register_operand" "y") (const_int 16) (const_int 16))
- (zero_extract:V4HI (match_dup 3) (const_int 16) (const_int 48))] UNSPEC_WQMIAxyn))]
- "TARGET_REALLY_IWMMXT"
- "wqmiattn%?\\t%0, %2, %3"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wqmiaxy")]
-)
-
-(define_insn "iwmmxt_wmiabb"
- [(set (match_operand:DI 0 "register_operand" "=y")
- (plus:DI (match_operand:DI 1 "register_operand" "0")
- (plus:DI
- (mult:DI
- (sign_extend:DI
- (vec_select:HI (match_operand:V4HI 2 "register_operand" "y")
- (parallel [(const_int 0)])))
- (sign_extend:DI
- (vec_select:HI (match_operand:V4HI 3 "register_operand" "y")
- (parallel [(const_int 0)]))))
- (mult:DI
- (sign_extend:DI
- (vec_select:HI (match_dup 2)
- (parallel [(const_int 2)])))
- (sign_extend:DI
- (vec_select:HI (match_dup 3)
- (parallel [(const_int 2)])))))))]
- "TARGET_REALLY_IWMMXT"
- "wmiabb%?\\t%0, %2, %3"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wmiaxy")]
-)
-
-(define_insn "iwmmxt_wmiabt"
- [(set (match_operand:DI 0 "register_operand" "=y")
- (plus:DI (match_operand:DI 1 "register_operand" "0")
- (plus:DI
- (mult:DI
- (sign_extend:DI
- (vec_select:HI (match_operand:V4HI 2 "register_operand" "y")
- (parallel [(const_int 0)])))
- (sign_extend:DI
- (vec_select:HI (match_operand:V4HI 3 "register_operand" "y")
- (parallel [(const_int 1)]))))
- (mult:DI
- (sign_extend:DI
- (vec_select:HI (match_dup 2)
- (parallel [(const_int 2)])))
- (sign_extend:DI
- (vec_select:HI (match_dup 3)
- (parallel [(const_int 3)])))))))]
- "TARGET_REALLY_IWMMXT"
- "wmiabt%?\\t%0, %2, %3"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wmiaxy")]
-)
-
-(define_insn "iwmmxt_wmiatb"
- [(set (match_operand:DI 0 "register_operand" "=y")
- (plus:DI (match_operand:DI 1 "register_operand" "0")
- (plus:DI
- (mult:DI
- (sign_extend:DI
- (vec_select:HI (match_operand:V4HI 2 "register_operand" "y")
- (parallel [(const_int 1)])))
- (sign_extend:DI
- (vec_select:HI (match_operand:V4HI 3 "register_operand" "y")
- (parallel [(const_int 0)]))))
- (mult:DI
- (sign_extend:DI
- (vec_select:HI (match_dup 2)
- (parallel [(const_int 3)])))
- (sign_extend:DI
- (vec_select:HI (match_dup 3)
- (parallel [(const_int 2)])))))))]
- "TARGET_REALLY_IWMMXT"
- "wmiatb%?\\t%0, %2, %3"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wmiaxy")]
-)
-
-(define_insn "iwmmxt_wmiatt"
- [(set (match_operand:DI 0 "register_operand" "=y")
- (plus:DI (match_operand:DI 1 "register_operand" "0")
- (plus:DI
- (mult:DI
- (sign_extend:DI
- (vec_select:HI (match_operand:V4HI 2 "register_operand" "y")
- (parallel [(const_int 1)])))
- (sign_extend:DI
- (vec_select:HI (match_operand:V4HI 3 "register_operand" "y")
- (parallel [(const_int 1)]))))
- (mult:DI
- (sign_extend:DI
- (vec_select:HI (match_dup 2)
- (parallel [(const_int 3)])))
- (sign_extend:DI
- (vec_select:HI (match_dup 3)
- (parallel [(const_int 3)])))))))]
- "TARGET_REALLY_IWMMXT"
- "wmiatt%?\\t%0, %2, %3"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wmiaxy")]
-)
-
-(define_insn "iwmmxt_wmiabbn"
- [(set (match_operand:DI 0 "register_operand" "=y")
- (minus:DI (match_operand:DI 1 "register_operand" "0")
- (plus:DI
- (mult:DI
- (sign_extend:DI
- (vec_select:HI (match_operand:V4HI 2 "register_operand" "y")
- (parallel [(const_int 0)])))
- (sign_extend:DI
- (vec_select:HI (match_operand:V4HI 3 "register_operand" "y")
- (parallel [(const_int 0)]))))
- (mult:DI
- (sign_extend:DI
- (vec_select:HI (match_dup 2)
- (parallel [(const_int 2)])))
- (sign_extend:DI
- (vec_select:HI (match_dup 3)
- (parallel [(const_int 2)])))))))]
- "TARGET_REALLY_IWMMXT"
- "wmiabbn%?\\t%0, %2, %3"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wmiaxy")]
-)
-
-(define_insn "iwmmxt_wmiabtn"
- [(set (match_operand:DI 0 "register_operand" "=y")
- (minus:DI (match_operand:DI 1 "register_operand" "0")
- (plus:DI
- (mult:DI
- (sign_extend:DI
- (vec_select:HI (match_operand:V4HI 2 "register_operand" "y")
- (parallel [(const_int 0)])))
- (sign_extend:DI
- (vec_select:HI (match_operand:V4HI 3 "register_operand" "y")
- (parallel [(const_int 1)]))))
- (mult:DI
- (sign_extend:DI
- (vec_select:HI (match_dup 2)
- (parallel [(const_int 2)])))
- (sign_extend:DI
- (vec_select:HI (match_dup 3)
- (parallel [(const_int 3)])))))))]
- "TARGET_REALLY_IWMMXT"
- "wmiabtn%?\\t%0, %2, %3"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wmiaxy")]
-)
-
-(define_insn "iwmmxt_wmiatbn"
- [(set (match_operand:DI 0 "register_operand" "=y")
- (minus:DI (match_operand:DI 1 "register_operand" "0")
- (plus:DI
- (mult:DI
- (sign_extend:DI
- (vec_select:HI (match_operand:V4HI 2 "register_operand" "y")
- (parallel [(const_int 1)])))
- (sign_extend:DI
- (vec_select:HI (match_operand:V4HI 3 "register_operand" "y")
- (parallel [(const_int 0)]))))
- (mult:DI
- (sign_extend:DI
- (vec_select:HI (match_dup 2)
- (parallel [(const_int 3)])))
- (sign_extend:DI
- (vec_select:HI (match_dup 3)
- (parallel [(const_int 2)])))))))]
- "TARGET_REALLY_IWMMXT"
- "wmiatbn%?\\t%0, %2, %3"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wmiaxy")]
-)
-
-(define_insn "iwmmxt_wmiattn"
- [(set (match_operand:DI 0 "register_operand" "=y")
- (minus:DI (match_operand:DI 1 "register_operand" "0")
- (plus:DI
- (mult:DI
- (sign_extend:DI
- (vec_select:HI (match_operand:V4HI 2 "register_operand" "y")
- (parallel [(const_int 1)])))
- (sign_extend:DI
- (vec_select:HI (match_operand:V4HI 3 "register_operand" "y")
- (parallel [(const_int 1)]))))
- (mult:DI
- (sign_extend:DI
- (vec_select:HI (match_dup 2)
- (parallel [(const_int 3)])))
- (sign_extend:DI
- (vec_select:HI (match_dup 3)
- (parallel [(const_int 3)])))))))]
- "TARGET_REALLY_IWMMXT"
- "wmiattn%?\\t%0, %2, %3"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wmiaxy")]
-)
-
-(define_insn "iwmmxt_wmiawbb"
- [(set (match_operand:DI 0 "register_operand" "=y")
- (plus:DI
- (match_operand:DI 1 "register_operand" "0")
- (mult:DI
- (sign_extend:DI (vec_select:SI (match_operand:V2SI 2 "register_operand" "y") (parallel [(const_int 0)])))
- (sign_extend:DI (vec_select:SI (match_operand:V2SI 3 "register_operand" "y") (parallel [(const_int 0)]))))))]
- "TARGET_REALLY_IWMMXT"
- "wmiawbb%?\\t%0, %2, %3"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wmiawxy")]
-)
-
-(define_insn "iwmmxt_wmiawbt"
- [(set (match_operand:DI 0 "register_operand" "=y")
- (plus:DI
- (match_operand:DI 1 "register_operand" "0")
- (mult:DI
- (sign_extend:DI (vec_select:SI (match_operand:V2SI 2 "register_operand" "y") (parallel [(const_int 0)])))
- (sign_extend:DI (vec_select:SI (match_operand:V2SI 3 "register_operand" "y") (parallel [(const_int 1)]))))))]
- "TARGET_REALLY_IWMMXT"
- "wmiawbt%?\\t%0, %2, %3"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wmiawxy")]
-)
-
-(define_insn "iwmmxt_wmiawtb"
- [(set (match_operand:DI 0 "register_operand" "=y")
- (plus:DI
- (match_operand:DI 1 "register_operand" "0")
- (mult:DI
- (sign_extend:DI (vec_select:SI (match_operand:V2SI 2 "register_operand" "y") (parallel [(const_int 1)])))
- (sign_extend:DI (vec_select:SI (match_operand:V2SI 3 "register_operand" "y") (parallel [(const_int 0)]))))))]
- "TARGET_REALLY_IWMMXT"
- "wmiawtb%?\\t%0, %2, %3"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wmiawxy")]
-)
-
-(define_insn "iwmmxt_wmiawtt"
-[(set (match_operand:DI 0 "register_operand" "=y")
- (plus:DI
- (match_operand:DI 1 "register_operand" "0")
- (mult:DI
- (sign_extend:DI (vec_select:SI (match_operand:V2SI 2 "register_operand" "y") (parallel [(const_int 1)])))
- (sign_extend:DI (vec_select:SI (match_operand:V2SI 3 "register_operand" "y") (parallel [(const_int 1)]))))))]
- "TARGET_REALLY_IWMMXT"
- "wmiawtt%?\\t%0, %2, %3"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wmiawxy")]
-)
-
-(define_insn "iwmmxt_wmiawbbn"
- [(set (match_operand:DI 0 "register_operand" "=y")
- (minus:DI
- (match_operand:DI 1 "register_operand" "0")
- (mult:DI
- (sign_extend:DI (vec_select:SI (match_operand:V2SI 2 "register_operand" "y") (parallel [(const_int 0)])))
- (sign_extend:DI (vec_select:SI (match_operand:V2SI 3 "register_operand" "y") (parallel [(const_int 0)]))))))]
- "TARGET_REALLY_IWMMXT"
- "wmiawbbn%?\\t%0, %2, %3"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wmiawxy")]
-)
-
-(define_insn "iwmmxt_wmiawbtn"
- [(set (match_operand:DI 0 "register_operand" "=y")
- (minus:DI
- (match_operand:DI 1 "register_operand" "0")
- (mult:DI
- (sign_extend:DI (vec_select:SI (match_operand:V2SI 2 "register_operand" "y") (parallel [(const_int 0)])))
- (sign_extend:DI (vec_select:SI (match_operand:V2SI 3 "register_operand" "y") (parallel [(const_int 1)]))))))]
- "TARGET_REALLY_IWMMXT"
- "wmiawbtn%?\\t%0, %2, %3"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wmiawxy")]
-)
-
-(define_insn "iwmmxt_wmiawtbn"
- [(set (match_operand:DI 0 "register_operand" "=y")
- (minus:DI
- (match_operand:DI 1 "register_operand" "0")
- (mult:DI
- (sign_extend:DI (vec_select:SI (match_operand:V2SI 2 "register_operand" "y") (parallel [(const_int 1)])))
- (sign_extend:DI (vec_select:SI (match_operand:V2SI 3 "register_operand" "y") (parallel [(const_int 0)]))))))]
- "TARGET_REALLY_IWMMXT"
- "wmiawtbn%?\\t%0, %2, %3"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wmiawxy")]
-)
-
-(define_insn "iwmmxt_wmiawttn"
- [(set (match_operand:DI 0 "register_operand" "=y")
- (minus:DI
- (match_operand:DI 1 "register_operand" "0")
- (mult:DI
- (sign_extend:DI (vec_select:SI (match_operand:V2SI 2 "register_operand" "y") (parallel [(const_int 1)])))
- (sign_extend:DI (vec_select:SI (match_operand:V2SI 3 "register_operand" "y") (parallel [(const_int 1)]))))))]
- "TARGET_REALLY_IWMMXT"
- "wmiawttn%?\\t%0, %2, %3"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wmiawxy")]
-)
-
-(define_insn "iwmmxt_wmerge"
- [(set (match_operand:DI 0 "register_operand" "=y")
- (ior:DI
- (ashift:DI
- (match_operand:DI 2 "register_operand" "y")
- (minus:SI
- (const_int 64)
- (mult:SI
- (match_operand:SI 3 "immediate_operand" "i")
- (const_int 8))))
- (lshiftrt:DI
- (ashift:DI
- (match_operand:DI 1 "register_operand" "y")
- (mult:SI
- (match_dup 3)
- (const_int 8)))
- (mult:SI
- (match_dup 3)
- (const_int 8)))))]
- "TARGET_REALLY_IWMMXT"
- "wmerge%?\\t%0, %1, %2, %3"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_wmerge")]
-)
-
-(define_insn "iwmmxt_tandc<mode>3"
- [(set (reg:CC CC_REGNUM)
- (subreg:CC (unspec:VMMX [(const_int 0)] UNSPEC_TANDC) 0))
- (unspec:CC [(reg:SI 15)] UNSPEC_TANDC)]
- "TARGET_REALLY_IWMMXT"
- "tandc<MMX_char>%?\\t r15"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_tandc")]
-)
-
-(define_insn "iwmmxt_torc<mode>3"
- [(set (reg:CC CC_REGNUM)
- (subreg:CC (unspec:VMMX [(const_int 0)] UNSPEC_TORC) 0))
- (unspec:CC [(reg:SI 15)] UNSPEC_TORC)]
- "TARGET_REALLY_IWMMXT"
- "torc<MMX_char>%?\\t r15"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_torc")]
-)
-
-(define_insn "iwmmxt_torvsc<mode>3"
- [(set (reg:CC CC_REGNUM)
- (subreg:CC (unspec:VMMX [(const_int 0)] UNSPEC_TORVSC) 0))
- (unspec:CC [(reg:SI 15)] UNSPEC_TORVSC)]
- "TARGET_REALLY_IWMMXT"
- "torvsc<MMX_char>%?\\t r15"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_torvsc")]
-)
-
-(define_insn "iwmmxt_textrc<mode>3"
- [(set (reg:CC CC_REGNUM)
- (subreg:CC (unspec:VMMX [(const_int 0)
- (match_operand:SI 0 "immediate_operand" "i")] UNSPEC_TEXTRC) 0))
- (unspec:CC [(reg:SI 15)] UNSPEC_TEXTRC)]
- "TARGET_REALLY_IWMMXT"
- "textrc<MMX_char>%?\\t r15, %0"
- [(set_attr "predicable" "yes")
- (set_attr "type" "wmmx_textrc")]
-)
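
Note on the widening multiplies above: wmulwsm/wmulwum return the high
32 bits of a 32x32->64 multiply per lane, and the *r forms round first,
which is why the RTL adds a const_vector of 2^31 (2147483648) before
the 32-bit shift (the halfword forms wmulsmr/wmulumr do the same with
2^15 and a 16-bit shift).  A per-lane C sketch of the assumed
semantics, with invented names:

    #include <stdint.h>

    /* One V2SI lane of wmulwsm: signed high-half multiply.  */
    static int32_t
    wmulwsm_lane (int32_t a, int32_t b)
    {
      return (int32_t) (((int64_t) a * b) >> 32);
    }

    /* One V2SI lane of wmulwsmr: add 2^31 (the const_vector addend in
       the RTL) before taking the high half, i.e. round to nearest.  */
    static int32_t
    wmulwsmr_lane (int32_t a, int32_t b)
    {
      return (int32_t) (((int64_t) a * b + ((int64_t) 1 << 31)) >> 32);
    }
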
diff --git a/gcc/config/arm/marvell-f-iwmmxt.md b/gcc/config/arm/marvell-f-iwmmxt.md
deleted file mode 100644
index c9c7b00..0000000
--- a/gcc/config/arm/marvell-f-iwmmxt.md
+++ /dev/null
@@ -1,189 +0,0 @@
-;; Marvell WMMX2 pipeline description
-;; Copyright (C) 2011-2025 Free Software Foundation, Inc.
-;; Written by Marvell, Inc.
-
-;; This file is part of GCC.
-
-;; GCC is free software; you can redistribute it and/or modify it
-;; under the terms of the GNU General Public License as published
-;; by the Free Software Foundation; either version 3, or (at your
-;; option) any later version.
-
-;; GCC is distributed in the hope that it will be useful, but WITHOUT
-;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
-;; License for more details.
-
-;; You should have received a copy of the GNU General Public License
-;; along with GCC; see the file COPYING3. If not see
-;; <http://www.gnu.org/licenses/>.
-
-
-(define_automaton "marvell_f_iwmmxt")
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;; Pipelines
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-;; This is a 7-stage pipeline:
-;;
-;; MD | MI | ME1 | ME2 | ME3 | ME4 | MW
-;;
-;; There are various bypasses modelled to a greater or lesser extent.
-;;
-;; Latencies in this file correspond to the number of cycles after
-;; the issue stage that it takes for the result of the instruction to
-;; be computed, or for its side-effects to occur.
-
-(define_cpu_unit "mf_iwmmxt_MD" "marvell_f_iwmmxt")
-(define_cpu_unit "mf_iwmmxt_MI" "marvell_f_iwmmxt")
-(define_cpu_unit "mf_iwmmxt_ME1" "marvell_f_iwmmxt")
-(define_cpu_unit "mf_iwmmxt_ME2" "marvell_f_iwmmxt")
-(define_cpu_unit "mf_iwmmxt_ME3" "marvell_f_iwmmxt")
-(define_cpu_unit "mf_iwmmxt_ME4" "marvell_f_iwmmxt")
-(define_cpu_unit "mf_iwmmxt_MW" "marvell_f_iwmmxt")
-
-(define_reservation "mf_iwmmxt_ME"
- "mf_iwmmxt_ME1,mf_iwmmxt_ME2,mf_iwmmxt_ME3,mf_iwmmxt_ME4"
-)
-
-(define_reservation "mf_iwmmxt_pipeline"
- "mf_iwmmxt_MD, mf_iwmmxt_MI, mf_iwmmxt_ME, mf_iwmmxt_MW"
-)
-
-;; An attribute to indicate whether our reservations are applicable.
-(define_attr "marvell_f_iwmmxt" "yes,no"
- (const (if_then_else (symbol_ref "arm_arch_iwmmxt")
- (const_string "yes") (const_string "no"))))
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;; instruction classes
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-;; Attributes appended to instructions for classification.
-
-(define_attr "wmmxt_shift" "yes,no"
- (if_then_else (eq_attr "type" "wmmx_wror, wmmx_wsll, wmmx_wsra, wmmx_wsrl")
- (const_string "yes") (const_string "no"))
-)
-
-(define_attr "wmmxt_pack" "yes,no"
- (if_then_else (eq_attr "type" "wmmx_waligni, wmmx_walignr, wmmx_wmerge,\
- wmmx_wpack, wmmx_wshufh, wmmx_wunpckeh,\
- wmmx_wunpckih, wmmx_wunpckel, wmmx_wunpckil")
- (const_string "yes") (const_string "no"))
-)
-
-(define_attr "wmmxt_mult_c1" "yes,no"
- (if_then_else (eq_attr "type" "wmmx_wmac, wmmx_wmadd, wmmx_wmiaxy,\
- wmmx_wmiawxy, wmmx_wmulw, wmmx_wqmiaxy,\
- wmmx_wqmulwm")
- (const_string "yes") (const_string "no"))
-)
-
-(define_attr "wmmxt_mult_c2" "yes,no"
- (if_then_else (eq_attr "type" "wmmx_wmul, wmmx_wqmulm")
- (const_string "yes") (const_string "no"))
-)
-
-(define_attr "wmmxt_alu_c1" "yes,no"
- (if_then_else (eq_attr "type" "wmmx_wabs, wmmx_wabsdiff, wmmx_wand,\
- wmmx_wandn, wmmx_wmov, wmmx_wor, wmmx_wxor")
- (const_string "yes") (const_string "no"))
-)
-
-(define_attr "wmmxt_alu_c2" "yes,no"
- (if_then_else (eq_attr "type" "wmmx_wacc, wmmx_wadd, wmmx_waddsubhx,\
- wmmx_wavg2, wmmx_wavg4, wmmx_wcmpeq,\
- wmmx_wcmpgt, wmmx_wmax, wmmx_wmin,\
- wmmx_wsub, wmmx_waddbhus, wmmx_wsubaddhx")
- (const_string "yes") (const_string "no"))
-)
-
-(define_attr "wmmxt_alu_c3" "yes,no"
- (if_then_else (eq_attr "type" "wmmx_wsad")
- (const_string "yes") (const_string "no"))
-)
-
-(define_attr "wmmxt_transfer_c1" "yes,no"
- (if_then_else (eq_attr "type" "wmmx_tbcst, wmmx_tinsr,\
- wmmx_tmcr, wmmx_tmcrr")
- (const_string "yes") (const_string "no"))
-)
-
-(define_attr "wmmxt_transfer_c2" "yes,no"
- (if_then_else (eq_attr "type" "wmmx_textrm, wmmx_tmovmsk,\
- wmmx_tmrc, wmmx_tmrrc")
- (const_string "yes") (const_string "no"))
-)
-
-(define_attr "wmmxt_transfer_c3" "yes,no"
- (if_then_else (eq_attr "type" "wmmx_tmia, wmmx_tmiaph, wmmx_tmiaxy")
- (const_string "yes") (const_string "no"))
-)
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;; Main description
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-(define_insn_reservation "marvell_f_iwmmxt_alu_c1" 1
- (and (eq_attr "marvell_f_iwmmxt" "yes")
- (eq_attr "wmmxt_alu_c1" "yes"))
- "mf_iwmmxt_pipeline")
-
-(define_insn_reservation "marvell_f_iwmmxt_pack" 1
- (and (eq_attr "marvell_f_iwmmxt" "yes")
- (eq_attr "wmmxt_pack" "yes"))
- "mf_iwmmxt_pipeline")
-
-(define_insn_reservation "marvell_f_iwmmxt_shift" 1
- (and (eq_attr "marvell_f_iwmmxt" "yes")
- (eq_attr "wmmxt_shift" "yes"))
- "mf_iwmmxt_pipeline")
-
-(define_insn_reservation "marvell_f_iwmmxt_transfer_c1" 1
- (and (eq_attr "marvell_f_iwmmxt" "yes")
- (eq_attr "wmmxt_transfer_c1" "yes"))
- "mf_iwmmxt_pipeline")
-
-(define_insn_reservation "marvell_f_iwmmxt_transfer_c2" 5
- (and (eq_attr "marvell_f_iwmmxt" "yes")
- (eq_attr "wmmxt_transfer_c2" "yes"))
- "mf_iwmmxt_pipeline")
-
-(define_insn_reservation "marvell_f_iwmmxt_alu_c2" 2
- (and (eq_attr "marvell_f_iwmmxt" "yes")
- (eq_attr "wmmxt_alu_c2" "yes"))
- "mf_iwmmxt_pipeline")
-
-(define_insn_reservation "marvell_f_iwmmxt_alu_c3" 3
- (and (eq_attr "marvell_f_iwmmxt" "yes")
- (eq_attr "wmmxt_alu_c3" "yes"))
- "mf_iwmmxt_pipeline")
-
-(define_insn_reservation "marvell_f_iwmmxt_transfer_c3" 4
- (and (eq_attr "marvell_f_iwmmxt" "yes")
- (eq_attr "wmmxt_transfer_c3" "yes"))
- "mf_iwmmxt_pipeline")
-
-(define_insn_reservation "marvell_f_iwmmxt_mult_c1" 4
- (and (eq_attr "marvell_f_iwmmxt" "yes")
- (eq_attr "wmmxt_mult_c1" "yes"))
- "mf_iwmmxt_pipeline")
-
-;There is a forwarding path from ME3 stage
-(define_insn_reservation "marvell_f_iwmmxt_mult_c2" 3
- (and (eq_attr "marvell_f_iwmmxt" "yes")
- (eq_attr "wmmxt_mult_c2" "yes"))
- "mf_iwmmxt_pipeline")
-
-(define_insn_reservation "marvell_f_iwmmxt_wstr" 0
- (and (eq_attr "marvell_f_iwmmxt" "yes")
- (eq_attr "type" "wmmx_wstr"))
- "mf_iwmmxt_pipeline")
-
-;There is a forwarding path from MW stage
-(define_insn_reservation "marvell_f_iwmmxt_wldr" 5
- (and (eq_attr "marvell_f_iwmmxt" "yes")
- (eq_attr "type" "wmmx_wldr"))
- "mf_iwmmxt_pipeline")
diff --git a/gcc/config/arm/predicates.md b/gcc/config/arm/predicates.md
index 75c06d9..c683ec2 100644
--- a/gcc/config/arm/predicates.md
+++ b/gcc/config/arm/predicates.md
@@ -806,14 +806,8 @@
;;-------------------------------------------------------------------------
;;
-;; iWMMXt predicates
-;;
-
-(define_predicate "imm_or_reg_operand"
- (ior (match_operand 0 "immediate_operand")
- (match_operand 0 "register_operand")))
-
;; Neon predicates
+;;
(define_predicate "const_multiple_of_8_operand"
(match_code "const_int")
@@ -907,7 +901,8 @@
(define_predicate "mem_noofs_operand"
(and (match_code "mem")
- (match_code "reg" "0")))
+ (match_code "reg" "0")
+ (match_operand 0 "memory_operand")))
(define_predicate "call_insn_operand"
(ior (and (match_code "symbol_ref")
diff --git a/gcc/config/arm/t-arm b/gcc/config/arm/t-arm
index 641f8f5..670f574 100644
--- a/gcc/config/arm/t-arm
+++ b/gcc/config/arm/t-arm
@@ -50,11 +50,8 @@ MD_INCLUDES= $(srcdir)/config/arm/arm1020e.md \
$(srcdir)/config/arm/fa726te.md \
$(srcdir)/config/arm/fmp626.md \
$(srcdir)/config/arm/iterators.md \
- $(srcdir)/config/arm/iwmmxt.md \
- $(srcdir)/config/arm/iwmmxt2.md \
$(srcdir)/config/arm/ldmstm.md \
$(srcdir)/config/arm/ldrdstrd.md \
- $(srcdir)/config/arm/marvell-f-iwmmxt.md \
$(srcdir)/config/arm/mve.md \
$(srcdir)/config/arm/neon.md \
$(srcdir)/config/arm/predicates.md \
diff --git a/gcc/config/arm/thumb1.md b/gcc/config/arm/thumb1.md
index f9e89e9..4da0086 100644
--- a/gcc/config/arm/thumb1.md
+++ b/gcc/config/arm/thumb1.md
@@ -1874,10 +1874,10 @@
)
(define_insn "*nonsecure_call_reg_thumb1_v5"
- [(call (unspec:SI [(mem:SI (reg:SI R4_REGNUM))]
- UNSPEC_NONSECURE_MEM)
+ [(call (mem:SI (reg:SI R4_REGNUM))
(match_operand 0 "" ""))
(use (match_operand 1 "" ""))
+ (unspec:SI [(match_operand 2)] UNSPEC_NONSECURE_MEM)
(clobber (reg:SI LR_REGNUM))]
"TARGET_THUMB1 && use_cmse && !SIBLING_CALL_P (insn)"
"bl\\t__gnu_cmse_nonsecure_call"
@@ -1919,11 +1919,10 @@
(define_insn "*nonsecure_call_value_reg_thumb1_v5"
[(set (match_operand 0 "" "")
- (call (unspec:SI
- [(mem:SI (reg:SI R4_REGNUM))]
- UNSPEC_NONSECURE_MEM)
+ (call (mem:SI (reg:SI R4_REGNUM))
(match_operand 1 "" "")))
(use (match_operand 2 "" ""))
+ (unspec:SI [(match_operand 3)] UNSPEC_NONSECURE_MEM)
(clobber (reg:SI LR_REGNUM))]
"TARGET_THUMB1 && use_cmse"
"bl\\t__gnu_cmse_nonsecure_call"
diff --git a/gcc/config/arm/thumb2.md b/gcc/config/arm/thumb2.md
index 172c974..2c2026b 100644
--- a/gcc/config/arm/thumb2.md
+++ b/gcc/config/arm/thumb2.md
@@ -235,7 +235,7 @@
(define_insn "*thumb2_movsi_insn"
[(set (match_operand:SI 0 "nonimmediate_operand" "=rk,r,l,r,r,lk*r,m")
(match_operand:SI 1 "general_operand" "rk,I,Py,K,j,mi,lk*r"))]
- "TARGET_THUMB2 && !TARGET_IWMMXT && !TARGET_HARD_FLOAT
+ "TARGET_THUMB2 && !TARGET_HARD_FLOAT
&& ( register_operand (operands[0], SImode)
|| register_operand (operands[1], SImode))"
{
@@ -537,10 +537,10 @@
)
(define_insn "*nonsecure_call_reg_thumb2_fpcxt"
- [(call (unspec:SI [(mem:SI (match_operand:SI 0 "s_register_operand" "l*r"))]
- UNSPEC_NONSECURE_MEM)
+ [(call (mem:SI (match_operand:SI 0 "s_register_operand" "l*r"))
(match_operand 1 "" ""))
(use (match_operand 2 "" ""))
+ (unspec:SI [(match_operand 3)] UNSPEC_NONSECURE_MEM)
(clobber (reg:SI LR_REGNUM))]
"TARGET_THUMB2 && use_cmse && TARGET_HAVE_FPCXT_CMSE"
"blxns\\t%0"
@@ -549,10 +549,10 @@
)
(define_insn "*nonsecure_call_reg_thumb2"
- [(call (unspec:SI [(mem:SI (reg:SI R4_REGNUM))]
- UNSPEC_NONSECURE_MEM)
+ [(call (mem:SI (reg:SI R4_REGNUM))
(match_operand 0 "" ""))
(use (match_operand 1 "" ""))
+ (unspec:SI [(match_operand 2)] UNSPEC_NONSECURE_MEM)
(clobber (reg:SI LR_REGNUM))]
"TARGET_THUMB2 && use_cmse && !TARGET_HAVE_FPCXT_CMSE"
"bl\\t__gnu_cmse_nonsecure_call"
@@ -573,11 +573,10 @@
(define_insn "*nonsecure_call_value_reg_thumb2_fpcxt"
[(set (match_operand 0 "" "")
- (call
- (unspec:SI [(mem:SI (match_operand:SI 1 "register_operand" "l*r"))]
- UNSPEC_NONSECURE_MEM)
- (match_operand 2 "" "")))
+ (call (mem:SI (match_operand:SI 1 "register_operand" "l*r"))
+ (match_operand 2 "" "")))
(use (match_operand 3 "" ""))
+ (unspec:SI [(match_operand 4)] UNSPEC_NONSECURE_MEM)
(clobber (reg:SI LR_REGNUM))]
"TARGET_THUMB2 && use_cmse && TARGET_HAVE_FPCXT_CMSE"
"blxns\\t%1"
@@ -587,10 +586,10 @@
(define_insn "*nonsecure_call_value_reg_thumb2"
[(set (match_operand 0 "" "")
- (call
- (unspec:SI [(mem:SI (reg:SI R4_REGNUM))] UNSPEC_NONSECURE_MEM)
- (match_operand 1 "" "")))
+ (call (mem:SI (reg:SI R4_REGNUM))
+ (match_operand 1 "" "")))
(use (match_operand 2 "" ""))
+ (unspec:SI [(match_operand 3)] UNSPEC_NONSECURE_MEM)
(clobber (reg:SI LR_REGNUM))]
"TARGET_THUMB2 && use_cmse && !TARGET_HAVE_FPCXT_CMSE"
"bl\\t__gnu_cmse_nonsecure_call"
diff --git a/gcc/config/arm/types.md b/gcc/config/arm/types.md
index b72c871..e517b91 100644
--- a/gcc/config/arm/types.md
+++ b/gcc/config/arm/types.md
@@ -184,70 +184,6 @@
; untyped insn without type information - default, and error,
; case.
;
-; The classification below is for instructions used by the Wireless MMX
-; Technology. Each attribute value is used to classify an instruction of the
-; same name or family.
-;
-; wmmx_tandc
-; wmmx_tbcst
-; wmmx_textrc
-; wmmx_textrm
-; wmmx_tinsr
-; wmmx_tmcr
-; wmmx_tmcrr
-; wmmx_tmia
-; wmmx_tmiaph
-; wmmx_tmiaxy
-; wmmx_tmrc
-; wmmx_tmrrc
-; wmmx_tmovmsk
-; wmmx_torc
-; wmmx_torvsc
-; wmmx_wabs
-; wmmx_wdiff
-; wmmx_wacc
-; wmmx_wadd
-; wmmx_waddbhus
-; wmmx_waddsubhx
-; wmmx_waligni
-; wmmx_walignr
-; wmmx_wand
-; wmmx_wandn
-; wmmx_wavg2
-; wmmx_wavg4
-; wmmx_wcmpeq
-; wmmx_wcmpgt
-; wmmx_wmac
-; wmmx_wmadd
-; wmmx_wmax
-; wmmx_wmerge
-; wmmx_wmiawxy
-; wmmx_wmiaxy
-; wmmx_wmin
-; wmmx_wmov
-; wmmx_wmul
-; wmmx_wmulw
-; wmmx_wldr
-; wmmx_wor
-; wmmx_wpack
-; wmmx_wqmiaxy
-; wmmx_wqmulm
-; wmmx_wqmulwm
-; wmmx_wror
-; wmmx_wsad
-; wmmx_wshufh
-; wmmx_wsll
-; wmmx_wsra
-; wmmx_wsrl
-; wmmx_wstr
-; wmmx_wsub
-; wmmx_wsubaddhx
-; wmmx_wunpckeh
-; wmmx_wunpckel
-; wmmx_wunpckih
-; wmmx_wunpckil
-; wmmx_wxor
-;
; The classification below is for NEON instructions. If a new neon type is
; added, please ensure this is added to the is_neon_type attribute below too.
;
@@ -714,65 +650,6 @@
umull,\
umulls,\
untyped,\
- wmmx_tandc,\
- wmmx_tbcst,\
- wmmx_textrc,\
- wmmx_textrm,\
- wmmx_tinsr,\
- wmmx_tmcr,\
- wmmx_tmcrr,\
- wmmx_tmia,\
- wmmx_tmiaph,\
- wmmx_tmiaxy,\
- wmmx_tmrc,\
- wmmx_tmrrc,\
- wmmx_tmovmsk,\
- wmmx_torc,\
- wmmx_torvsc,\
- wmmx_wabs,\
- wmmx_wabsdiff,\
- wmmx_wacc,\
- wmmx_wadd,\
- wmmx_waddbhus,\
- wmmx_waddsubhx,\
- wmmx_waligni,\
- wmmx_walignr,\
- wmmx_wand,\
- wmmx_wandn,\
- wmmx_wavg2,\
- wmmx_wavg4,\
- wmmx_wcmpeq,\
- wmmx_wcmpgt,\
- wmmx_wmac,\
- wmmx_wmadd,\
- wmmx_wmax,\
- wmmx_wmerge,\
- wmmx_wmiawxy,\
- wmmx_wmiaxy,\
- wmmx_wmin,\
- wmmx_wmov,\
- wmmx_wmul,\
- wmmx_wmulw,\
- wmmx_wldr,\
- wmmx_wor,\
- wmmx_wpack,\
- wmmx_wqmiaxy,\
- wmmx_wqmulm,\
- wmmx_wqmulwm,\
- wmmx_wror,\
- wmmx_wsad,\
- wmmx_wshufh,\
- wmmx_wsll,\
- wmmx_wsra,\
- wmmx_wsrl,\
- wmmx_wstr,\
- wmmx_wsub,\
- wmmx_wsubaddhx,\
- wmmx_wunpckeh,\
- wmmx_wunpckel,\
- wmmx_wunpckih,\
- wmmx_wunpckil,\
- wmmx_wxor,\
\
neon_add,\
neon_add_q,\
diff --git a/gcc/config/arm/unspecs.md b/gcc/config/arm/unspecs.md
index a03609d..c1ee972 100644
--- a/gcc/config/arm/unspecs.md
+++ b/gcc/config/arm/unspecs.md
@@ -21,7 +21,6 @@
;; UNSPEC Usage:
;; Note: sin and cos are no-longer used.
;; Unspec enumerators for Neon are defined in neon.md.
-;; Unspec enumerators for iwmmxt2 are defined in iwmmxt2.md
(define_c_enum "unspec" [
UNSPEC_PUSH_MULT ; `push multiple' operation:
@@ -42,17 +41,6 @@
; and stack frame generation. Operand 0 is the
; register to "use".
UNSPEC_CHECK_ARCH ; Set CCs to indicate 26-bit or 32-bit mode.
- UNSPEC_WSHUFH ; Used by the intrinsic form of the iWMMXt WSHUFH instruction.
- UNSPEC_WACC ; Used by the intrinsic form of the iWMMXt WACC instruction.
- UNSPEC_TMOVMSK ; Used by the intrinsic form of the iWMMXt TMOVMSK instruction.
- UNSPEC_WSAD ; Used by the intrinsic form of the iWMMXt WSAD instruction.
- UNSPEC_WSADZ ; Used by the intrinsic form of the iWMMXt WSADZ instruction.
- UNSPEC_WMACS ; Used by the intrinsic form of the iWMMXt WMACS instruction.
- UNSPEC_WMACU ; Used by the intrinsic form of the iWMMXt WMACU instruction.
- UNSPEC_WMACSZ ; Used by the intrinsic form of the iWMMXt WMACSZ instruction.
- UNSPEC_WMACUZ ; Used by the intrinsic form of the iWMMXt WMACUZ instruction.
- UNSPEC_CLRDI ; Used by the intrinsic form of the iWMMXt CLRDI instruction.
- UNSPEC_WALIGNI ; Used by the intrinsic form of the iWMMXt WALIGN instruction.
UNSPEC_TLS ; A symbol that has been treated properly for TLS usage.
UNSPEC_PIC_LABEL ; A label used for PIC access that does not appear in the
; instruction stream.
@@ -164,18 +152,6 @@
(define_c_enum "unspec" [
- UNSPEC_WADDC ; Used by the intrinsic form of the iWMMXt WADDC instruction.
- UNSPEC_WABS ; Used by the intrinsic form of the iWMMXt WABS instruction.
- UNSPEC_WQMULWMR ; Used by the intrinsic form of the iWMMXt WQMULWMR instruction.
- UNSPEC_WQMULMR ; Used by the intrinsic form of the iWMMXt WQMULMR instruction.
- UNSPEC_WQMULWM ; Used by the intrinsic form of the iWMMXt WQMULWM instruction.
- UNSPEC_WQMULM ; Used by the intrinsic form of the iWMMXt WQMULM instruction.
- UNSPEC_WQMIAxyn ; Used by the intrinsic form of the iWMMXt WMIAxyn instruction.
- UNSPEC_WQMIAxy ; Used by the intrinsic form of the iWMMXt WMIAxy instruction.
- UNSPEC_TANDC ; Used by the intrinsic form of the iWMMXt TANDC instruction.
- UNSPEC_TORC ; Used by the intrinsic form of the iWMMXt TORC instruction.
- UNSPEC_TORVSC ; Used by the intrinsic form of the iWMMXt TORVSC instruction.
- UNSPEC_TEXTRC ; Used by the intrinsic form of the iWMMXt TEXTRC instruction.
UNSPEC_GET_FPSCR_NZCVQC ; Represent fetch of FPSCR_nzcvqc content.
])
@@ -205,12 +181,7 @@
; a 64-bit object.
VUNSPEC_POOL_16 ; `pool-entry(16)'. An entry in the constant pool for
; a 128-bit object.
- VUNSPEC_TMRC ; Used by the iWMMXt TMRC instruction.
- VUNSPEC_TMCR ; Used by the iWMMXt TMCR instruction.
VUNSPEC_ALIGN8 ; 8-byte alignment version of VUNSPEC_ALIGN
- VUNSPEC_WCMP_EQ ; Used by the iWMMXt WCMPEQ instructions
- VUNSPEC_WCMP_GTU ; Used by the iWMMXt WCMPGTU instructions
- VUNSPEC_WCMP_GT ; Used by the iwMMXT WCMPGT instructions
VUNSPEC_EH_RETURN ; Use to override the return address for exception
; handling.
VUNSPEC_ATOMIC_CAS ; Represent an atomic compare swap.
diff --git a/gcc/config/arm/vec-common.md b/gcc/config/arm/vec-common.md
index a485d05..061165e 100644
--- a/gcc/config/arm/vec-common.md
+++ b/gcc/config/arm/vec-common.md
@@ -1,4 +1,4 @@
-;; Machine Description for shared bits common to IWMMXT and Neon.
+;; Machine Description for shared bits common to Neon and MVE.
;; Copyright (C) 2006-2025 Free Software Foundation, Inc.
;; Written by CodeSourcery.
;;
@@ -24,7 +24,6 @@
[(set (match_operand:VNIM1 0 "nonimmediate_operand")
(match_operand:VNIM1 1 "general_operand"))]
"TARGET_NEON
- || (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (<MODE>mode))
|| (TARGET_HAVE_MVE && VALID_MVE_SI_MODE (<MODE>mode))
|| (TARGET_HAVE_MVE_FLOAT && VALID_MVE_SF_MODE (<MODE>mode))"
{
@@ -46,8 +45,7 @@
(define_expand "mov<mode>"
[(set (match_operand:VNINOTM1 0 "nonimmediate_operand")
(match_operand:VNINOTM1 1 "general_operand"))]
- "TARGET_NEON
- || (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (<MODE>mode))"
+ "TARGET_NEON"
{
gcc_checking_assert (aligned_operand (operands[0], <MODE>mode));
gcc_checking_assert (aligned_operand (operands[1], <MODE>mode));
@@ -83,7 +81,7 @@
})
;; Vector arithmetic. Expanders are blank, then unnamed insns implement
-;; patterns separately for Neon, IWMMXT and MVE.
+;; patterns separately for Neon and MVE.
(define_expand "add<mode>3"
[(set (match_operand:VDQ 0 "s_register_operand")
@@ -103,10 +101,7 @@
[(set (match_operand:VDQWH 0 "s_register_operand")
(mult:VDQWH (match_operand:VDQWH 1 "s_register_operand")
(match_operand:VDQWH 2 "s_register_operand")))]
- "ARM_HAVE_<MODE>_ARITH
- && (!TARGET_REALLY_IWMMXT
- || <MODE>mode == V4HImode
- || <MODE>mode == V2SImode)"
+ "ARM_HAVE_<MODE>_ARITH"
)
(define_expand "smin<mode>3"
@@ -216,13 +211,13 @@
(define_expand "one_cmpl<mode>2"
[(set (match_operand:VDQ 0 "s_register_operand")
(not:VDQ (match_operand:VDQ 1 "s_register_operand")))]
- "ARM_HAVE_<MODE>_ARITH && !TARGET_REALLY_IWMMXT"
+ "ARM_HAVE_<MODE>_ARITH"
)
(define_expand "<absneg_str><mode>2"
[(set (match_operand:VDQWH 0 "s_register_operand" "")
(ABSNEG:VDQWH (match_operand:VDQWH 1 "s_register_operand" "")))]
- "ARM_HAVE_<MODE>_ARITH && !TARGET_REALLY_IWMMXT"
+ "ARM_HAVE_<MODE>_ARITH"
)
(define_expand "cadd<rot><mode>3"
@@ -295,8 +290,7 @@
[(set (match_operand:VDQ 0 "nonimmediate_operand")
(unspec:VDQ [(match_operand:VDQ 1 "general_operand")]
UNSPEC_MISALIGNED_ACCESS))]
- "ARM_HAVE_<MODE>_LDST && !BYTES_BIG_ENDIAN
- && unaligned_access && !TARGET_REALLY_IWMMXT"
+ "ARM_HAVE_<MODE>_LDST && !BYTES_BIG_ENDIAN && unaligned_access"
{
rtx *memloc;
bool for_store = false;
@@ -373,7 +367,7 @@
(unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w,w")
(match_operand:VDQIW 2 "imm_lshift_or_reg_neon" "w,Ds")]
VSHLQ))]
- "ARM_HAVE_<MODE>_ARITH && !TARGET_REALLY_IWMMXT"
+ "ARM_HAVE_<MODE>_ARITH"
"@
<mve_insn>.<supf>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2
* return neon_output_shift_immediate (\"vshl\", 'i', &operands[2], <MODE>mode, VALID_NEON_QREG_MODE (<MODE>mode), true);"
@@ -385,7 +379,7 @@
[(set (match_operand:VDQIW 0 "s_register_operand" "")
(ashift:VDQIW (match_operand:VDQIW 1 "s_register_operand" "")
(match_operand:VDQIW 2 "imm_lshift_or_reg_neon" "")))]
- "ARM_HAVE_<MODE>_ARITH && !TARGET_REALLY_IWMMXT"
+ "ARM_HAVE_<MODE>_ARITH"
{
emit_insn (gen_mve_vshlq_u<mode> (operands[0], operands[1], operands[2]));
DONE;
@@ -398,7 +392,7 @@
[(set (match_operand:VDQIW 0 "s_register_operand")
(ashiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand")
(match_operand:VDQIW 2 "imm_rshift_or_reg_neon")))]
- "ARM_HAVE_<MODE>_ARITH && !TARGET_REALLY_IWMMXT"
+ "ARM_HAVE_<MODE>_ARITH"
{
if (s_register_operand (operands[2], <MODE>mode))
{
@@ -416,7 +410,7 @@
[(set (match_operand:VDQIW 0 "s_register_operand")
(lshiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand")
(match_operand:VDQIW 2 "imm_rshift_or_reg_neon")))]
- "ARM_HAVE_<MODE>_ARITH && !TARGET_REALLY_IWMMXT"
+ "ARM_HAVE_<MODE>_ARITH"
{
if (s_register_operand (operands[2], <MODE>mode))
{
@@ -606,8 +600,7 @@
(define_expand "clz<mode>2"
[(set (match_operand:VDQIW 0 "s_register_operand")
(clz:VDQIW (match_operand:VDQIW 1 "s_register_operand")))]
- "ARM_HAVE_<MODE>_ARITH
- && !TARGET_REALLY_IWMMXT"
+ "ARM_HAVE_<MODE>_ARITH"
)
(define_expand "vec_init<mode><V_elem_l>"
[(match_operand:VDQX 0 "s_register_operand")
diff --git a/gcc/config/avr/avr-dimode.md b/gcc/config/avr/avr-dimode.md
index 903bfbf..66ba5a9 100644
--- a/gcc/config/avr/avr-dimode.md
+++ b/gcc/config/avr/avr-dimode.md
@@ -101,10 +101,8 @@
"avr_have_dimode"
"#"
"&& reload_completed"
- [(parallel [(set (reg:ALL8 ACC_A)
- (plus:ALL8 (reg:ALL8 ACC_A)
- (reg:ALL8 ACC_B)))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*add<mode>3_insn"
[(set (reg:ALL8 ACC_A)
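
The "[(scratch)] { DONE_ADD_CCC }" idiom used in this and the following
AVR hunks replaces the spelled-out post-reload replacement patterns:
instead of repeating the insn's own pattern plus a CC clobber, the
splitter re-emits its pattern with the clobber appended (the
DONE_ADD_CCC macro and the avr_add_ccclobber helper are introduced
later in this patch, in avr-protos.h and avr.cc). Illustrative shapes
only, mirroring the pattern deleted just above:

    ;; splitter input:
    (set (reg:ALL8 ACC_A) (plus:ALL8 (reg:ALL8 ACC_A) (reg:ALL8 ACC_B)))
    ;; what the split emits:
    (parallel [(set (reg:ALL8 ACC_A)
                    (plus:ALL8 (reg:ALL8 ACC_A) (reg:ALL8 ACC_B)))
               (clobber (reg:CC REG_CC))])
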
@@ -122,10 +120,8 @@
"avr_have_dimode"
"#"
"&& reload_completed"
- [(parallel [(set (reg:DI ACC_A)
- (plus:DI (reg:DI ACC_A)
- (sign_extend:DI (reg:QI REG_X))))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*adddi3_const8_insn"
[(set (reg:DI ACC_A)
@@ -146,12 +142,10 @@
(match_operand:ALL8 0 "const_operand" "n Ynn")))]
"avr_have_dimode
&& !s8_operand (operands[0], VOIDmode)"
- "#"
- "&& reload_completed"
- [(parallel [(set (reg:ALL8 ACC_A)
- (plus:ALL8 (reg:ALL8 ACC_A)
- (match_dup 0)))
- (clobber (reg:CC REG_CC))])])
+ "#"
+ "&& reload_completed"
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*add<mode>3_const_insn"
[(set (reg:ALL8 ACC_A)
@@ -211,10 +205,8 @@
"avr_have_dimode"
"#"
"&& reload_completed"
- [(parallel [(set (reg:ALL8 ACC_A)
- (minus:ALL8 (reg:ALL8 ACC_A)
- (reg:ALL8 ACC_B)))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*sub<mode>3_insn"
[(set (reg:ALL8 ACC_A)
@@ -236,10 +228,8 @@
"avr_have_dimode"
"#"
"&& reload_completed"
- [(parallel [(set (reg:ALL8 ACC_A)
- (minus:ALL8 (reg:ALL8 ACC_A)
- (match_dup 0)))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*sub<mode>3_const_insn"
[(set (reg:ALL8 ACC_A)
@@ -288,10 +278,8 @@
"avr_have_dimode"
"#"
"&& reload_completed"
- [(parallel [(set (reg:ALL8S ACC_A)
- (ss_addsub:ALL8S (reg:ALL8S ACC_A)
- (reg:ALL8S ACC_B)))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*<code_stdname><mode>3_insn"
[(set (reg:ALL8S ACC_A)
@@ -309,10 +297,8 @@
"avr_have_dimode"
"#"
"&& reload_completed"
- [(parallel [(set (reg:ALL8S ACC_A)
- (ss_addsub:ALL8S (reg:ALL8S ACC_A)
- (match_dup 0)))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*<code_stdname><mode>3_const_insn"
[(set (reg:ALL8S ACC_A)
@@ -361,10 +347,8 @@
"avr_have_dimode"
"#"
"&& reload_completed"
- [(parallel [(set (reg:ALL8U ACC_A)
- (us_addsub:ALL8U (reg:ALL8U ACC_A)
- (reg:ALL8U ACC_B)))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*<code_stdname><mode>3_insn"
[(set (reg:ALL8U ACC_A)
@@ -382,10 +366,8 @@
"avr_have_dimode"
"#"
"&& reload_completed"
- [(parallel [(set (reg:ALL8U ACC_A)
- (us_addsub:ALL8U (reg:ALL8U ACC_A)
- (match_operand:ALL8U 0 "const_operand" "n Ynn")))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*<code_stdname><mode>3_const_insn"
[(set (reg:ALL8U ACC_A)
@@ -421,9 +403,8 @@
"avr_have_dimode"
"#"
"&& reload_completed"
- [(parallel [(set (reg:DI ACC_A)
- (neg:DI (reg:DI ACC_A)))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*negdi2_insn"
[(set (reg:DI ACC_A)
@@ -500,7 +481,7 @@
"avr_have_dimode"
"#"
"&& reload_completed"
- [(const_int 0)]
+ [(scratch)]
{
emit_insn (gen_compare_<mode>2 ());
emit_jump_insn (gen_conditional_jump (operands[0], operands[1]));
@@ -529,7 +510,7 @@
"avr_have_dimode"
"#"
"&& reload_completed"
- [(const_int 0)]
+ [(scratch)]
{
emit_insn (gen_compare_const8_di2 ());
emit_jump_insn (gen_conditional_jump (operands[0], operands[1]));
@@ -556,7 +537,7 @@
&& !s8_operand (operands[1], VOIDmode)"
"#"
"&& reload_completed"
- [(const_int 0)]
+ [(scratch)]
{
emit_insn (gen_compare_const_<mode>2 (operands[1], operands[3]));
emit_jump_insn (gen_conditional_jump (operands[0], operands[2]));
@@ -629,10 +610,8 @@
"avr_have_dimode"
"#"
"&& reload_completed"
- [(parallel [(set (reg:ALL8 ACC_A)
- (di_shifts:ALL8 (reg:ALL8 ACC_A)
- (reg:QI 16)))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*<code_stdname><mode>3_insn"
[(set (reg:ALL8 ACC_A)
@@ -674,14 +653,10 @@
(clobber (reg:HI REG_Z))]
"avr_have_dimode
&& AVR_HAVE_MUL"
- "#"
- "&& reload_completed"
- [(parallel [(set (reg:DI ACC_A)
- (mult:DI (any_extend:DI (reg:SI 18))
- (any_extend:DI (reg:SI 22))))
- (clobber (reg:HI REG_X))
- (clobber (reg:HI REG_Z))
- (clobber (reg:CC REG_CC))])])
+ "#"
+ "&& reload_completed"
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*<extend_u>mulsidi3_insn"
[(set (reg:DI ACC_A)
diff --git a/gcc/config/avr/avr-fixed.md b/gcc/config/avr/avr-fixed.md
index ce46beb..22061fc 100644
--- a/gcc/config/avr/avr-fixed.md
+++ b/gcc/config/avr/avr-fixed.md
@@ -62,10 +62,8 @@
"<FIXED_B:MODE>mode != <FIXED_A:MODE>mode"
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (fract_convert:FIXED_A
- (match_dup 1)))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*fract<FIXED_B:mode><FIXED_A:mode>2"
[(set (match_operand:FIXED_A 0 "register_operand" "=r")
@@ -86,10 +84,8 @@
"<FIXED_B:MODE>mode != <FIXED_A:MODE>mode"
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (unsigned_fract_convert:FIXED_A
- (match_dup 1)))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*fractuns<FIXED_B:mode><FIXED_A:mode>2"
[(set (match_operand:FIXED_A 0 "register_operand" "=r")
@@ -124,10 +120,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (ss_addsub:ALL124S (match_dup 1)
- (match_dup 2)))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*<code_stdname><mode>3"
[(set (match_operand:ALL124S 0 "register_operand" "=??d,d")
@@ -149,10 +143,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (us_addsub:ALL124U (match_dup 1)
- (match_dup 2)))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*<code_stdname><mode>3"
[(set (match_operand:ALL124U 0 "register_operand" "=??r,d")
@@ -189,9 +181,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (ss_neg:QQ (match_dup 1)))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*ssnegqq2"
[(set (match_operand:QQ 0 "register_operand" "=r")
@@ -207,9 +198,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (ss_abs:QQ (match_dup 1)))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*ssabsqq2"
[(set (match_operand:QQ 0 "register_operand" "=r")
@@ -241,9 +231,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (reg:ALL2S 24)
- (ss_abs_neg:ALL2S (reg:ALL2S 24)))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*<code_stdname><mode>2"
[(set (reg:ALL2S 24)
@@ -261,9 +250,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (reg:ALL4S 22)
- (ss_abs_neg:ALL4S (reg:ALL4S 22)))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*<code_stdname><mode>2"
[(set (reg:ALL4S 22)
@@ -296,10 +284,8 @@
"AVR_HAVE_MUL"
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (mult:QQ (match_dup 1)
- (match_dup 2)))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*mulqq3_enh"
[(set (match_operand:QQ 0 "register_operand" "=r")
@@ -317,10 +303,8 @@
"AVR_HAVE_MUL"
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (mult:UQQ (match_dup 1)
- (match_dup 2)))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*muluqq3_enh"
[(set (match_operand:UQQ 0 "register_operand" "=r")
@@ -377,12 +361,8 @@
"!AVR_HAVE_MUL"
"#"
"&& reload_completed"
- [(parallel [(set (reg:QQ 23)
- (mult:QQ (reg:QQ 24)
- (reg:QQ 25)))
- (clobber (reg:QI 22))
- (clobber (reg:HI 24))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*mulqq3.call"
[(set (reg:QQ 23)
@@ -425,11 +405,8 @@
"AVR_HAVE_MUL"
"#"
"&& reload_completed"
- [(parallel [(set (reg:ALL2QA 24)
- (mult:ALL2QA (reg:ALL2QA 18)
- (reg:ALL2QA 26)))
- (clobber (reg:HI 22))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*mul<mode>3.call"
[(set (reg:ALL2QA 24)
@@ -468,10 +445,8 @@
"AVR_HAVE_MUL"
"#"
"&& reload_completed"
- [(parallel [(set (reg:ALL4A 24)
- (mult:ALL4A (reg:ALL4A 16)
- (reg:ALL4A 20)))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*mul<mode>3.call"
[(set (reg:ALL4A 24)
@@ -514,11 +489,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (reg:ALL1Q 24)
- (usdiv:ALL1Q (reg:ALL1Q 25)
- (reg:ALL1Q 22)))
- (clobber (reg:QI 25))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*<code><mode>3.call"
[(set (reg:ALL1Q 24)
@@ -560,12 +532,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (reg:ALL2QA 24)
- (usdiv:ALL2QA (reg:ALL2QA 26)
- (reg:ALL2QA 22)))
- (clobber (reg:HI 26))
- (clobber (reg:QI 21))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*<code><mode>3.call"
[(set (reg:ALL2QA 24)
@@ -608,12 +576,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (reg:ALL4A 22)
- (usdiv:ALL4A (reg:ALL4A 24)
- (reg:ALL4A 18)))
- (clobber (reg:HI 26))
- (clobber (reg:HI 30))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*<code><mode>3.call"
[(set (reg:ALL4A 22)
@@ -684,12 +648,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (unspec:ALL124QA [(match_dup 1)
- (match_dup 2)
- (const_int 0)]
- UNSPEC_ROUND))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*round<mode>3_const"
[(set (match_operand:ALL124QA 0 "register_operand" "=d")
@@ -714,11 +674,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (reg:ALL1Q 24)
- (unspec:ALL1Q [(reg:ALL1Q 22)
- (reg:QI 24)] UNSPEC_ROUND))
- (clobber (reg:ALL1Q 22))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*round<mode>3.libgcc"
[(set (reg:ALL1Q 24)
@@ -740,11 +697,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (reg:ALL2QA 24)
- (unspec:ALL2QA [(reg:ALL2QA 22)
- (reg:QI 24)] UNSPEC_ROUND))
- (clobber (reg:ALL2QA 22))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*round<mode>3.libgcc"
[(set (reg:ALL2QA 24)
@@ -766,11 +720,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (reg:ALL4QA 22)
- (unspec:ALL4QA [(reg:ALL4QA 18)
- (reg:QI 24)] UNSPEC_ROUND))
- (clobber (reg:ALL4QA 18))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*round<mode>3.libgcc"
[(set (reg:ALL4QA 22)
diff --git a/gcc/config/avr/avr-log.cc b/gcc/config/avr/avr-log.cc
index fadb3ca..972ba6b 100644
--- a/gcc/config/avr/avr-log.cc
+++ b/gcc/config/avr/avr-log.cc
@@ -373,7 +373,6 @@ avr_log_set_avr_log (void)
SET_DUMP_DETAIL (insn_addresses);
SET_DUMP_DETAIL (legitimate_address_p);
SET_DUMP_DETAIL (legitimize_address);
- SET_DUMP_DETAIL (legitimize_reload_address);
SET_DUMP_DETAIL (progmem);
SET_DUMP_DETAIL (rtx_costs);
diff --git a/gcc/config/avr/avr-mcus.def b/gcc/config/avr/avr-mcus.def
index ad64050..2e7c8ac 100644
--- a/gcc/config/avr/avr-mcus.def
+++ b/gcc/config/avr/avr-mcus.def
@@ -313,6 +313,10 @@ AVR_MCU ("avr64da28", ARCH_AVRXMEGA2, AVR_CVT | AVR_ISA_FLMAP, "__AVR_AVR
AVR_MCU ("avr64da32", ARCH_AVRXMEGA2, AVR_CVT | AVR_ISA_FLMAP, "__AVR_AVR64DA32__", 0x6000, 0x0, 0x10000, 0)
AVR_MCU ("avr64da48", ARCH_AVRXMEGA2, AVR_CVT | AVR_ISA_FLMAP, "__AVR_AVR64DA48__", 0x6000, 0x0, 0x10000, 0)
AVR_MCU ("avr64da64", ARCH_AVRXMEGA2, AVR_CVT | AVR_ISA_FLMAP, "__AVR_AVR64DA64__", 0x6000, 0x0, 0x10000, 0)
+AVR_MCU ("avr64da28s", ARCH_AVRXMEGA2, AVR_CVT | AVR_ISA_FLMAP, "__AVR_AVR64DA28S__", 0x6000, 0x0, 0x10000, 0)
+AVR_MCU ("avr64da32s", ARCH_AVRXMEGA2, AVR_CVT | AVR_ISA_FLMAP, "__AVR_AVR64DA32S__", 0x6000, 0x0, 0x10000, 0)
+AVR_MCU ("avr64da48s", ARCH_AVRXMEGA2, AVR_CVT | AVR_ISA_FLMAP, "__AVR_AVR64DA48S__", 0x6000, 0x0, 0x10000, 0)
+AVR_MCU ("avr64da64s", ARCH_AVRXMEGA2, AVR_CVT | AVR_ISA_FLMAP, "__AVR_AVR64DA64S__", 0x6000, 0x0, 0x10000, 0)
AVR_MCU ("avr64db28", ARCH_AVRXMEGA2, AVR_CVT | AVR_ISA_FLMAP, "__AVR_AVR64DB28__", 0x6000, 0x0, 0x10000, 0)
AVR_MCU ("avr64db32", ARCH_AVRXMEGA2, AVR_CVT | AVR_ISA_FLMAP, "__AVR_AVR64DB32__", 0x6000, 0x0, 0x10000, 0)
AVR_MCU ("avr64db48", ARCH_AVRXMEGA2, AVR_CVT | AVR_ISA_FLMAP, "__AVR_AVR64DB48__", 0x6000, 0x0, 0x10000, 0)
@@ -389,6 +393,9 @@ AVR_MCU ("avr16du32", ARCH_AVRXMEGA3, AVR_CVT, "__AVR_AVR
AVR_MCU ("avr32da28", ARCH_AVRXMEGA3, AVR_CVT, "__AVR_AVR32DA28__", 0x7000, 0x0, 0x8000, 0x8000)
AVR_MCU ("avr32da32", ARCH_AVRXMEGA3, AVR_CVT, "__AVR_AVR32DA32__", 0x7000, 0x0, 0x8000, 0x8000)
AVR_MCU ("avr32da48", ARCH_AVRXMEGA3, AVR_CVT, "__AVR_AVR32DA48__", 0x7000, 0x0, 0x8000, 0x8000)
+AVR_MCU ("avr32da28s", ARCH_AVRXMEGA3, AVR_CVT, "__AVR_AVR32DA28S__", 0x7000, 0x0, 0x8000, 0x8000)
+AVR_MCU ("avr32da32s", ARCH_AVRXMEGA3, AVR_CVT, "__AVR_AVR32DA32S__", 0x7000, 0x0, 0x8000, 0x8000)
+AVR_MCU ("avr32da48s", ARCH_AVRXMEGA3, AVR_CVT, "__AVR_AVR32DA48S__", 0x7000, 0x0, 0x8000, 0x8000)
AVR_MCU ("avr32db28", ARCH_AVRXMEGA3, AVR_CVT, "__AVR_AVR32DB28__", 0x7000, 0x0, 0x8000, 0x8000)
AVR_MCU ("avr32db32", ARCH_AVRXMEGA3, AVR_CVT, "__AVR_AVR32DB32__", 0x7000, 0x0, 0x8000, 0x8000)
AVR_MCU ("avr32db48", ARCH_AVRXMEGA3, AVR_CVT, "__AVR_AVR32DB48__", 0x7000, 0x0, 0x8000, 0x8000)
@@ -427,6 +434,10 @@ AVR_MCU ("avr128da28", ARCH_AVRXMEGA4, AVR_CVT | AVR_ISA_FLMAP, "__AVR_AVR
AVR_MCU ("avr128da32", ARCH_AVRXMEGA4, AVR_CVT | AVR_ISA_FLMAP, "__AVR_AVR128DA32__", 0x4000, 0x0, 0x20000, 0)
AVR_MCU ("avr128da48", ARCH_AVRXMEGA4, AVR_CVT | AVR_ISA_FLMAP, "__AVR_AVR128DA48__", 0x4000, 0x0, 0x20000, 0)
AVR_MCU ("avr128da64", ARCH_AVRXMEGA4, AVR_CVT | AVR_ISA_FLMAP, "__AVR_AVR128DA64__", 0x4000, 0x0, 0x20000, 0)
+AVR_MCU ("avr128da28s", ARCH_AVRXMEGA4, AVR_CVT | AVR_ISA_FLMAP, "__AVR_AVR128DA28S__", 0x4000, 0x0, 0x20000, 0)
+AVR_MCU ("avr128da32s", ARCH_AVRXMEGA4, AVR_CVT | AVR_ISA_FLMAP, "__AVR_AVR128DA32S__", 0x4000, 0x0, 0x20000, 0)
+AVR_MCU ("avr128da48s", ARCH_AVRXMEGA4, AVR_CVT | AVR_ISA_FLMAP, "__AVR_AVR128DA48S__", 0x4000, 0x0, 0x20000, 0)
+AVR_MCU ("avr128da64s", ARCH_AVRXMEGA4, AVR_CVT | AVR_ISA_FLMAP, "__AVR_AVR128DA64S__", 0x4000, 0x0, 0x20000, 0)
AVR_MCU ("avr128db28", ARCH_AVRXMEGA4, AVR_CVT | AVR_ISA_FLMAP, "__AVR_AVR128DB28__", 0x4000, 0x0, 0x20000, 0)
AVR_MCU ("avr128db32", ARCH_AVRXMEGA4, AVR_CVT | AVR_ISA_FLMAP, "__AVR_AVR128DB32__", 0x4000, 0x0, 0x20000, 0)
AVR_MCU ("avr128db48", ARCH_AVRXMEGA4, AVR_CVT | AVR_ISA_FLMAP, "__AVR_AVR128DB48__", 0x4000, 0x0, 0x20000, 0)
diff --git a/gcc/config/avr/avr-passes.cc b/gcc/config/avr/avr-passes.cc
index 2c21e7b..69df6d2 100644
--- a/gcc/config/avr/avr-passes.cc
+++ b/gcc/config/avr/avr-passes.cc
@@ -3167,8 +3167,7 @@ bbinfo_t::optimize_one_block (bool &changed)
|| (bbinfo_t::try_split_any_p && od.try_split_any (this))
|| (bbinfo_t::try_mem0_p && od.try_mem0 (this)));
- rtx_insn *new_insns = get_insns ();
- end_sequence ();
+ rtx_insn *new_insns = end_sequence ();
gcc_assert (found == (od.n_new_insns >= 0));
@@ -3943,10 +3942,7 @@ avr_parallel_insn_from_insns (rtx_insn *i[5])
PATTERN (i[3]), PATTERN (i[4]));
start_sequence ();
emit (gen_rtx_PARALLEL (VOIDmode, vec));
- rtx_insn *insn = get_insns ();
- end_sequence ();
-
- return insn;
+ return end_sequence ();
}
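
These hunks track an interface change where end_sequence () itself
returns the head of the emitted sequence. A minimal sketch of the new
idiom, assuming that updated signature:

    start_sequence ();
    emit_insn (gen_nop ());
    rtx_insn *insns = end_sequence ();  /* was: get_insns (); end_sequence (); */
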
@@ -4124,9 +4120,8 @@ avr_optimize_casesi (rtx_insn *insns[5], rtx *xop)
JUMP_LABEL (cbranch) = xop[4];
++LABEL_NUSES (xop[4]);
- rtx_insn *seq1 = get_insns ();
rtx_insn *last1 = get_last_insn ();
- end_sequence ();
+ rtx_insn *seq1 = end_sequence ();
emit_insn_after (seq1, insns[2]);
@@ -4145,9 +4140,8 @@ avr_optimize_casesi (rtx_insn *insns[5], rtx *xop)
emit_insn (pat_4);
- rtx_insn *seq2 = get_insns ();
rtx_insn *last2 = get_last_insn ();
- end_sequence ();
+ rtx_insn *seq2 = end_sequence ();
emit_insn_after (seq2, insns[3]);
@@ -4849,6 +4843,137 @@ avr_pass_fuse_add::execute1 (function *func)
//////////////////////////////////////////////////////////////////////////////
+// Fuse 2 move insns after combine.
+
+static const pass_data avr_pass_data_2moves =
+{
+ RTL_PASS, // type
+ "", // name (will be patched)
+ OPTGROUP_NONE, // optinfo_flags
+ TV_DF_SCAN, // tv_id
+ 0, // properties_required
+ 0, // properties_provided
+ 0, // properties_destroyed
+ 0, // todo_flags_start
+ 0 // todo_flags_finish
+};
+
+class avr_pass_2moves : public rtl_opt_pass
+{
+public:
+ avr_pass_2moves (gcc::context *ctxt, const char *name)
+ : rtl_opt_pass (avr_pass_data_2moves, ctxt)
+ {
+ this->name = name;
+ }
+
+ unsigned int execute (function *func) final override
+ {
+ if (optimize && avropt_fuse_move2)
+ {
+ bool changed = false;
+ basic_block bb;
+
+ FOR_EACH_BB_FN (bb, func)
+ {
+ changed |= optimize_2moves_bb (bb);
+ }
+
+ if (changed)
+ {
+ df_note_add_problem ();
+ df_analyze ();
+ }
+ }
+
+ return 0;
+ }
+
+ bool optimize_2moves (rtx_insn *, rtx_insn *);
+ bool optimize_2moves_bb (basic_block);
+}; // avr_pass_2moves
+
+bool
+avr_pass_2moves::optimize_2moves_bb (basic_block bb)
+{
+ bool changed = false;
+ rtx_insn *insn1 = nullptr;
+ rtx_insn *insn2 = nullptr;
+ rtx_insn *curr;
+
+ FOR_BB_INSNS (bb, curr)
+ {
+ if (insn1 && INSN_P (insn1)
+ && insn2 && INSN_P (insn2))
+ changed |= optimize_2moves (insn1, insn2);
+
+ insn1 = insn2;
+ insn2 = curr;
+ }
+
+ return changed;
+}
+
+bool
+avr_pass_2moves::optimize_2moves (rtx_insn *insn1, rtx_insn *insn2)
+{
+ bool good = false;
+ bool bad = false;
+ rtx set1, dest1, src1;
+ rtx set2, dest2, src2;
+
+ if ((set1 = single_set (insn1))
+ && (set2 = single_set (insn2))
+ && (src1 = SET_SRC (set1))
+ && REG_P (src2 = SET_SRC (set2))
+ && REG_P (dest1 = SET_DEST (set1))
+ && REG_P (dest2 = SET_DEST (set2))
+ && rtx_equal_p (dest1, src2)
+ // Now we have:
+ // insn1: dest1 = src1
+ // insn2: dest2 = dest1
+ && REGNO (dest1) >= FIRST_PSEUDO_REGISTER
+ // Paranoia.
+ && GET_CODE (PATTERN (insn1)) != PARALLEL
+ && GET_CODE (PATTERN (insn2)) != PARALLEL
+ && (rtx_equal_p (dest2, src1)
+ || !reg_overlap_mentioned_p (dest2, src1)))
+ {
+ avr_dump ("\n;; Found 2moves:\n%r\n%r\n", insn1, insn2);
+ avr_dump (";; reg %d: insn uses uids:", REGNO (dest1));
+
+ // Go check that dest1 is used exactly once, namely by insn2.
+
+ df_ref use = DF_REG_USE_CHAIN (REGNO (dest1));
+ for (; use; use = DF_REF_NEXT_REG (use))
+ {
+ rtx_insn *user = DF_REF_INSN (use);
+ avr_dump (" %d", INSN_UID (user));
+ good |= INSN_UID (user) == INSN_UID (insn2);
+ bad |= INSN_UID (user) != INSN_UID (insn2);
+ }
+ avr_dump (".\n");
+
+ if (good && !bad
+ // Propagate src1 to insn2:
+ // insn1: # Deleted
+ // insn2: dest2 = src1
+ && validate_change (insn2, &SET_SRC (set2), src1, false))
+ {
+ SET_INSN_DELETED (insn1);
+ return true;
+ }
+ }
+
+ if (good && !bad)
+ avr_dump (";; Failed\n");
+
+ return false;
+}
+
+
+
+//////////////////////////////////////////////////////////////////////////////
// Split insns with nonzero_bits() after combine.
static const pass_data avr_pass_data_split_nzb =
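
A worked example of what avr_pass_2moves::optimize_2moves does
(pseudo-register numbers are illustrative only):

    ;; before:
    insn1: (set (reg:HI 50) (reg:HI 48))
    insn2: (set (reg:HI 51) (reg:HI 50))
    ;; after, provided pseudo 50 is used by insn2 only and
    ;; validate_change accepts the propagated source:
    insn2: (set (reg:HI 51) (reg:HI 48))   ;; insn1 is deleted
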
@@ -5710,6 +5835,14 @@ make_avr_pass_casesi (gcc::context *ctxt)
return new avr_pass_casesi (ctxt, "avr-casesi");
}
+// Optimize 2 consecutive moves after combine.
+
+rtl_opt_pass *
+make_avr_pass_2moves (gcc::context *ctxt)
+{
+ return new avr_pass_2moves (ctxt, "avr-2moves");
+}
+
rtl_opt_pass *
make_avr_pass_split_nzb (gcc::context *ctxt)
{
diff --git a/gcc/config/avr/avr-passes.def b/gcc/config/avr/avr-passes.def
index eb60a93..d668c7f 100644
--- a/gcc/config/avr/avr-passes.def
+++ b/gcc/config/avr/avr-passes.def
@@ -74,6 +74,14 @@ INSERT_PASS_BEFORE (pass_free_cfg, 1, avr_pass_recompute_notes);
INSERT_PASS_AFTER (pass_expand, 1, avr_pass_casesi);
+/* Insn combine may come up with superfluous reg-reg moves, where the combine
+ maintainers say these are no problem since reg-alloc is supposed to optimize
+ them away. The issue is that the lower-subreg pass, which sits between
+ combine and reg-alloc, may split such moves into a zoo of subregs that the
+ register allocator handles only poorly. */
+
+INSERT_PASS_AFTER (pass_combine, 1, avr_pass_2moves);
+
/* Some combine insns have nonzero_bits() in their condition, though insns
should not use such stuff in their condition. Therefore, we split such
insn into something without nonzero_bits() in their condition right after
diff --git a/gcc/config/avr/avr-protos.h b/gcc/config/avr/avr-protos.h
index ca30136..8ba1945 100644
--- a/gcc/config/avr/avr-protos.h
+++ b/gcc/config/avr/avr-protos.h
@@ -146,7 +146,6 @@ extern void out_shift_with_cnt (const char *templ, rtx_insn *insn,
extern enum reg_class avr_mode_code_base_reg_class (machine_mode, addr_space_t, rtx_code, rtx_code);
extern bool avr_regno_mode_code_ok_for_base_p (int, machine_mode, addr_space_t, rtx_code, rtx_code);
extern rtx avr_incoming_return_addr_rtx (void);
-extern rtx avr_legitimize_reload_address (rtx*, machine_mode, int, int, int, int, rtx (*)(rtx,int));
extern bool avr_adiw_reg_p (rtx);
extern bool avr_mem_flash_p (rtx);
extern bool avr_mem_flashx_p (rtx);
@@ -168,6 +167,8 @@ regmask (machine_mode mode, unsigned regno)
extern void avr_fix_inputs (rtx*, unsigned, unsigned);
extern bool avr_emit3_fix_outputs (rtx (*)(rtx,rtx,rtx), rtx*, unsigned, unsigned);
+extern rtx avr_add_ccclobber (rtx_insn *);
+#define DONE_ADD_CCC emit (avr_add_ccclobber (curr_insn)); DONE;
extern rtx lpm_reg_rtx;
extern rtx lpm_addr_reg_rtx;
@@ -208,6 +209,7 @@ extern rtl_opt_pass *make_avr_pass_casesi (gcc::context *);
extern rtl_opt_pass *make_avr_pass_ifelse (gcc::context *);
extern rtl_opt_pass *make_avr_pass_split_nzb (gcc::context *);
extern rtl_opt_pass *make_avr_pass_split_after_peephole2 (gcc::context *);
+extern rtl_opt_pass *make_avr_pass_2moves (gcc::context *);
#ifdef RTX_CODE
extern bool avr_casei_sequence_check_operands (rtx *xop);
extern bool avr_split_fake_addressing_move (rtx_insn *insn, rtx *operands);
@@ -238,7 +240,6 @@ typedef struct
unsigned insn_addresses :1;
unsigned legitimate_address_p :1;
unsigned legitimize_address :1;
- unsigned legitimize_reload_address :1;
unsigned progmem :1;
unsigned rtx_costs :1;
} avr_log_t;
diff --git a/gcc/config/avr/avr.cc b/gcc/config/avr/avr.cc
index b192a12..ae49d4d 100644
--- a/gcc/config/avr/avr.cc
+++ b/gcc/config/avr/avr.cc
@@ -411,6 +411,29 @@ avr_to_int_mode (rtx x)
}
+/* Return the pattern of INSN, but with added (clobber (reg:CC REG_CC)).
+ The pattern of INSN must be a PARALLEL or a SET. INSN is unchanged. */
+
+rtx
+avr_add_ccclobber (rtx_insn *insn)
+{
+ rtx pat = PATTERN (insn);
+ gcc_assert (GET_CODE (pat) == SET || GET_CODE (pat) == PARALLEL);
+
+ int newlen = GET_CODE (pat) == SET ? 2 : 1 + XVECLEN (pat, 0);
+ rtx newpat = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (newlen));
+ rtx elt0 = GET_CODE (pat) == SET ? pat : XVECEXP (pat, 0, 0);
+
+ XVECEXP (newpat, 0, 0) = copy_rtx (elt0);
+ XVECEXP (newpat, 0, newlen - 1) = gen_rtx_CLOBBER (VOIDmode, cc_reg_rtx);
+
+ for (int i = 1; i < newlen - 1; ++i)
+ XVECEXP (newpat, 0, i) = copy_rtx (XVECEXP (pat, 0, i));
+
+ return newpat;
+}
+
+
/* Return true if hard register REG supports the ADIW and SBIW instructions. */
bool
@@ -430,13 +453,6 @@ avr_ld_regno_p (int regno)
}
-static bool
-ra_in_progress ()
-{
- return avropt_lra_p ? lra_in_progress : reload_in_progress;
-}
-
-
/* Set `avr_arch' as specified by `-mmcu='.
Return true on success. */
@@ -1660,8 +1676,7 @@ avr_prologue_setup_frame (HOST_WIDE_INT size, HARD_REG_SET set)
-size_cfa)));
}
- fp_plus_insns = get_insns ();
- end_sequence ();
+ fp_plus_insns = end_sequence ();
/************ Method 2: Adjust Stack pointer ************/
@@ -1693,8 +1708,7 @@ avr_prologue_setup_frame (HOST_WIDE_INT size, HARD_REG_SET set)
RTX_FRAME_RELATED_P (insn) = 1;
}
- sp_plus_insns = get_insns ();
- end_sequence ();
+ sp_plus_insns = end_sequence ();
/************ Use shortest method ************/
@@ -2060,8 +2074,7 @@ avr_expand_epilogue (bool sibcall_p)
emit_insn (gen_movhi_sp_r (stack_pointer_rtx, fp,
GEN_INT (irq_state)));
- rtx_insn *fp_plus_insns = get_insns ();
- end_sequence ();
+ rtx_insn *fp_plus_insns = end_sequence ();
/********** Method 2: Adjust Stack pointer **********/
@@ -2072,8 +2085,7 @@ avr_expand_epilogue (bool sibcall_p)
emit_move_insn (stack_pointer_rtx,
plus_constant (Pmode, stack_pointer_rtx, size));
- rtx_insn *sp_plus_insns = get_insns ();
- end_sequence ();
+ rtx_insn *sp_plus_insns = end_sequence ();
/************ Use shortest method ************/
@@ -2328,8 +2340,8 @@ avr_legitimate_address_p (machine_mode mode, rtx x, bool strict)
if (avr_log.legitimate_address_p)
{
avr_edump ("\n%?: ret=%d, mode=%m strict=%d "
- "reload_completed=%d ra_in_progress=%d %s:",
- ok, mode, strict, reload_completed, ra_in_progress (),
+ "reload_completed=%d lra_in_progress=%d %s:",
+ ok, mode, strict, reload_completed, lra_in_progress,
reg_renumber ? "(reg_renumber)" : "");
if (GET_CODE (x) == PLUS
@@ -2399,88 +2411,6 @@ avr_legitimize_address (rtx x, rtx oldx, machine_mode mode)
}
-/* Implement `LEGITIMIZE_RELOAD_ADDRESS'. */
-/* This will allow register R26/27 to be used where it is no worse than normal
- base pointers R28/29 or R30/31. For example, if base offset is greater
- than 63 bytes or for R++ or --R addressing. */
-
-rtx
-avr_legitimize_reload_address (rtx *px, machine_mode mode, int opnum,
- int type, int addr_type, int /*ind_levels*/,
- rtx (*mk_memloc)(rtx,int))
-{
- rtx x = *px;
-
- if (avr_log.legitimize_reload_address)
- avr_edump ("\n%?:%m %r\n", mode, x);
-
- if (1 && (GET_CODE (x) == POST_INC
- || GET_CODE (x) == PRE_DEC))
- {
- push_reload (XEXP (x, 0), XEXP (x, 0), &XEXP (x, 0), &XEXP (x, 0),
- POINTER_REGS, GET_MODE (x), GET_MODE (x), 0, 0,
- opnum, RELOAD_OTHER);
-
- if (avr_log.legitimize_reload_address)
- avr_edump (" RCLASS.1 = %R\n IN = %r\n OUT = %r\n",
- POINTER_REGS, XEXP (x, 0), XEXP (x, 0));
-
- return x;
- }
-
- if (GET_CODE (x) == PLUS
- && REG_P (XEXP (x, 0))
- && reg_equiv_constant (REGNO (XEXP (x, 0))) == 0
- && CONST_INT_P (XEXP (x, 1))
- && INTVAL (XEXP (x, 1)) >= 1)
- {
- bool fit = INTVAL (XEXP (x, 1)) <= MAX_LD_OFFSET (mode);
-
- if (fit)
- {
- if (reg_equiv_address (REGNO (XEXP (x, 0))) != 0)
- {
- int regno = REGNO (XEXP (x, 0));
- rtx mem = mk_memloc (x, regno);
-
- push_reload (XEXP (mem, 0), NULL_RTX, &XEXP (mem, 0), NULL,
- POINTER_REGS, Pmode, VOIDmode, 0, 0,
- 1, (enum reload_type) addr_type);
-
- if (avr_log.legitimize_reload_address)
- avr_edump (" RCLASS.2 = %R\n IN = %r\n OUT = %r\n",
- POINTER_REGS, XEXP (mem, 0), NULL_RTX);
-
- push_reload (mem, NULL_RTX, &XEXP (x, 0), NULL,
- BASE_POINTER_REGS, GET_MODE (x), VOIDmode, 0, 0,
- opnum, (enum reload_type) type);
-
- if (avr_log.legitimize_reload_address)
- avr_edump (" RCLASS.2 = %R\n IN = %r\n OUT = %r\n",
- BASE_POINTER_REGS, mem, NULL_RTX);
-
- return x;
- }
- }
- else if (! (frame_pointer_needed
- && XEXP (x, 0) == frame_pointer_rtx))
- {
- push_reload (x, NULL_RTX, px, NULL,
- POINTER_REGS, GET_MODE (x), VOIDmode, 0, 0,
- opnum, (enum reload_type) type);
-
- if (avr_log.legitimize_reload_address)
- avr_edump (" RCLASS.3 = %R\n IN = %r\n OUT = %r\n",
- POINTER_REGS, x, NULL_RTX);
-
- return x;
- }
- }
-
- return NULL_RTX;
-}
-
-
/* Helper function to print assembler resp. track instruction
sequence lengths. Always return "".
@@ -12828,6 +12758,16 @@ avr_rtx_costs_1 (rtx x, machine_mode mode, int outer_code,
return true;
case SIGN_EXTEND:
+ if (GET_CODE (XEXP (x, 0)) == ASHIFT
+ && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
+ {
+ // "*sext.ashift<QIPSI:mode><HISI:mode>2_split"
+ int m0 = GET_MODE_SIZE (GET_MODE (XEXP (x, 0)));
+ int m1 = GET_MODE_SIZE (mode);
+ *total = COSTS_N_INSNS (m0 * INTVAL (XEXP (XEXP (x, 0), 1))
+ + m1 - m0);
+ return true;
+ }
*total = COSTS_N_INSNS (n_bytes + 2
- GET_MODE_SIZE (GET_MODE (XEXP (x, 0))));
*total += avr_operand_rtx_cost (XEXP (x, 0), GET_MODE (XEXP (x, 0)),
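
A worked example for the new SIGN_EXTEND cost (operand shapes chosen for
illustration): costing (sign_extend:HI (ashift:QI r 3)) gives m0 = 1 and
m1 = 2, so *total = COSTS_N_INSNS (1*3 + 2 - 1) = COSTS_N_INSNS (4),
roughly one insn per single-bit shift of each source byte plus one insn
per byte of widening.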
@@ -13940,8 +13880,8 @@ extra_constraint_Q (rtx x)
|| xx == arg_pointer_rtx);
if (avr_log.constraints)
- avr_edump ("\n%?=%d reload_completed=%d ra_in_progress=%d\n %r\n",
- ok, reload_completed, ra_in_progress (), x);
+ avr_edump ("\n%?=%d reload_completed=%d lra_in_progress=%d\n %r\n",
+ ok, reload_completed, lra_in_progress, x);
}
return ok;
@@ -14146,17 +14086,6 @@ avr_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
if (GET_MODE_SIZE (mode) == 1)
return true;
- /* FIXME: Ideally, the following test is not needed.
- However, it turned out that it can reduce the number
- of spill fails. AVR and it's poor endowment with
- address registers is extreme stress test for reload. */
-
- if (GET_MODE_SIZE (mode) >= 4
- && regno >= REG_X
- // This problem only concerned the old reload.
- && ! avropt_lra_p)
- return false;
-
/* All modes larger than 8 bits should start in an even register. */
return !(regno & 1);
@@ -14422,6 +14351,13 @@ avr_output_addr_vec (rtx_insn *labl, rtx table)
// Output the label that precedes the table.
ASM_OUTPUT_ALIGN (stream, 1);
+
+ char s_labl[40];
+ targetm.asm_out.generate_internal_label (s_labl, "L",
+ CODE_LABEL_NUMBER (labl));
+ ASM_OUTPUT_TYPE_DIRECTIVE (stream, s_labl,
+ AVR_HAVE_JMP_CALL ? "object" : "function");
+
targetm.asm_out.internal_label (stream, "L", CODE_LABEL_NUMBER (labl));
// Output the table's content.
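
For example (label number illustrative), a jump-table label on a device
with JMP/CALL now gets ".type .L42, @object" ahead of ".L42:", since the
table holds address words (data); on devices without JMP/CALL it is
marked "@function", since there the table is executed as code. Either
way, binutils tools can classify the table contents correctly.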
@@ -14911,8 +14847,8 @@ avr_addr_space_legitimate_address_p (machine_mode mode, rtx x, bool strict,
if (avr_log.legitimate_address_p)
{
avr_edump ("\n%?: ret=%b, mode=%m strict=%d "
- "reload_completed=%d ra_in_progress=%d %s:",
- ok, mode, strict, reload_completed, ra_in_progress (),
+ "reload_completed=%d lra_in_progress=%d %s:",
+ ok, mode, strict, reload_completed, lra_in_progress,
reg_renumber ? "(reg_renumber)" : "");
if (GET_CODE (x) == PLUS
@@ -14988,10 +14924,11 @@ avr_addr_space_convert (rtx src, tree type_old, tree type_new)
/* Linearize memory: RAM has bit 23 set. When as_new = __flashx then
this is basically UB since __flashx mistreats RAM addresses, but there
- is no way to bail out. (Though -Waddr-space-convert will tell.) */
+ is no way to bail out. (Though -Waddr-space-convert will tell.)
+ However, PR121277 shows this is confusing, in particular when NULL comes in. */
int msb = ADDR_SPACE_GENERIC_P (as_old)
- ? 0x80
+ ? as_new == ADDR_SPACE_MEMX ? 0x80 : 0x00
: avr_addrspace[as_old].segment;
src = force_reg (Pmode, src);
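
As a worked example (addresses illustrative): converting the generic
(RAM) address 0x1234 to __memx still sets bit 23 and yields 0x801234,
while the same conversion to __flashx now leaves the high byte zero
(0x001234); in particular, a generic NULL stays NULL instead of becoming
0x800000.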
@@ -15089,10 +15026,16 @@ avr_convert_to_type (tree type, tree expr)
const char *name_old = avr_addrspace[as_old].name;
const char *name_new = avr_addrspace[as_new].name;
- warning (OPT_Waddr_space_convert,
- "conversion from address space %qs to address space %qs",
- ADDR_SPACE_GENERIC_P (as_old) ? "generic" : name_old,
- ADDR_SPACE_GENERIC_P (as_new) ? "generic" : name_new);
+ // Be relaxed when NULL is used, and when 0x0 stands for
+ // address 0x0.
+ bool nowarn = (expr == null_pointer_node
+ && (as_new == ADDR_SPACE_FLASHX
+ || as_new == ADDR_SPACE_FLASH));
+ if (!nowarn)
+ warning (OPT_Waddr_space_convert,
+ "conversion from address space %qs to address space %qs",
+ ADDR_SPACE_GENERIC_P (as_old) ? "generic" : name_old,
+ ADDR_SPACE_GENERIC_P (as_new) ? "generic" : name_new);
return fold_build1_loc (loc, ADDR_SPACE_CONVERT_EXPR, type, expr);
}
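
Concretely (a sketch in C with AVR named address spaces): an
initialization like "const __flashx char *p = NULL;" no longer triggers
-Waddr-space-convert, while converting a real generic pointer to
__flashx or __flash still warns.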
@@ -16683,15 +16626,6 @@ avr_unwind_word_mode ()
return Pmode;
}
-
-/* Implement `TARGET_LRA_P'. */
-
-static bool
-avr_use_lra_p ()
-{
- return avropt_lra_p;
-}
-
/* Initialize the GCC target structure. */
@@ -16833,9 +16767,6 @@ avr_use_lra_p ()
#undef TARGET_CONVERT_TO_TYPE
#define TARGET_CONVERT_TO_TYPE avr_convert_to_type
-#undef TARGET_LRA_P
-#define TARGET_LRA_P avr_use_lra_p
-
#undef TARGET_ADDR_SPACE_SUBSET_P
#define TARGET_ADDR_SPACE_SUBSET_P avr_addr_space_subset_p
diff --git a/gcc/config/avr/avr.h b/gcc/config/avr/avr.h
index cb818c3..335f9fa5 100644
--- a/gcc/config/avr/avr.h
+++ b/gcc/config/avr/avr.h
@@ -309,12 +309,6 @@ enum reg_class {
#define STATIC_CHAIN_REGNUM ((AVR_TINY) ? 18 :2)
-#define RELOAD_ELIMINABLE_REGS { \
- { ARG_POINTER_REGNUM, STACK_POINTER_REGNUM }, \
- { ARG_POINTER_REGNUM, FRAME_POINTER_REGNUM }, \
- { FRAME_POINTER_REGNUM, STACK_POINTER_REGNUM }, \
- { FRAME_POINTER_REGNUM + 1, STACK_POINTER_REGNUM + 1 } }
-
#define ELIMINABLE_REGS \
{ \
{ ARG_POINTER_REGNUM, STACK_POINTER_REGNUM }, \
@@ -358,18 +352,6 @@ typedef struct avr_args
#define MAX_REGS_PER_ADDRESS 1
-#define LEGITIMIZE_RELOAD_ADDRESS(X,MODE,OPNUM,TYPE,IND_L,WIN) \
- do { \
- rtx new_x = avr_legitimize_reload_address (&(X), MODE, OPNUM, TYPE, \
- ADDR_TYPE (TYPE), \
- IND_L, make_memloc); \
- if (new_x) \
- { \
- X = new_x; \
- goto WIN; \
- } \
- } while (0)
-
/* We increase branch costs after reload in order to keep basic-block
reordering from introducing out-of-line jumps and to prefer fall-through
edges instead. The default branch costs are 0, mainly because otherwise
diff --git a/gcc/config/avr/avr.md b/gcc/config/avr/avr.md
index 1c4e44d..60b1f60 100644
--- a/gcc/config/avr/avr.md
+++ b/gcc/config/avr/avr.md
@@ -574,9 +574,8 @@
&& REG_Z == REGNO (XEXP (operands[0], 0))"
"#"
"&& reload_completed"
- [(parallel [(set (reg:MOVMODE 22)
- (match_dup 0))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*load_<mode>_libgcc"
[(set (reg:MOVMODE 22)
@@ -716,17 +715,15 @@
|| avr_load_libgcc_insn_p (insn, ADDR_SPACE_FLASHX, true)"
"#"
"&& reload_completed"
- [(parallel [(set (reg:MOVMODE REG_22)
- (match_dup 0))
- (clobber (reg:CC REG_CC))])]
- {
- operands[0] = SET_SRC (single_set (curr_insn));
- })
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*fxload_<mode>_libgcc"
[(set (reg:MOVMODE REG_22)
(mem:MOVMODE (lo_sum:PSI (reg:QI REG_21)
(reg:HI REG_Z))))
+ (clobber (reg:QI REG_21))
+ (clobber (reg:HI REG_Z))
(clobber (reg:CC REG_CC))]
"reload_completed
&& (avr_load_libgcc_insn_p (insn, ADDR_SPACE_MEMX, true)
@@ -849,9 +846,8 @@
|| reg_or_0_operand (operands[1], <MODE>mode)"
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (match_dup 1))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
;; "movqi_insn"
;; "movqq_insn" "movuqq_insn"
@@ -960,9 +956,8 @@
|| reg_or_0_operand (operands[1], <MODE>mode)"
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (match_dup 1))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*mov<mode>"
[(set (match_operand:ALL2 0 "nonimmediate_operand" "=r,r ,r,m ,d,*r,q,r")
@@ -1133,9 +1128,8 @@
|| const0_rtx == operands[1]"
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (match_dup 1))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*movpsi"
[(set (match_operand:PSI 0 "nonimmediate_operand" "=r,r,r ,Qm,!d,r")
@@ -1193,9 +1187,8 @@
|| reg_or_0_operand (operands[1], <MODE>mode)"
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (match_dup 1))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*mov<mode>"
[(set (match_operand:ALL4 0 "nonimmediate_operand" "=r,r ,r ,Qm ,!d,r")
@@ -1241,9 +1234,8 @@
|| reg_or_0_operand (operands[1], SFmode)"
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (match_dup 1))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*movsf"
[(set (match_operand:SF 0 "nonimmediate_operand" "=r,r,r ,Qm,!d,r")
@@ -1322,16 +1314,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (mem:BLK (reg:HI REG_X))
- (mem:BLK (reg:HI REG_Z)))
- (unspec [(match_dup 0)]
- UNSPEC_CPYMEM)
- (use (match_dup 1))
- (clobber (reg:HI REG_X))
- (clobber (reg:HI REG_Z))
- (clobber (reg:QI LPM_REGNO))
- (clobber (match_dup 2))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*cpymem_<mode>"
[(set (mem:BLK (reg:HI REG_X))
@@ -1378,22 +1362,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (mem:BLK (reg:HI REG_X))
- (match_dup 2))
- (unspec [(match_dup 0)]
- UNSPEC_CPYMEM)
- (use (reg:QIHI 24))
- (clobber (reg:HI REG_X))
- (clobber (reg:HI REG_Z))
- (clobber (reg:QI LPM_REGNO))
- (clobber (reg:HI 24))
- (clobber (reg:QI 23))
- (clobber (mem:QI (match_dup 1)))
- (clobber (reg:CC REG_CC))])]
- {
- rtx xset = XVECEXP (PATTERN (curr_insn), 0, 0);
- operands[2] = SET_SRC (xset);
- })
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*cpymemx_<mode>"
[(set (mem:BLK (reg:HI REG_X))
@@ -1457,13 +1427,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (mem:BLK (match_dup 0))
- (const_int 0))
- (use (match_dup 1))
- (use (match_dup 2))
- (clobber (match_dup 3))
- (clobber (match_dup 4))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*clrmemqi"
[(set (mem:BLK (match_operand:HI 0 "register_operand" "e"))
@@ -1488,14 +1453,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (mem:BLK (match_dup 0))
- (const_int 0))
- (use (match_dup 1))
- (use (match_dup 2))
- (clobber (match_dup 3))
- (clobber (match_dup 4))
- (clobber (reg:CC REG_CC))])]
- ""
+ [(scratch)]
+ { DONE_ADD_CCC }
[(set_attr "isa" "adiw,*")])
@@ -1546,13 +1505,8 @@
""
"#"
"&& reload_completed"
- [(parallel
- [(set (match_dup 0)
- (unspec:HI [(mem:BLK (match_dup 1))
- (const_int 0)
- (match_dup 2)]
- UNSPEC_STRLEN))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*strlenhi"
[(set (match_operand:HI 0 "register_operand" "=e")
@@ -1577,10 +1531,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (plus:ALL1 (match_dup 1)
- (match_dup 2)))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*add<mode>3"
[(set (match_operand:ALL1 0 "register_operand" "=r,d ,r ,r ,r ,r")
@@ -1636,10 +1588,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (plus:HI (zero_extend:HI (match_dup 1))
- (match_dup 2)))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*addhi3_zero_extend"
[(set (match_operand:HI 0 "register_operand" "=r,*?r")
@@ -1659,10 +1609,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (plus:HI (match_dup 1)
- (zero_extend:HI (match_dup 2))))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*addhi3_zero_extend1"
[(set (match_operand:HI 0 "register_operand" "=r")
@@ -1680,10 +1628,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (plus:HI (zero_extend:HI (match_dup 1))
- (match_dup 2)))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*addhi3_zero_extend.const"
[(set (match_operand:HI 0 "register_operand" "=d")
@@ -1719,11 +1665,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (plus:HI (ashift:HI (zero_extend:HI (match_dup 1))
- (const_int 1))
- (match_dup 2)))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*addhi3_zero_extend.ashift1"
[(set (match_operand:HI 0 "register_operand" "=r")
@@ -1748,11 +1691,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (plus:HI (zero_extend:HI (match_dup 1))
- (zero_extend:HI (match_dup 2))))
- (clobber (reg:CC REG_CC))])])
-
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*usum_widenqihi3"
[(set (match_operand:HI 0 "register_operand" "=r")
@@ -1770,10 +1710,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (minus:HI (zero_extend:HI (match_dup 1))
- (zero_extend:HI (match_dup 2))))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*udiff_widenqihi3"
[(set (match_operand:HI 0 "register_operand" "=r")
@@ -1793,7 +1731,7 @@
return avr_out_addto_sp (operands, NULL);
}
""
- [(const_int 0)]
+ [(scratch)]
{
// Do not attempt to split this pattern. This FAIL is necessary
// to prevent the splitter from matching *add<ALL2>3_split, splitting
@@ -1905,11 +1843,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (plus:ALL2 (match_dup 1)
- (match_dup 2)))
- (clobber (match_dup 3))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
;; "*addhi3_clobber"
;; "*addhq3_clobber" "*adduhq3_clobber"
@@ -1939,11 +1874,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (plus:ALL4 (match_dup 1)
- (match_dup 2)))
- (clobber (match_dup 3))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*add<mode>3"
[(set (match_operand:ALL4 0 "register_operand" "=??r,d ,r")
@@ -1975,10 +1907,8 @@
&& (<HISI:SIZE> > 2 || <CODE> == SIGN_EXTEND)"
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (plus:HISI (any_extend:HISI (match_dup 1))
- (match_dup 2)))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
;; "*addhi3.sign_extend.qi"
;; "*addpsi3.zero_extend.qi" "*addpsi3.sign_extend.qi"
@@ -2015,10 +1945,8 @@
"<HISI:SIZE> > <QIPSI:SIZE>"
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (minus:HISI (match_dup 1)
- (any_extend:HISI (match_dup 2))))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
;; "*subhi3.zero_extend.qi" "*subhi3.sign_extend.qi"
;; "*subpsi3.zero_extend.qi" "*subpsi3.sign_extend.qi"
@@ -2049,11 +1977,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (plus:PSI (match_dup 1)
- (match_dup 2)))
- (clobber (match_dup 3 ))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*addpsi3"
[(set (match_operand:PSI 0 "register_operand" "=??r,d ,d,r")
@@ -2075,10 +2000,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (minus:PSI (match_dup 1)
- (match_dup 2)))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*subpsi3"
[(set (match_operand:PSI 0 "register_operand" "=r")
@@ -2102,10 +2025,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (minus:ALL1 (match_dup 1)
- (match_dup 2)))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*sub<mode>3"
[(set (match_operand:ALL1 0 "register_operand" "=??r,d ,r ,r ,r ,r")
@@ -2133,11 +2054,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (minus:ALL2 (match_dup 1)
- (match_dup 2)))
- (clobber (match_dup 3))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*sub<mode>3"
[(set (match_operand:ALL2 0 "register_operand" "=??r,d ,*r")
@@ -2163,11 +2081,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (minus:ALL4 (match_dup 1)
- (match_dup 2)))
- (clobber (match_dup 3))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*sub<mode>3"
[(set (match_operand:ALL4 0 "register_operand" "=??r,d ,r")
@@ -2205,10 +2120,8 @@
"AVR_HAVE_MUL"
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (mult:QI (match_dup 1)
- (match_dup 2)))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*mulqi3_enh"
[(set (match_operand:QI 0 "register_operand" "=r")
@@ -2239,10 +2152,8 @@
"!AVR_HAVE_MUL"
"#"
"&& reload_completed"
- [(parallel [(set (reg:QI 24)
- (mult:QI (reg:QI 24) (reg:QI 22)))
- (clobber (reg:QI 22))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*mulqi3_call"
[(set (reg:QI 24)
@@ -2265,12 +2176,8 @@
"AVR_HAVE_MUL"
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (truncate:QI
- (lshiftrt:HI (mult:HI (any_extend:HI (match_dup 1))
- (any_extend:HI (match_dup 2)))
- (const_int 8))))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*<extend_su>mulqi3_highpart"
[(set (match_operand:QI 0 "register_operand" "=r")
@@ -2357,21 +2264,21 @@
(const_int 0))))
(clobber (reg:CC REG_CC))])])
-;; *subqi3.lt0 *subqi3.ge0
-;; *subhi3.lt0 *subhi3.ge0
-;; *subpsi3.lt0 *subpsi3.ge0
-;; *subsi3.lt0 *subsi3.ge0
-(define_insn "*sub<QISI:mode>3.<code>0"
- [(set (match_operand:QISI 0 "register_operand" "=r")
- (minus:QISI (match_operand:QISI 1 "register_operand" "0")
- (gelt:QISI (match_operand:QISI2 2 "register_operand" "r")
- (const_int 0))))
- (clobber (reg:CC REG_CC))]
- "reload_completed"
- {
- return avr_out_add_msb (insn, operands, <CODE>, nullptr);
- }
- [(set_attr "adjust_len" "add_<code>0")])
+;; *addqi3.lt0_split *addqi3.ge0_split
+;; *addhi3.lt0_split *addhi3.ge0_split
+;; *addpsi3.lt0_split *addpsi3.ge0_split
+;; *addsi3.lt0_split *addsi3.ge0_split
+(define_insn_and_split "*add<QISI:mode>3.<code>0_split"
+ [(set (match_operand:QISI 0 "register_operand" "=r")
+ (plus:QISI (gelt:QISI (match_operand:QISI2 1 "register_operand" "r")
+ (const_int 0))
+ (match_operand:QISI 2 "register_operand" "0")))]
+ ""
+ "#"
+ "&& reload_completed"
+ ; *add<QISI:mode>3.<code>0
+ [(scratch)]
+ { DONE_ADD_CCC })
;; *addqi3.lt0 *addqi3.ge0
;; *addhi3.lt0 *addhi3.ge0
@@ -2389,25 +2296,6 @@
}
[(set_attr "adjust_len" "add_<code>0")])
-;; *addqi3.lt0_split *addqi3.ge0_split
-;; *addhi3.lt0_split *addhi3.ge0_split
-;; *addpsi3.lt0_split *addpsi3.ge0_split
-;; *addsi3.lt0_split *addsi3.ge0_split
-(define_insn_and_split "*add<QISI:mode>3.<code>0_split"
- [(set (match_operand:QISI 0 "register_operand" "=r")
- (plus:QISI (gelt:QISI (match_operand:QISI2 1 "register_operand" "r")
- (const_int 0))
- (match_operand:QISI 2 "register_operand" "0")))]
- ""
- "#"
- "&& reload_completed"
- [; *add<QISI:mode>3.<code>0
- (parallel [(set (match_dup 0)
- (plus:QISI (gelt:QISI (match_dup 1)
- (const_int 0))
- (match_dup 2)))
- (clobber (reg:CC REG_CC))])])
-
;; *subqi3.lt0_split *subqi3.ge0_split
;; *subhi3.lt0_split *subhi3.ge0_split
;; *subpsi3.lt0_split *subpsi3.ge0_split
@@ -2420,13 +2308,25 @@
""
"#"
"&& reload_completed"
- [; *sub<QISI:mode>3.<code>0
- (parallel [(set (match_dup 0)
- (minus:QISI (match_dup 1)
- (gelt:QISI (match_dup 2)
- (const_int 0))))
- (clobber (reg:CC REG_CC))])])
+ ; *sub<QISI:mode>3.<code>0
+ [(scratch)]
+ { DONE_ADD_CCC })
+;; *subqi3.lt0 *subqi3.ge0
+;; *subhi3.lt0 *subhi3.ge0
+;; *subpsi3.lt0 *subpsi3.ge0
+;; *subsi3.lt0 *subsi3.ge0
+(define_insn "*sub<QISI:mode>3.<code>0"
+ [(set (match_operand:QISI 0 "register_operand" "=r")
+ (minus:QISI (match_operand:QISI 1 "register_operand" "0")
+ (gelt:QISI (match_operand:QISI2 2 "register_operand" "r")
+ (const_int 0))))
+ (clobber (reg:CC REG_CC))]
+ "reload_completed"
+ {
+ return avr_out_add_msb (insn, operands, <CODE>, nullptr);
+ }
+ [(set_attr "adjust_len" "add_<code>0")])
(define_insn_and_split "*umulqihi3.call_split"
[(set (reg:HI 24)
@@ -2437,12 +2337,8 @@
"!AVR_HAVE_MUL"
"#"
"&& reload_completed"
- [(parallel [(set (reg:HI 24)
- (mult:HI (zero_extend:HI (reg:QI 22))
- (zero_extend:HI (reg:QI 24))))
- (clobber (reg:QI 21))
- (clobber (reg:HI 22))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*umulqihi3.call"
[(set (reg:HI 24)
@@ -2465,10 +2361,8 @@
"AVR_HAVE_MUL"
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (mult:HI (any_extend:HI (match_dup 1))
- (any_extend:HI (match_dup 2))))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "<extend_u>mulqihi3"
[(set (match_operand:HI 0 "register_operand" "=r")
@@ -2488,10 +2382,8 @@
"AVR_HAVE_MUL"
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (mult:HI (zero_extend:HI (match_dup 1))
- (sign_extend:HI (match_dup 2))))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*usmulqihi3"
[(set (match_operand:HI 0 "register_operand" "=r")
@@ -2513,10 +2405,8 @@
"AVR_HAVE_MUL"
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (mult:HI (sign_extend:HI (match_dup 1))
- (zero_extend:HI (match_dup 2))))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*sumulqihi3"
[(set (match_operand:HI 0 "register_operand" "=r")
@@ -2538,10 +2428,8 @@
"AVR_HAVE_MUL"
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (mult:HI (not:HI (zero_extend:HI (not:QI (match_dup 1))))
- (sign_extend:HI (match_dup 2))))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*osmulqihi3"
[(set (match_operand:HI 0 "register_operand" "=&r")
@@ -2562,10 +2450,8 @@
"AVR_HAVE_MUL"
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (mult:HI (not:HI (zero_extend:HI (not:QI (match_dup 1))))
- (zero_extend:HI (match_dup 2))))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*oumulqihi3"
[(set (match_operand:HI 0 "register_operand" "=&r")
@@ -2592,11 +2478,8 @@
"AVR_HAVE_MUL"
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (plus:QI (mult:QI (match_dup 1)
- (match_dup 2))
- (match_dup 3)))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*maddqi4"
[(set (match_operand:QI 0 "register_operand" "=r")
@@ -2618,11 +2501,8 @@
"AVR_HAVE_MUL"
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (minus:QI (match_dup 3)
- (mult:QI (match_dup 1)
- (match_dup 2))))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*msubqi4"
[(set (match_operand:QI 0 "register_operand" "=r")
@@ -2701,11 +2581,8 @@
"AVR_HAVE_MUL"
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (plus:HI (mult:HI (any_extend:HI (match_dup 1))
- (any_extend:HI (match_dup 2)))
- (match_dup 3)))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*<extend_u>maddqihi4"
[(set (match_operand:HI 0 "register_operand" "=r")
@@ -2730,11 +2607,8 @@
"AVR_HAVE_MUL"
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (minus:HI (match_dup 3)
- (mult:HI (any_extend:HI (match_dup 1))
- (any_extend:HI (match_dup 2)))))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*<extend_u>msubqihi4"
[(set (match_operand:HI 0 "register_operand" "=r")
@@ -2761,11 +2635,8 @@
&& <any_extend:CODE> != <any_extend2:CODE>"
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (plus:HI (mult:HI (any_extend:HI (match_dup 1))
- (any_extend2:HI (match_dup 2)))
- (match_dup 3)))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*<any_extend:extend_su><any_extend2:extend_su>msubqihi4"
[(set (match_operand:HI 0 "register_operand" "=r")
@@ -2796,11 +2667,8 @@
&& <any_extend:CODE> != <any_extend2:CODE>"
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (minus:HI (match_dup 3)
- (mult:HI (any_extend:HI (match_dup 1))
- (any_extend2:HI (match_dup 2)))))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*<any_extend:extend_su><any_extend2:extend_su>msubqihi4"
[(set (match_operand:HI 0 "register_operand" "=r")
@@ -3068,16 +2936,14 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (ashift:HI (sign_extend:HI (match_dup 1))
- (const_int 1)))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*ashiftqihi2.signx.1"
[(set (match_operand:HI 0 "register_operand" "=r,*r")
(ashift:HI (sign_extend:HI (match_operand:QI 1 "register_operand" "0,r"))
(const_int 1)))
- (clobber (reg:CC REG_CC)) ]
+ (clobber (reg:CC REG_CC))]
"reload_completed"
"@
lsl %A0\;sbc %B0,%B0
@@ -3138,6 +3004,41 @@
operands[2] = gen_int_mode (1 << INTVAL (operands[2]), QImode);
})
+(define_insn_and_split "*sext.ashift<QIPSI:mode><HISI:mode>2_split"
+ [(set (match_operand:HISI 0 "register_operand" "=r")
+ (sign_extend:HISI (ashift:QIPSI (match_operand:QIPSI 1 "register_operand" "0")
+ (match_operand:QI 2 "const_int_operand" "PKC03"))))]
+ "<HISI:SIZE> > <QIPSI:SIZE>
+ && IN_RANGE (INTVAL (operands[2]), 1, 2 + (<QIPSI:SIZE> <= 2))"
+ "#"
+ "&& reload_completed"
+ [(scratch)]
+ { DONE_ADD_CCC })
+
+(define_insn "*sext.ashift<QIPSI:mode><HISI:mode>2"
+ [(set (match_operand:HISI 0 "register_operand" "=r")
+ (sign_extend:HISI (ashift:QIPSI (match_operand:QIPSI 1 "register_operand" "0")
+ (match_operand:QI 2 "const_int_operand" "PKC03"))))
+ (clobber (reg:CC REG_CC))]
+ "reload_completed
+ && <HISI:SIZE> > <QIPSI:SIZE>
+ && IN_RANGE (INTVAL (operands[2]), 1, 2 + (<QIPSI:SIZE> <= 2))"
+ {
+ const int regno = REGNO (operands[0]);
+ // The shift.
+ for (int s = 0; s < (int) INTVAL (operands[2]); ++s)
+ for (int b = 0; b < <QIPSI:SIZE>; ++b)
+ output_asm_insn (b == 0 ? "lsl %0" : "rol %0",
+ &all_regs_rtx[regno + b]);
+ // Sign-extend can use carry.
+ for (int b = <QIPSI:SIZE>; b < <HISI:SIZE>; ++b)
+ output_asm_insn ("sbc %0,%0", &all_regs_rtx[regno + b]);
+ return "";
+ }
+ [(set (attr "length")
+ (plus (symbol_ref "<QIPSI:SIZE> * INTVAL (operands[2])")
+ (symbol_ref "<HISI:SIZE> - <QIPSI:SIZE>")))])
+
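
The output loop in the new *sext.ashift insn above is purely mechanical, so it can be
replayed standalone. A sketch that prints the sequence the insn would emit (the sizes
and starting register number are illustrative assumptions, not taken from the patch):

#include <cstdio>

static void
emit_sext_ashift (int src_size, int dst_size, int count, int regno)
{
  for (int s = 0; s < count; ++s)              // the shift
    for (int b = 0; b < src_size; ++b)
      std::printf ("%s r%d\n", b == 0 ? "lsl" : "rol", regno + b);
  for (int b = src_size; b < dst_size; ++b)    // sign-extend via carry
    std::printf ("sbc r%d,r%d\n", regno + b, regno + b);
}

int main ()
{
  emit_sext_ashift (1, 2, 1, 24);  // QI -> HI, shift 1: lsl r24 ; sbc r25,r25
}

Note that the printed length matches the insn's set_attr formula:
src_size * count + (dst_size - src_size) instructions.
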
;******************************************************************************
; mul HI: $1 = sign-/zero-/one-extend, $2 = reg
;******************************************************************************
@@ -3149,10 +3050,8 @@
"AVR_HAVE_MUL"
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (mult:HI (sign_extend:HI (match_dup 1))
- (match_dup 2)))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*mulsqihi3"
[(set (match_operand:HI 0 "register_operand" "=&r")
@@ -3174,10 +3073,8 @@
"AVR_HAVE_MUL"
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (mult:HI (zero_extend:HI (match_dup 1))
- (match_dup 2)))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*muluqihi3"
[(set (match_operand:HI 0 "register_operand" "=&r")
@@ -3201,10 +3098,8 @@
"AVR_HAVE_MUL"
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (mult:HI (not:HI (zero_extend:HI (not:QI (match_dup 1))))
- (match_dup 2)))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*muloqihi3"
[(set (match_operand:HI 0 "register_operand" "=&r")
@@ -3273,10 +3168,8 @@
"AVR_HAVE_MUL"
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (mult:HI (match_dup 1)
- (match_dup 2)))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*mulhi3_enh"
[(set (match_operand:HI 0 "register_operand" "=&r")
@@ -3315,11 +3208,8 @@
"!AVR_HAVE_MUL"
"#"
"&& reload_completed"
- [(parallel [(set (reg:HI 24)
- (mult:HI (reg:HI 24) (reg:HI 22)))
- (clobber (reg:HI 22))
- (clobber (reg:QI 21))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*mulhi3_call"
[(set (reg:HI 24)
@@ -3715,11 +3605,8 @@
"AVR_HAVE_MUL"
"#"
"&& reload_completed"
- [(parallel [(set (reg:SI 22)
- (mult:SI (reg:SI 22)
- (reg:SI 18)))
- (clobber (reg:HI 26))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn_and_split "*mulsi3_call_pr118012_split"
[(set (reg:SI 22)
@@ -3733,13 +3620,8 @@
&& ! AVR_TINY"
"#"
"&& reload_completed"
- [(parallel [(set (reg:SI 22)
- (mult:SI (reg:SI 22)
- (reg:SI 18)))
- (clobber (reg:SI 18))
- (clobber (reg:HI 26))
- (clobber (reg:HI 30))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*mulsi3_call"
[(set (reg:SI 22)
@@ -3775,10 +3657,8 @@
"AVR_HAVE_MUL"
"#"
"&& reload_completed"
- [(parallel [(set (reg:SI 22)
- (mult:SI (any_extend:SI (reg:HI 18))
- (any_extend:SI (reg:HI 26))))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*<extend_u>mulhisi3_call"
[(set (reg:SI 22)
@@ -3800,12 +3680,8 @@
"AVR_HAVE_MUL"
"#"
"&& reload_completed"
- [(parallel [(set (reg:HI 24)
- (truncate:HI (lshiftrt:SI (mult:SI (any_extend:SI (reg:HI 18))
- (any_extend:SI (reg:HI 26)))
- (const_int 16))))
- (clobber (reg:HI 22))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*<extend_su>mulhi3_highpart_call"
[(set (reg:HI 24)
@@ -3825,10 +3701,8 @@
"AVR_HAVE_MUL"
"#"
"&& reload_completed"
- [(parallel [(set (reg:SI 22)
- (mult:SI (zero_extend:SI (reg:HI 18))
- (sign_extend:SI (reg:HI 26))))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*usmulhisi3_call"
[(set (reg:SI 22)
@@ -3846,10 +3720,8 @@
"AVR_HAVE_MUL"
"#"
"&& reload_completed"
- [(parallel [(set (reg:SI 22)
- (mult:SI (any_extend:SI (reg:HI 26))
- (reg:SI 18)))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*mul<extend_su>hisi3_call"
[(set (reg:SI 22)
@@ -3867,10 +3739,8 @@
"AVR_HAVE_MUL"
"#"
"&& reload_completed"
- [(parallel [(set (reg:SI 22)
- (mult:SI (not:SI (zero_extend:SI (not:HI (reg:HI 26))))
- (reg:SI 18)))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*mulohisi3_call"
[(set (reg:SI 22)
@@ -3921,11 +3791,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (reg:QI 24) (div:QI (reg:QI 24) (reg:QI 22)))
- (set (reg:QI 25) (mod:QI (reg:QI 24) (reg:QI 22)))
- (clobber (reg:QI 22))
- (clobber (reg:QI 23))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*divmodqi4_call"
[(set (reg:QI 24) (div:QI (reg:QI 24) (reg:QI 22)))
@@ -3965,10 +3832,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (reg:QI 24) (udiv:QI (reg:QI 24) (reg:QI 22)))
- (set (reg:QI 25) (umod:QI (reg:QI 24) (reg:QI 22)))
- (clobber (reg:QI 23))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*udivmodqi4_call"
[(set (reg:QI 24) (udiv:QI (reg:QI 24) (reg:QI 22)))
@@ -4009,11 +3874,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (reg:HI 22) (div:HI (reg:HI 24) (reg:HI 22)))
- (set (reg:HI 24) (mod:HI (reg:HI 24) (reg:HI 22)))
- (clobber (reg:HI 26))
- (clobber (reg:QI 21))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*divmodhi4_call"
[(set (reg:HI 22) (div:HI (reg:HI 24) (reg:HI 22)))
@@ -4055,11 +3917,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (reg:HI 22) (udiv:HI (reg:HI 24) (reg:HI 22)))
- (set (reg:HI 24) (umod:HI (reg:HI 24) (reg:HI 22)))
- (clobber (reg:HI 26))
- (clobber (reg:QI 21))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*udivmodhi4_call"
[(set (reg:HI 22) (udiv:HI (reg:HI 24) (reg:HI 22)))
@@ -4108,10 +3967,8 @@
"AVR_HAVE_MUL"
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (mult:PSI (zero_extend:PSI (match_dup 1))
- (zero_extend:PSI (match_dup 2))))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*umulqihipsi3"
[(set (match_operand:PSI 0 "register_operand" "=&r")
@@ -4130,31 +3987,17 @@
(define_insn_and_split "*umulhiqipsi3_split"
[(set (match_operand:PSI 0 "register_operand" "=&r")
- (mult:PSI (zero_extend:PSI (match_operand:HI 2 "register_operand" "r"))
- (zero_extend:PSI (match_operand:QI 1 "register_operand" "r"))))]
+ (mult:PSI (zero_extend:PSI (match_operand:HI 1 "register_operand" "r"))
+ (zero_extend:PSI (match_operand:QI 2 "register_operand" "r"))))]
"AVR_HAVE_MUL"
"#"
"&& reload_completed"
+ ; "*umulqihipsi3"
[(parallel [(set (match_dup 0)
(mult:PSI (zero_extend:PSI (match_dup 2))
(zero_extend:PSI (match_dup 1))))
(clobber (reg:CC REG_CC))])])
-(define_insn "*umulhiqipsi3"
- [(set (match_operand:PSI 0 "register_operand" "=&r")
- (mult:PSI (zero_extend:PSI (match_operand:HI 2 "register_operand" "r"))
- (zero_extend:PSI (match_operand:QI 1 "register_operand" "r"))))
- (clobber (reg:CC REG_CC))]
- "AVR_HAVE_MUL && reload_completed"
- "mul %1,%A2
- movw %A0,r0
- mul %1,%B2
- add %B0,r0
- mov %C0,r1
- clr __zero_reg__
- adc %C0,__zero_reg__"
- [(set_attr "length" "7")])
-
(define_expand "mulsqipsi3"
[(parallel [(set (match_operand:PSI 0 "pseudo_register_operand" "")
(mult:PSI (sign_extend:PSI (match_operand:QI 1 "pseudo_register_operand" ""))
@@ -4225,10 +4068,8 @@
"AVR_HAVE_MUL"
"#"
"&& reload_completed"
- [(parallel [(set (reg:PSI 18)
- (mult:PSI (sign_extend:PSI (reg:QI 25))
- (reg:PSI 22)))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*mulsqipsi3.libgcc"
[(set (reg:PSI 18)
@@ -4249,13 +4090,8 @@
"AVR_HAVE_MUL"
"#"
"&& reload_completed"
- [(parallel [(set (reg:PSI 22)
- (mult:PSI (reg:PSI 22)
- (reg:PSI 18)))
- (clobber (reg:QI 21))
- (clobber (reg:QI 25))
- (clobber (reg:HI 26))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*mulpsi3.libgcc"
[(set (reg:PSI 22)
@@ -4307,12 +4143,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (reg:PSI 22) (div:PSI (reg:PSI 22) (reg:PSI 18)))
- (set (reg:PSI 18) (mod:PSI (reg:PSI 22) (reg:PSI 18)))
- (clobber (reg:QI 21))
- (clobber (reg:QI 25))
- (clobber (reg:QI 26))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*divmodpsi4_call"
[(set (reg:PSI 22) (div:PSI (reg:PSI 22) (reg:PSI 18)))
@@ -4356,12 +4188,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (reg:PSI 22) (udiv:PSI (reg:PSI 22) (reg:PSI 18)))
- (set (reg:PSI 18) (umod:PSI (reg:PSI 22) (reg:PSI 18)))
- (clobber (reg:QI 21))
- (clobber (reg:QI 25))
- (clobber (reg:QI 26))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*udivmodpsi4_call"
[(set (reg:PSI 22) (udiv:PSI (reg:PSI 22) (reg:PSI 18)))
@@ -4407,11 +4235,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (reg:SI 18) (div:SI (reg:SI 22) (reg:SI 18)))
- (set (reg:SI 22) (mod:SI (reg:SI 22) (reg:SI 18)))
- (clobber (reg:HI 26))
- (clobber (reg:HI 30))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*divmodsi4_call"
[(set (reg:SI 18) (div:SI (reg:SI 22) (reg:SI 18)))
@@ -4454,11 +4279,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (reg:SI 18) (udiv:SI (reg:SI 22) (reg:SI 18)))
- (set (reg:SI 22) (umod:SI (reg:SI 22) (reg:SI 18)))
- (clobber (reg:HI 26))
- (clobber (reg:HI 30))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*udivmodsi4_call"
[(set (reg:SI 18) (udiv:SI (reg:SI 22) (reg:SI 18)))
@@ -4480,10 +4302,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (and:QI (match_dup 1)
- (match_dup 2)))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*andqi3"
[(set (match_operand:QI 0 "register_operand" "=??r,d,*l ,r")
@@ -4507,11 +4327,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (and:HI (match_dup 1)
- (match_dup 2)))
- (clobber (match_dup 3))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*andhi3"
[(set (match_operand:HI 0 "register_operand" "=??r,d,d,r ,r ,r")
@@ -4541,11 +4358,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (and:PSI (match_dup 1)
- (match_dup 2)))
- (clobber (match_dup 3))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*andpsi3"
[(set (match_operand:PSI 0 "register_operand" "=??r,d,r ,r ,r")
@@ -4576,11 +4390,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (and:SI (match_dup 1)
- (match_dup 2)))
- (clobber (match_dup 3))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*andsi3"
[(set (match_operand:SI 0 "register_operand" "=??r,d,r ,r ,r")
@@ -4630,10 +4441,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (ior:QI (match_dup 1)
- (match_dup 2)))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*iorqi3"
[(set (match_operand:QI 0 "register_operand" "=??r,d,*l")
@@ -4655,11 +4464,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (ior:HI (match_dup 1)
- (match_dup 2)))
- (clobber (match_dup 3))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*iorhi3"
[(set (match_operand:HI 0 "register_operand" "=??r,d,d,r ,r")
@@ -4687,11 +4493,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (ior:PSI (match_dup 1)
- (match_dup 2)))
- (clobber (match_dup 3))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*iorpsi3"
[(set (match_operand:PSI 0 "register_operand" "=??r,d,r ,r")
@@ -4719,11 +4522,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (ior:SI (match_dup 1)
- (match_dup 2)))
- (clobber (match_dup 3))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*iorsi3"
[(set (match_operand:SI 0 "register_operand" "=??r,d,r ,r")
@@ -4754,10 +4554,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (xor:QI (match_dup 1)
- (match_dup 2)))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*xorqi3"
[(set (match_operand:QI 0 "register_operand" "=r")
@@ -4776,11 +4574,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (xor:HI (match_dup 1)
- (match_dup 2)))
- (clobber (match_dup 3))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*xorhi3"
[(set (match_operand:HI 0 "register_operand" "=??r,r ,d ,r")
@@ -4806,11 +4601,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (xor:PSI (match_dup 1)
- (match_dup 2)))
- (clobber (match_dup 3))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*xorpsi3"
[(set (match_operand:PSI 0 "register_operand" "=??r,r ,d ,r")
@@ -4838,11 +4630,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (xor:SI (match_dup 1)
- (match_dup 2)))
- (clobber (match_dup 3))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*xorsi3"
[(set (match_operand:SI 0 "register_operand" "=??r,r ,d ,r")
@@ -4914,7 +4703,7 @@
(clobber (reg:CC REG_CC))])]
"optimize
&& reload_completed"
- [(const_int 1)]
+ [(scratch)]
{
for (int i = 0; i < <SIZE>; i++)
{
@@ -5022,10 +4811,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (rotate:QI (match_dup 1)
- (match_dup 2)))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*rotlqi3"
[(set (match_operand:QI 0 "register_operand" "=r,r,r ,r ,r ,r ,r ,r")
@@ -5095,10 +4882,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (rotate:HI (match_dup 1)
- (const_int 1)))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*rotlhi2.1"
[(set (match_operand:HI 0 "register_operand" "=r")
@@ -5116,10 +4901,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (rotate:HI (match_dup 1)
- (const_int 15)))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*rotlhi2.15"
[(set (match_operand:HI 0 "register_operand" "=r")
@@ -5137,10 +4920,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (rotate:PSI (match_dup 1)
- (const_int 1)))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*rotlpsi2.1"
[(set (match_operand:PSI 0 "register_operand" "=r")
@@ -5158,10 +4939,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (rotate:PSI (match_dup 1)
- (const_int 23)))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*rotlpsi2.23"
[(set (match_operand:PSI 0 "register_operand" "=r")
@@ -5179,10 +4958,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (rotate:SI (match_dup 1)
- (const_int 1)))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*rotlsi2.1"
[(set (match_operand:SI 0 "register_operand" "=r")
@@ -5200,10 +4977,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (rotate:SI (match_dup 1)
- (const_int 31)))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*rotlsi2.31"
[(set (match_operand:SI 0 "register_operand" "=r")
@@ -5235,7 +5010,7 @@
&& 0 == INTVAL (operands[2]) % 16"
"#"
"&& reload_completed"
- [(const_int 0)]
+ [(scratch)]
{
avr_rotate_bytes (operands);
DONE;
@@ -5259,7 +5034,7 @@
&& 0 == INTVAL (operands[2]) % 16))"
"#"
"&& reload_completed"
- [(const_int 0)]
+ [(scratch)]
{
avr_rotate_bytes (operands);
DONE;
@@ -5324,10 +5099,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (ashift:ALL1 (match_dup 1)
- (match_dup 2)))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*ashl<mode>3"
[(set (match_operand:ALL1 0 "register_operand" "=r,r ,r ,r,r")
@@ -5351,11 +5124,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (ashift:ALL2 (match_dup 1)
- (match_dup 2)))
- (clobber (reg:CC REG_CC))])]
- ""
+ [(scratch)]
+ { DONE_ADD_CCC }
[(set_attr "isa" "*,*,*,3op,*,*")])
;; "*ashlhi3"
@@ -5467,11 +5237,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (ashift:ALL4 (match_dup 1)
- (match_dup 2)))
- (clobber (reg:CC REG_CC))])]
- ""
+ [(scratch)]
+ { DONE_ADD_CCC }
[(set_attr "isa" "*,*,*,3op,*,*")])
(define_insn "*ashl<mode>3"
@@ -5710,12 +5477,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (ashift:PSI (match_dup 1)
- (match_dup 2)))
- (clobber (match_dup 3))
- (clobber (reg:CC REG_CC))])]
- ""
+ [(scratch)]
+ { DONE_ADD_CCC }
[(set_attr "isa" "*,*,*,3op,*")])
(define_insn "*ashlpsi3"
@@ -5769,10 +5532,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (ashiftrt:ALL1 (match_dup 1)
- (match_dup 2)))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*ashr<mode>3"
[(set (match_operand:ALL1 0 "register_operand" "=r,r ,r ,r")
@@ -5796,11 +5557,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (ashiftrt:ALL2 (match_dup 1)
- (match_dup 2)))
- (clobber (reg:CC REG_CC))])]
- ""
+ [(scratch)]
+ { DONE_ADD_CCC }
[(set_attr "isa" "*,*,*,3op,*,*")])
;; "*ashrhi3"
@@ -5827,12 +5585,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (ashiftrt:PSI (match_dup 1)
- (match_dup 2)))
- (clobber (match_dup 3))
- (clobber (reg:CC REG_CC))])]
- ""
+ [(scratch)]
+ { DONE_ADD_CCC }
[(set_attr "isa" "*,*,*,3op,*")])
(define_insn "*ashrpsi3"
@@ -5859,11 +5613,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (ashiftrt:ALL4 (match_dup 1)
- (match_dup 2)))
- (clobber (reg:CC REG_CC))])]
- ""
+ [(scratch)]
+ { DONE_ADD_CCC }
[(set_attr "isa" "*,*,*,3op,*,*")])
(define_insn "*ashr<mode>3"
@@ -5974,10 +5725,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (lshiftrt:ALL1 (match_dup 1)
- (match_dup 2)))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*lshr<mode>3"
[(set (match_operand:ALL1 0 "register_operand" "=r,r ,r ,r,r")
@@ -6000,11 +5749,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (lshiftrt:ALL2 (match_dup 1)
- (match_dup 2)))
- (clobber (reg:CC REG_CC))])]
- ""
+ [(scratch)]
+ { DONE_ADD_CCC }
[(set_attr "isa" "*,*,*,3op,*,*")])
(define_insn "*lshr<mode>3"
@@ -6027,12 +5773,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (lshiftrt:PSI (match_dup 1)
- (match_dup 2)))
- (clobber (match_dup 3))
- (clobber (reg:CC REG_CC))])]
- ""
+ [(scratch)]
+ { DONE_ADD_CCC }
[(set_attr "isa" "*,*,*,3op,*")])
(define_insn "*lshrpsi3"
@@ -6059,11 +5801,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (lshiftrt:ALL4 (match_dup 1)
- (match_dup 2)))
- (clobber (reg:CC REG_CC))])]
- ""
+ [(scratch)]
+ { DONE_ADD_CCC }
[(set_attr "isa" "*,*,*,3op,*,*")])
(define_insn "*lshr<mode>3"
@@ -6178,9 +5917,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (abs:QI (match_dup 1)))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*absqi2"
[(set (match_operand:QI 0 "register_operand" "=r")
@@ -6198,9 +5936,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (abs:SF (match_dup 1)))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*abssf2"
[(set (match_operand:SF 0 "register_operand" "=d,r")
@@ -6221,9 +5958,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (neg:QI (match_dup 1)))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*negqi2"
[(set (match_operand:QI 0 "register_operand" "=r")
@@ -6239,9 +5975,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (neg:HI (sign_extend:HI (match_dup 1))))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*negqihi2"
[(set (match_operand:HI 0 "register_operand" "=r")
@@ -6257,9 +5992,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (neg:HI (match_dup 1)))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*neghi2"
[(set (match_operand:HI 0 "register_operand" "=r,&r")
@@ -6277,9 +6011,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (neg:PSI (match_dup 1)))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*negpsi2"
[(set (match_operand:PSI 0 "register_operand" "=!d,r,&r")
@@ -6298,10 +6031,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (neg:SI (match_dup 1)))
- (clobber (reg:CC REG_CC))])]
- ""
+ [(scratch)]
+ { DONE_ADD_CCC }
[(set_attr "isa" "*,*,mov,movw")])
(define_insn "*negsi2.libgcc"
@@ -6332,9 +6063,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (neg:SF (match_dup 1)))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*negsf2"
[(set (match_operand:SF 0 "register_operand" "=d,r")
@@ -6355,9 +6085,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (not:QI (match_dup 1)))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*one_cmplqi2"
[(set (match_operand:QI 0 "register_operand" "=r")
@@ -6373,9 +6102,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (not:HI (match_dup 1)))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*one_cmplhi2"
[(set (match_operand:HI 0 "register_operand" "=r")
@@ -6392,9 +6120,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (not:PSI (match_dup 1)))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*one_cmplpsi2"
[(set (match_operand:PSI 0 "register_operand" "=r")
@@ -6410,9 +6137,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (not:SI (match_dup 1)))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*one_cmplsi2"
[(set (match_operand:SI 0 "register_operand" "=r")
@@ -6441,9 +6167,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (sign_extend:HI (match_dup 1)))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*extendqihi2"
[(set (match_operand:HI 0 "register_operand" "=r,r")
@@ -6462,9 +6187,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (sign_extend:PSI (match_dup 1)))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*extendqipsi2"
[(set (match_operand:PSI 0 "register_operand" "=r,r")
@@ -6483,9 +6207,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (sign_extend:SI (match_dup 1)))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*extendqisi2"
[(set (match_operand:SI 0 "register_operand" "=r,r")
@@ -6504,9 +6227,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (sign_extend:PSI (match_dup 1)))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*extendhipsi2"
[(set (match_operand:PSI 0 "register_operand" "=r,r")
@@ -6525,9 +6247,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (sign_extend:SI (match_dup 1)))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*extendhisi2"
[(set (match_operand:SI 0 "register_operand" "=r,r")
@@ -6546,9 +6267,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (sign_extend:SI (match_dup 1)))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*extendpsisi2"
[(set (match_operand:SI 0 "register_operand" "=r")
@@ -6993,10 +6713,11 @@
"#"
"reload_completed"
[(set (reg:CC REG_CC)
- (compare:CC (match_dup 1) (match_dup 2)))
+ (compare:CC (match_dup 1)
+ (match_dup 2)))
(set (pc)
- (if_then_else (match_op_dup 0
- [(reg:CC REG_CC) (const_int 0)])
+ (if_then_else (match_op_dup 0 [(reg:CC REG_CC)
+ (const_int 0)])
(label_ref (match_dup 3))
(pc)))])
@@ -7015,11 +6736,12 @@
"#"
"reload_completed"
[(parallel [(set (reg:CC REG_CC)
- (compare:CC (match_dup 1) (match_dup 2)))
+ (compare:CC (match_dup 1)
+ (match_dup 2)))
(clobber (match_dup 4))])
(set (pc)
- (if_then_else (match_op_dup 0
- [(reg:CC REG_CC) (const_int 0)])
+ (if_then_else (match_op_dup 0 [(reg:CC REG_CC)
+ (const_int 0)])
(label_ref (match_dup 3))
(pc)))]
{
@@ -7042,11 +6764,12 @@
"#"
"reload_completed"
[(parallel [(set (reg:CC REG_CC)
- (compare:CC (match_dup 1) (match_dup 2)))
+ (compare:CC (match_dup 1)
+ (match_dup 2)))
(clobber (match_dup 4))])
(set (pc)
- (if_then_else (match_op_dup 0
- [(reg:CC REG_CC) (const_int 0)])
+ (if_then_else (match_op_dup 0 [(reg:CC REG_CC)
+ (const_int 0)])
(label_ref (match_dup 3))
(pc)))]
{
@@ -7070,11 +6793,12 @@
"#"
"reload_completed"
[(parallel [(set (reg:CC REG_CC)
- (compare:CC (match_dup 1) (match_dup 2)))
+ (compare:CC (match_dup 1)
+ (match_dup 2)))
(clobber (match_dup 4))])
(set (pc)
- (if_then_else (match_op_dup 0
- [(reg:CC REG_CC) (const_int 0)])
+ (if_then_else (match_op_dup 0 [(reg:CC REG_CC)
+ (const_int 0)])
(label_ref (match_dup 3))
(pc)))]
{
@@ -7629,17 +7353,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (pc)
- (if_then_else
- (match_op_dup 0
- [(zero_extract:QIDI
- (match_dup 1)
- (const_int 1)
- (match_dup 2))
- (const_int 0)])
- (label_ref (match_dup 3))
- (pc)))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*sbrx_branch<mode>"
[(set (pc)
@@ -7682,13 +7397,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (pc)
- (if_then_else (match_op_dup 0 [(and:QISI (match_dup 1)
- (match_dup 2))
- (const_int 0)])
- (label_ref (match_dup 3))
- (pc)))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*sbrx_and_branch<mode>"
[(set (pc)
@@ -7929,14 +7639,8 @@
"!AVR_HAVE_EIJMP_EICALL"
"#"
"&& reload_completed"
- [(parallel [(set (pc)
- (unspec:HI [(match_dup 0)]
- UNSPEC_INDEX_JMP))
- (use (label_ref (match_dup 1)))
- (clobber (match_dup 2))
- (clobber (const_int 0))
- (clobber (reg:CC REG_CC))])]
- ""
+ [(scratch)]
+ { DONE_ADD_CCC }
[(set_attr "isa" "rjmp,rjmp,jmp")])
(define_insn "*tablejump"
@@ -7965,14 +7669,8 @@
"AVR_HAVE_EIJMP_EICALL"
"#"
"&& reload_completed"
- [(parallel [(set (pc)
- (unspec:HI [(reg:HI REG_Z)]
- UNSPEC_INDEX_JMP))
- (use (label_ref (match_dup 0)))
- (clobber (reg:HI REG_Z))
- (clobber (reg:QI 24))
- (clobber (reg:CC REG_CC))])]
- ""
+ [(scratch)]
+ { DONE_ADD_CCC }
[(set_attr "isa" "eijmp")])
@@ -8143,17 +7841,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (pc)
- (if_then_else
- (match_operator 0 "eqne_operator"
- [(zero_extract:QIHI
- (mem:QI (match_dup 1))
- (const_int 1)
- (match_dup 2))
- (const_int 0)])
- (label_ref (match_dup 3))
- (pc)))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*sbix_branch"
[(set (pc)
@@ -8191,14 +7880,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (pc)
- (if_then_else
- (match_operator 0 "gelt_operator"
- [(mem:QI (match_dup 1))
- (const_int 0)])
- (label_ref (match_dup 2))
- (pc)))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*sbix_branch_bit7"
[(set (pc)
@@ -8238,17 +7921,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (pc)
- (if_then_else
- (match_operator 0 "eqne_operator"
- [(zero_extract:QIHI
- (mem:QI (match_dup 1))
- (const_int 1)
- (match_dup 2))
- (const_int 0)])
- (label_ref (match_dup 3))
- (pc)))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*sbix_branch_tmp"
[(set (pc)
@@ -8285,14 +7959,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (pc)
- (if_then_else
- (match_operator 0 "gelt_operator"
- [(mem:QI (match_dup 1))
- (const_int 0)])
- (label_ref (match_dup 2))
- (pc)))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*sbix_branch_tmp_bit7"
[(set (pc)
@@ -8745,13 +8413,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(unspec_volatile [(match_dup 0)
- (const_int 1)]
- UNSPECV_DELAY_CYCLES)
- (set (match_dup 1)
- (unspec_volatile:BLK [(match_dup 1)] UNSPECV_MEMORY_BARRIER))
- (clobber (match_dup 2))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*delay_cycles_1"
[(unspec_volatile [(match_operand:QI 0 "const_int_operand" "n")
@@ -8777,14 +8440,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(unspec_volatile [(match_dup 0)
- (const_int 2)]
- UNSPECV_DELAY_CYCLES)
- (set (match_dup 1)
- (unspec_volatile:BLK [(match_dup 1)] UNSPECV_MEMORY_BARRIER))
- (clobber (match_dup 2))
- (clobber (reg:CC REG_CC))])]
- ""
+ [(scratch)]
+ { DONE_ADD_CCC }
[(set_attr "isa" "adiw,no_adiw")])
(define_insn "*delay_cycles_2"
@@ -8814,15 +8471,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(unspec_volatile [(match_dup 0)
- (const_int 3)]
- UNSPECV_DELAY_CYCLES)
- (set (match_dup 1)
- (unspec_volatile:BLK [(match_dup 1)] UNSPECV_MEMORY_BARRIER))
- (clobber (match_dup 2))
- (clobber (match_dup 3))
- (clobber (match_dup 4))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*delay_cycles_3"
[(unspec_volatile [(match_operand:SI 0 "const_int_operand" "n")
@@ -8857,16 +8507,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(unspec_volatile [(match_dup 0)
- (const_int 4)]
- UNSPECV_DELAY_CYCLES)
- (set (match_dup 1)
- (unspec_volatile:BLK [(match_dup 1)] UNSPECV_MEMORY_BARRIER))
- (clobber (match_dup 2))
- (clobber (match_dup 3))
- (clobber (match_dup 4))
- (clobber (match_dup 5))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*delay_cycles_4"
[(unspec_volatile [(match_operand:SI 0 "const_int_operand" "n")
@@ -8903,12 +8545,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (unspec:QI [(match_dup 1)
- (match_dup 2)
- (match_dup 3)]
- UNSPEC_INSERT_BITS))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*insert_bits"
[(set (match_operand:QI 0 "register_operand" "=r ,d ,r")
@@ -9088,12 +8726,13 @@
"#"
"reload_completed"
[(set (reg:CC REG_CC)
- (compare:CC (match_dup 0) (const_int 0)))
+ (compare:CC (match_dup 0)
+ (const_int 0)))
(set (pc)
- (if_then_else (ge (reg:CC REG_CC) (const_int 0))
+ (if_then_else (ge (reg:CC REG_CC)
+ (const_int 0))
(label_ref (match_dup 1))
- (pc)))]
- "")
+ (pc)))])
(define_expand "flash_segment"
[(parallel [(match_operand:QI 0 "register_operand" "")
@@ -9196,9 +8835,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (reg:HI 24)
- (parity:HI (reg:HI 24)))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*parityhi2.libgcc"
[(set (reg:HI 24)
@@ -9214,9 +8852,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (reg:HI 24)
- (zero_extend:HI (parity:QI (reg:QI 24))))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*parityqihi2.libgcc"
[(set (reg:HI 24)
@@ -9232,9 +8869,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (reg:HI 24)
- (truncate:HI (parity:SI (reg:SI 22))))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*paritysihi2.libgcc"
[(set (reg:HI 24)
@@ -9290,9 +8926,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (reg:HI 24)
- (popcount:HI (reg:HI 24)))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*popcounthi2.libgcc"
[(set (reg:HI 24)
@@ -9308,9 +8943,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (reg:HI 24)
- (truncate:HI (popcount:SI (reg:SI 22))))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*popcountsi2.libgcc"
[(set (reg:HI 24)
@@ -9326,9 +8960,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (reg:QI 24)
- (popcount:QI (reg:QI 24)))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*popcountqi2.libgcc"
[(set (reg:QI 24)
@@ -9382,10 +9015,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (reg:HI 24)
- (clz:HI (reg:HI 24)))
- (clobber (reg:QI 26))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*clzhi2.libgcc"
[(set (reg:HI 24)
@@ -9403,10 +9034,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (reg:HI 24)
- (truncate:HI (clz:SI (reg:SI 22))))
- (clobber (reg:QI 26))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*clzsihi2.libgcc"
[(set (reg:HI 24)
@@ -9451,10 +9080,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (reg:HI 24)
- (ctz:HI (reg:HI 24)))
- (clobber (reg:QI 26))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*ctzhi2.libgcc"
[(set (reg:HI 24)
@@ -9473,11 +9100,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (reg:HI 24)
- (truncate:HI (ctz:SI (reg:SI 22))))
- (clobber (reg:QI 22))
- (clobber (reg:QI 26))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*ctzsihi2.libgcc"
[(set (reg:HI 24)
@@ -9523,10 +9147,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (reg:HI 24)
- (ffs:HI (reg:HI 24)))
- (clobber (reg:QI 26))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*ffshi2.libgcc"
[(set (reg:HI 24)
@@ -9545,11 +9167,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (reg:HI 24)
- (truncate:HI (ffs:SI (reg:SI 22))))
- (clobber (reg:QI 22))
- (clobber (reg:QI 26))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*ffssihi2.libgcc"
[(set (reg:HI 24)
@@ -9594,9 +9213,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (reg:SI 22)
- (bswap:SI (reg:SI 22)))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*bswapsi2.libgcc"
[(set (reg:SI 22)
@@ -9703,11 +9321,8 @@
"AVR_HAVE_MUL"
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (unspec:HI [(match_dup 1)
- (match_dup 2)]
- UNSPEC_FMUL))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*fmul_insn"
[(set (match_operand:HI 0 "register_operand" "=r")
@@ -9729,11 +9344,8 @@
"!AVR_HAVE_MUL"
"#"
"&& reload_completed"
- [(parallel [(set (reg:HI 22)
- (unspec:HI [(reg:QI 24)
- (reg:QI 25)] UNSPEC_FMUL))
- (clobber (reg:HI 24))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*fmul.call"
[(set (reg:HI 22)
@@ -9775,11 +9387,8 @@
"AVR_HAVE_MUL"
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (unspec:HI [(match_dup 1)
- (match_dup 2)]
- UNSPEC_FMULS))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*fmuls_insn"
[(set (match_operand:HI 0 "register_operand" "=r")
@@ -9801,11 +9410,8 @@
"!AVR_HAVE_MUL"
"#"
"&& reload_completed"
- [(parallel [(set (reg:HI 22)
- (unspec:HI [(reg:QI 24)
- (reg:QI 25)] UNSPEC_FMULS))
- (clobber (reg:HI 24))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*fmuls.call"
[(set (reg:HI 22)
@@ -9847,11 +9453,8 @@
"AVR_HAVE_MUL"
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (unspec:HI [(match_dup 1)
- (match_dup 2)]
- UNSPEC_FMULSU))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*fmulsu_insn"
[(set (match_operand:HI 0 "register_operand" "=r")
@@ -9873,11 +9476,8 @@
"!AVR_HAVE_MUL"
"#"
"&& reload_completed"
- [(parallel [(set (reg:HI 22)
- (unspec:HI [(reg:QI 24)
- (reg:QI 25)] UNSPEC_FMULSU))
- (clobber (reg:HI 24))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*fmulsu.call"
[(set (reg:HI 22)
@@ -9998,11 +9598,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (zero_extract:QI (match_dup 0)
- (const_int 1)
- (match_dup 1))
- (match_dup 2))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*insv.reg"
[(set (zero_extract:QI (match_operand:QI 0 "register_operand" "+r,d,d,l,l")
@@ -10439,11 +10036,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (zero_extract:QI (not:QI (match_dup 1))
- (const_int 1)
- (match_dup 2)))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*extzv.not"
[(set (match_operand:QI 0 "register_operand" "=r")
@@ -10580,11 +10174,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (and:QISI (any_shift:QISI (match_dup 1)
- (match_dup 2))
- (match_dup 3)))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*insv.any_shift.<mode>"
[(set (match_operand:QISI 0 "register_operand" "=r")
@@ -10647,11 +10238,8 @@
""
"#"
"&& reload_completed"
- [(parallel [(set (match_dup 0)
- (sign_extract:QISI (match_dup 1)
- (const_int 1)
- (match_dup 2)))
- (clobber (reg:CC REG_CC))])])
+ [(scratch)]
+ { DONE_ADD_CCC })
(define_insn "*sextr.<QISI:mode>.<QISI2:mode>"
[(set (match_operand:QISI 0 "register_operand" "=r")
diff --git a/gcc/config/avr/avr.opt b/gcc/config/avr/avr.opt
index fcd2bf6..2bed8ea 100644
--- a/gcc/config/avr/avr.opt
+++ b/gcc/config/avr/avr.opt
@@ -18,10 +18,6 @@
; along with GCC; see the file COPYING3. If not see
; <http://www.gnu.org/licenses/>.
-mlra
-Target Var(avropt_lra_p) UInteger Init(0) Optimization Undocumented
-Use LRA for reload instead of the old reload framework. This option is experimental, and it may be removed in future versions of the compiler.
-
mcall-prologues
Target Mask(CALL_PROLOGUES) Optimization
Optimization. Use subroutines for function prologues and epilogues.
@@ -164,6 +160,10 @@ mfuse-move=
Target Joined RejectNegative UInteger Var(avropt_fuse_move) Init(0) Optimization IntegerRange(0, 23)
-mfuse-move=<0,23> Optimization. Run a post-reload pass that tweaks move instructions.
+mfuse-move2
+Target Var(avropt_fuse_move2) Init(0) Optimization
+Optimization. Fuse some move insns after insn combine.
+
mabsdata
Target Mask(ABSDATA)
Assume that all data in static storage can be accessed by LDS / STS instructions. This option is only useful for reduced Tiny devices like ATtiny40.
diff --git a/gcc/config/avr/avr.opt.urls b/gcc/config/avr/avr.opt.urls
index 662fdee..fa560bc 100644
--- a/gcc/config/avr/avr.opt.urls
+++ b/gcc/config/avr/avr.opt.urls
@@ -1,7 +1,5 @@
; Autogenerated by regenerate-opt-urls.py from gcc/config/avr/avr.opt and generated HTML
-; skipping UrlSuffix for 'mlra' due to finding no URLs
-
mcall-prologues
UrlSuffix(gcc/AVR-Options.html#index-mcall-prologues)
@@ -92,6 +90,9 @@ UrlSuffix(gcc/AVR-Options.html#index-mfuse-move)
mfuse-move=
UrlSuffix(gcc/AVR-Options.html#index-mfuse-move)
+mfuse-move2
+UrlSuffix(gcc/AVR-Options.html#index-mfuse-move2)
+
mabsdata
UrlSuffix(gcc/AVR-Options.html#index-mabsdata)
diff --git a/gcc/config/avr/specs.h b/gcc/config/avr/specs.h
index ff269bf..c95c758 100644
--- a/gcc/config/avr/specs.h
+++ b/gcc/config/avr/specs.h
@@ -57,7 +57,7 @@ along with GCC; see the file COPYING3. If not see
"%(asm_errata_skip) "
#define LINK_RELAX_SPEC \
- "%{mrelax:--relax} "
+ "%{!r:%{mrelax:--relax}} "
#undef LINK_SPEC
#define LINK_SPEC \
diff --git a/gcc/config/bfin/bfin.cc b/gcc/config/bfin/bfin.cc
index 6de22a4..2cf4e77 100644
--- a/gcc/config/bfin/bfin.cc
+++ b/gcc/config/bfin/bfin.cc
@@ -3784,8 +3784,7 @@ hwloop_optimize (hwloop_info loop)
}
}
- seq = get_insns ();
- end_sequence ();
+ seq = end_sequence ();
if (loop->incoming_src)
{
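
This hunk — and the matching ones in c6x, cris and csky below — drops the
get_insns ()/end_sequence () pair in favour of an end_sequence () that returns the
emitted insns directly, presumably tracking a recent change to the sequence API. A
non-standalone sketch of the updated idiom (dst/src are placeholders):

start_sequence ();
emit_move_insn (dst, src);          /* ... build up the sequence ...      */
rtx_insn *seq = end_sequence ();    /* was: seq = get_insns ();
                                            end_sequence ();              */
emit_insn (seq);
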
diff --git a/gcc/config/c6x/c6x.cc b/gcc/config/c6x/c6x.cc
index eebff17..695a97e 100644
--- a/gcc/config/c6x/c6x.cc
+++ b/gcc/config/c6x/c6x.cc
@@ -1582,8 +1582,7 @@ c6x_expand_compare (rtx comparison, machine_mode mode)
cmp = emit_library_call_value (libfunc, 0, LCT_CONST, SImode,
op0, op_mode, op1, op_mode);
- insns = get_insns ();
- end_sequence ();
+ insns = end_sequence ();
emit_libcall_block (insns, cmp, cmp,
gen_rtx_fmt_ee (code, SImode, op0, op1));
diff --git a/gcc/config/c6x/c6x.h b/gcc/config/c6x/c6x.h
index e7da250..50bad27 100644
--- a/gcc/config/c6x/c6x.h
+++ b/gcc/config/c6x/c6x.h
@@ -444,11 +444,9 @@ struct GTY(()) machine_function
#define TARG_VEC_PERMUTE_COST 1
#endif
-/* ttype entries (the only interesting data references used) are
- sb-relative got-indirect (aka .ehtype). */
+/* .ehtype ttype entries are sb-relative. */
#define ASM_PREFERRED_EH_DATA_FORMAT(code, data) \
- (((code) == 0 && (data) == 1) ? (DW_EH_PE_datarel | DW_EH_PE_indirect) \
- : DW_EH_PE_absptr)
+ (((code) == 0 && (data) == 1) ? DW_EH_PE_datarel : DW_EH_PE_absptr)
/* This should be the same as the definition in elfos.h, plus the call
to output special unwinding directives. */
diff --git a/gcc/config/cris/cris.cc b/gcc/config/cris/cris.cc
index 42d616a..4acdd1d 100644
--- a/gcc/config/cris/cris.cc
+++ b/gcc/config/cris/cris.cc
@@ -2692,8 +2692,7 @@ cris_split_movdx (rtx *operands)
else
internal_error ("unknown dest");
- val = get_insns ();
- end_sequence ();
+ val = end_sequence ();
return val;
}
@@ -3712,9 +3711,11 @@ cris_md_asm_adjust (vec<rtx> &outputs, vec<rtx> &inputs,
 /* Determine if the source is using MOF. If it is, automatically
clobbering MOF would cause it to have impossible constraints. */
- /* Look for a use of the MOF constraint letter: h. */
+ /* Look for a use of the MOF constraint letter h or a hard register
+ constraint. */
for (unsigned i = 0, n = constraints.length(); i < n; ++i)
- if (strchr (constraints[i], 'h') != NULL)
+ if (strchr (constraints[i], 'h') != NULL
+ || strstr (constraints[i], "{mof}") != NULL)
return NULL;
/* Look for an output or an input that touches MOF. */
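
The added strstr check extends the old constraint-letter scan to cover GCC's
hard-register asm-constraint syntax. A user-level example the new test would catch,
assuming a cris-elf target and the {regname} constraint form:

int get_mof (void)
{
  int x;
  asm ("" : "={mof}" (x));   /* names MOF via a hard-register constraint */
  return x;
}
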
diff --git a/gcc/config/cris/cris.h b/gcc/config/cris/cris.h
index 1681c79..f356679 100644
--- a/gcc/config/cris/cris.h
+++ b/gcc/config/cris/cris.h
@@ -171,7 +171,7 @@ extern int cris_cpu_version;
/* For the cris-*-elf subtarget. */
#define CRIS_ASM_SUBTARGET_SPEC \
- "--em=criself %{!march=*:%{!mcpu=*:" CRIS_DEFAULT_ASM_ARCH_OPTION "}}"
+ "--emulation=criself %{!march=*:%{!mcpu=*:" CRIS_DEFAULT_ASM_ARCH_OPTION "}}"
/* FIXME: We should propagate the -melf option to make the criself
"emulation" unless a linker script is provided (-T*), but I don't know
diff --git a/gcc/config/cris/cris.md b/gcc/config/cris/cris.md
index 0c90c0e..8fe79f5 100644
--- a/gcc/config/cris/cris.md
+++ b/gcc/config/cris/cris.md
@@ -539,8 +539,7 @@
operand_subword (op1, 0, 1, DImode));
emit_move_insn (operand_subword (op0, 1, 1, DImode),
operand_subword (op1, 1, 1, DImode));
- insns = get_insns ();
- end_sequence ();
+ insns = end_sequence ();
emit_insn (insns);
DONE;
diff --git a/gcc/config/csky/csky.cc b/gcc/config/csky/csky.cc
index 16db497..9888af1 100644
--- a/gcc/config/csky/csky.cc
+++ b/gcc/config/csky/csky.cc
@@ -2899,8 +2899,7 @@ csky_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
*valuep = emit_library_call_value (get_tls_get_addr (),
NULL_RTX, LCT_PURE, /* LCT_CONST? */
Pmode, reg, Pmode);
- insns = get_insns ();
- end_sequence ();
+ insns = end_sequence ();
return insns;
}
diff --git a/gcc/config/darwin-driver.cc b/gcc/config/darwin-driver.cc
index 224e0a0..e83b7cd 100644
--- a/gcc/config/darwin-driver.cc
+++ b/gcc/config/darwin-driver.cc
@@ -64,7 +64,8 @@ validate_macosx_version_min (const char *version_str)
major = strtoul (version_str, &end, 10);
- /* macOS 10, 11, and 12 are known. clang accepts up to 99. */
+ /* macOS 10, 11, 12, 13, 14, 15 and 26 are known.
+ clang accepts up to 99. */
if (major < 10 || major > 99)
return NULL;
@@ -159,15 +160,16 @@ darwin_find_version_from_kernel (void)
if (*version_p++ != '.')
goto parse_failed;
- /* Darwin20 sees a transition to macOS 11. In this, it seems that the
- mapping to macOS minor version and patch level is now always 0, 0
- (at least for macOS 11 and 12). */
- if (major_vers >= 20)
- {
- /* Apple clang doesn't include the minor version or the patch level
- in the object file, nor does it pass it to ld */
- asprintf (&new_flag, "%d.00.00", major_vers - 9);
- }
+ /* Darwin25 saw a transition to macOS 26. */
+ if (major_vers >= 25)
+ /* Apple clang doesn't include the minor version or the patch level
+ in the object file, nor does it pass it to ld */
+ asprintf (&new_flag, "%d.00.00", major_vers + 1);
+ /* Darwin20 saw a transition to macOS 11. */
+ else if (major_vers >= 20)
+ /* Apple clang doesn't include the minor version or the patch level
+ in the object file, nor does it pass it to ld */
+ asprintf (&new_flag, "%d.00.00", major_vers - 9);
else if (major_vers - 4 <= 4)
/* On 10.4 and earlier, the old linker is used which does not
support three-component system versions.
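
A standalone replay of the kernel-major to macOS-version mapping the rewritten block
implements (the format strings are exactly the asprintf formats above; the driver
itself builds a flag rather than printing):

#include <cstdio>

static void print_macos_version (int major_vers)
{
  if (major_vers >= 25)        // Darwin 25+ -> macOS 26+
    std::printf ("%d.00.00\n", major_vers + 1);
  else if (major_vers >= 20)   // Darwin 20..24 -> macOS 11..15
    std::printf ("%d.00.00\n", major_vers - 9);
}

int main ()
{
  print_macos_version (25);    // prints 26.00.00
  print_macos_version (20);    // prints 11.00.00
}
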
diff --git a/gcc/config/darwin-sections.def b/gcc/config/darwin-sections.def
index 44adcc6..76587c2 100644
--- a/gcc/config/darwin-sections.def
+++ b/gcc/config/darwin-sections.def
@@ -215,3 +215,10 @@ DEF_SECTION (objc2_method_names_section, 0,
DEF_SECTION (objc2_method_types_section, 0,
".section __TEXT, __objc_methtype, cstring_literals", 1)
+
+/* ASAN sections. */
+
+DEF_SECTION (asan_string_section, 0, ".section __TEXT, __asan_cstring", 0)
+DEF_SECTION (asan_globals_section, 0, ".section __DATA, __asan_globals", 0)
+DEF_SECTION (asan_liveness_section, 0,
+ ".section __DATA,__asan_liveness,regular,live_support", 0)
diff --git a/gcc/config/darwin.cc b/gcc/config/darwin.cc
index be2daed..75ac356 100644
--- a/gcc/config/darwin.cc
+++ b/gcc/config/darwin.cc
@@ -49,6 +49,7 @@ along with GCC; see the file COPYING3. If not see
#include "optabs.h"
#include "flags.h"
#include "opts.h"
+#include "asan.h"
/* Fix and Continue.
@@ -1298,6 +1299,39 @@ darwin_encode_section_info (tree decl, rtx rtl, int first)
SYMBOL_FLAG_EXTERNAL. */
default_encode_section_info (decl, rtl, first);
+ if (CONSTANT_CLASS_P (decl))
+ {
+ bool is_str = TREE_CODE (decl) == STRING_CST;
+ rtx sym_ref = XEXP (rtl, 0);
+
+ /* Unless this is a string cst or we are in an anchored section we have
+ nothing more to do here. */
+ if (!is_str && !SYMBOL_REF_HAS_BLOCK_INFO_P (sym_ref))
+ return;
+
+ tree sym_decl = SYMBOL_REF_DECL (sym_ref);
+ const char *name = XSTR (sym_ref, 0);
+ gcc_checking_assert (strncmp ("*lC", name, 3) == 0);
+
+ char *buf;
+ if (is_str)
+ {
+ bool for_asan = (flag_sanitize & SANITIZE_ADDRESS)
+ && asan_protect_global (CONST_CAST_TREE (decl));
+ /* When we are generating code for sanitized strings, the string
+ internal symbols are made visible in the object. */
+ buf = xasprintf ("*%c.str.%s", for_asan ? 'l' : 'L', &name[3]);
+ }
+ else
+ /* Lets identify anchored constants with a different prefix, for the
+ sake of inspection only. */
+ buf = xasprintf ("*LaC%s", &name[3]);
+ if (sym_decl)
+ DECL_NAME (sym_decl) = get_identifier (buf);
+ XSTR (sym_ref, 0) = ggc_strdup (buf);
+ free (buf);
+ }
+
if (! VAR_OR_FUNCTION_DECL_P (decl))
return;
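
A worked example of the renaming done just above, for an assumed incoming label
"*lC5": an ASAN-protected string becomes "*l.str.5" (lowercase 'l', so the symbol
stays visible in the object), an unprotected string "*L.str.5", and an anchored
constant "*LaC5". A replay of the xasprintf calls:

#include <cstdio>

int main ()
{
  const char *name = "*lC5";                    // assumed incoming label
  std::printf ("*%c.str.%s\n", 'l', &name[3]);  // ASAN:     *l.str.5
  std::printf ("*%c.str.%s\n", 'L', &name[3]);  // plain:    *L.str.5
  std::printf ("*LaC%s\n", &name[3]);           // anchored: *LaC5
}
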
@@ -1683,6 +1717,17 @@ machopic_select_section (tree decl,
ro = TREE_READONLY (decl) || TREE_CONSTANT (decl) ;
+ /* Trump categorize_decl_for_section () for ASAN stuff - the Darwin
+ categorisations are special. */
+ if (flag_sanitize & SANITIZE_ADDRESS)
+ {
+ if (TREE_CODE (decl) == STRING_CST
+ && asan_protect_global (CONST_CAST_TREE (decl)))
+ {
+ return darwin_sections[asan_string_section];
+ }
+ }
+
switch (categorize_decl_for_section (decl, reloc))
{
case SECCAT_TEXT:
@@ -1699,7 +1744,12 @@ machopic_select_section (tree decl,
break;
case SECCAT_RODATA_MERGE_STR_INIT:
- base_section = darwin_mergeable_string_section (DECL_INITIAL (decl), align);
+ if ((flag_sanitize & SANITIZE_ADDRESS)
+ && asan_protect_global (CONST_CAST_TREE (decl)))
+ /* or !flag_merge_constants */
+ return darwin_sections[asan_string_section];
+ else
+ return darwin_mergeable_string_section (DECL_INITIAL (decl), align);
break;
case SECCAT_RODATA_MERGE_CONST:
@@ -3297,11 +3347,16 @@ darwin_use_anchors_for_symbol_p (const_rtx symbol)
{
if (DARWIN_SECTION_ANCHORS && flag_section_anchors)
{
- section *sect;
- /* If the section contains a zero-sized object it's ineligible. */
- sect = SYMBOL_REF_BLOCK (symbol)->sect;
- /* This should have the effect of disabling anchors for vars that follow
- any zero-sized one, in a given section. */
+ tree decl = SYMBOL_REF_DECL (symbol);
+      /* If the symbol would be linker-visible, then the section could be
+	 split at that point, so we must disallow anchors here.  This is
+	 stricter than the default implementation.
+	 TODO: add other cases.  */
+ if (decl && DECL_P (decl)
+ && (TREE_PUBLIC (decl) || !DECL_ARTIFICIAL (decl)))
+ return false;
+
+ /* We mark sections containing unsuitable entries. */
+ section *sect = SYMBOL_REF_BLOCK (symbol)->sect;
if (sect->common.flags & SECTION_NO_ANCHOR)
return false;
diff --git a/gcc/config/darwin.h b/gcc/config/darwin.h
index 9b9a3fe..c3e28e2 100644
--- a/gcc/config/darwin.h
+++ b/gcc/config/darwin.h
@@ -287,6 +287,19 @@ extern GTY(()) int darwin_ms_struct;
#define DARWIN_RDYNAMIC "%{rdynamic:%nrdynamic is not supported}"
#endif
+#if LD64_HAS_NO_DEDUPLICATE
+/* What we want is "when the optimization level is debug OR when it is
+ a compile & link job with implied O0 optimization". */
+#define DARWIN_LD_NO_DEDUPLICATE \
+ "%{O0|O1|O|Og: -no_deduplicate} \
+ %{!O*:\
+ %{.c|.cc|.C|.cpp|.cp|.c++|.cxx|.CPP|.m|.mm|.s|.S|.i|.ii|.mi|.mii|\
+ .f|.for|.ftn|.fpp|.f90|.f95|.f03|.f08|.f77|.F|.F90|.F95|.F03|.F08|\
+ .d|.mod: -no_deduplicate }} "
+#else
+#define DARWIN_LD_NO_DEDUPLICATE ""
+#endif
+
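A hedged illustration of which command lines the spec above matches (file names are examples; the spec's suffix matching is simplified here):

/* gcc -O0 foo.c   -> ld ... -no_deduplicate   (explicit O0/O1/O/Og)
   gcc foo.c       -> ld ... -no_deduplicate   (.c input, implied -O0)
   gcc -O2 foo.c   -> ld ...                   (optimized: keep deduplication)
   gcc -O2 foo.o   -> ld ...                   (link-only: no suffix match)  */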
#if LD64_HAS_MACOS_VERSION_MIN
# define DARWIN_PLATFORM_ID \
"%{mmacosx-version-min=*:-macos_version_min %*} "
@@ -403,10 +416,14 @@ extern GTY(()) int darwin_ms_struct;
%(linker)" \
DARWIN_LD_DEMANGLE \
LINK_PLUGIN_SPEC \
+ DARWIN_LD_NO_DEDUPLICATE \
"%{flto*:%<fcompare-debug*} \
%{flto} %{fno-lto} %{flto=*} \
- %l " \
+ %{static}%{!static:%{!dynamic:-dynamic}} \
+ %{force_cpusubtype_ALL:-arch %(darwin_arch)} \
+ %{!force_cpusubtype_ALL:-arch %(darwin_subarch)} "\
DARWIN_PLATFORM_ID \
+ " %l " \
LINK_COMPRESS_DEBUG_SPEC \
"%X %{s} %{t} %{Z} %{u*} \
%{e*} %{r} \
@@ -493,9 +510,8 @@ extern GTY(()) int darwin_ms_struct;
Note that options taking arguments may appear multiple times on a command
line with different arguments each time, so put a * after their names so
all of them get passed. */
-#define LINK_SPEC \
- "%{static}%{!static:%{!dynamic:-dynamic}} \
- %:remove-outfile(-ldl) \
+#define LINK_SPEC \
+ "%:remove-outfile(-ldl) \
%:remove-outfile(-lm) \
%:remove-outfile(-lpthread) \
%{fgnu-runtime: %{static|static-libgcc: \
@@ -511,9 +527,7 @@ extern GTY(()) int darwin_ms_struct;
%{static|static-libgm2:%:replace-outfile(-lm2iso libm2iso.a%s)}\
%{static|static-libgm2:%:replace-outfile(-lm2min libm2min.a%s)}\
%{static|static-libgm2:%:replace-outfile(-lm2log libm2log.a%s)}\
- %{static|static-libgm2:%:replace-outfile(-lm2cor libm2cor.a%s)}\
- %{force_cpusubtype_ALL:-arch %(darwin_arch)} \
- %{!force_cpusubtype_ALL:-arch %(darwin_subarch)} "\
+ %{static|static-libgm2:%:replace-outfile(-lm2cor libm2cor.a%s)} "\
LINK_SYSROOT_SPEC \
"%{!multiply_defined*:%{shared-libgcc: \
%:version-compare(< 10.5 mmacosx-version-min= -multiply_defined) \
@@ -1005,6 +1019,8 @@ extern GTY(()) section * darwin_sections[NUM_DARWIN_SECTIONS];
sprintf (LABEL, "*%s%ld", "lASAN", (long)(NUM));\
else if (strcmp ("LTRAMP", PREFIX) == 0) \
sprintf (LABEL, "*%s%ld", "lTRAMP", (long)(NUM));\
+ else if (strncmp ("LANCHOR", PREFIX, 7) == 0) \
+ sprintf (LABEL, "*%s%ld", "lANCHOR", (long)(NUM));\
else \
sprintf (LABEL, "*%s%ld", PREFIX, (long)(NUM)); \
} while (0)
diff --git a/gcc/config/epiphany/epiphany.cc b/gcc/config/epiphany/epiphany.cc
index 16626f8..f53a643 100644
--- a/gcc/config/epiphany/epiphany.cc
+++ b/gcc/config/epiphany/epiphany.cc
@@ -2816,12 +2816,16 @@ epiphany_vector_alignment_reachable (const_tree type, bool is_packed)
static bool
epiphany_support_vector_misalignment (machine_mode mode, const_tree type,
- int misalignment, bool is_packed)
+ int misalignment, bool is_packed,
+ bool is_gather_scatter)
{
+ if (is_gather_scatter)
+ return true;
if (GET_MODE_SIZE (mode) == 8 && misalignment % 4 == 0)
return true;
return default_builtin_support_vector_misalignment (mode, type, misalignment,
- is_packed);
+ is_packed,
+ is_gather_scatter);
}
/* STRUCTURE_SIZE_BOUNDARY seems a bit crude in how it enlarges small
diff --git a/gcc/config/epiphany/resolve-sw-modes.cc b/gcc/config/epiphany/resolve-sw-modes.cc
index 8ead531..1206839 100644
--- a/gcc/config/epiphany/resolve-sw-modes.cc
+++ b/gcc/config/epiphany/resolve-sw-modes.cc
@@ -169,8 +169,7 @@ pass_resolve_sw_modes::execute (function *fun)
emit_set_fp_mode (EPIPHANY_MSW_ENTITY_ROUND_UNKNOWN,
jilted_mode, FP_MODE_NONE,
reg_class_contents[NO_REGS]);
- seq = get_insns ();
- end_sequence ();
+ seq = end_sequence ();
need_commit = true;
insert_insn_on_edge (seq, e);
}
diff --git a/gcc/config/fr30/fr30.cc b/gcc/config/fr30/fr30.cc
index b956a4c..8dd7961 100644
--- a/gcc/config/fr30/fr30.cc
+++ b/gcc/config/fr30/fr30.cc
@@ -976,8 +976,7 @@ fr30_move_double (rtx * operands)
/* This should have been prevented by the constraints on movdi_insn. */
gcc_unreachable ();
- val = get_insns ();
- end_sequence ();
+ val = end_sequence ();
return val;
}
diff --git a/gcc/config/frv/frv.cc b/gcc/config/frv/frv.cc
index e53a0a0..e52bd59 100644
--- a/gcc/config/frv/frv.cc
+++ b/gcc/config/frv/frv.cc
@@ -4759,8 +4759,7 @@ frv_split_scc (rtx dest, rtx test, rtx cc_reg, rtx cr_reg, HOST_WIDE_INT value)
gen_rtx_SET (dest, const0_rtx)));
/* Finish up, return sequence. */
- ret = get_insns ();
- end_sequence ();
+ ret = end_sequence ();
return ret;
}
@@ -4931,8 +4930,7 @@ frv_split_cond_move (rtx operands[])
}
/* Finish up, return sequence. */
- ret = get_insns ();
- end_sequence ();
+ ret = end_sequence ();
return ret;
}
@@ -5062,8 +5060,7 @@ frv_split_minmax (rtx operands[])
}
/* Finish up, return sequence. */
- ret = get_insns ();
- end_sequence ();
+ ret = end_sequence ();
return ret;
}
@@ -5101,8 +5098,7 @@ frv_split_abs (rtx operands[])
gen_rtx_SET (dest, src)));
/* Finish up, return sequence. */
- ret = get_insns ();
- end_sequence ();
+ ret = end_sequence ();
return ret;
}
diff --git a/gcc/config/frv/frv.md b/gcc/config/frv/frv.md
index 1d8b8ae..8ecc633 100644
--- a/gcc/config/frv/frv.md
+++ b/gcc/config/frv/frv.md
@@ -2009,8 +2009,7 @@
gen_rtx_NE (CC_CCRmode, icr, const0_rtx),
gen_rtx_SET (dest, const0_rtx)));
- operands[3] = get_insns ();
- end_sequence ();
+ operands[3] = end_sequence ();
}")
;; Reload CC_UNSmode for unsigned integer comparisons
@@ -2074,8 +2073,7 @@
gen_rtx_NE (CC_CCRmode, icr, const0_rtx),
gen_rtx_SET (dest, const0_rtx)));
- operands[3] = get_insns ();
- end_sequence ();
+ operands[3] = end_sequence ();
}")
;; Reload CC_NZmode. This is mostly the same as the CCmode and CC_UNSmode
@@ -2245,8 +2243,7 @@
emit_insn (gen_andsi3 (int_op0, int_op0, GEN_INT (CC_MASK)));
- operands[2] = get_insns ();
- end_sequence ();
+ operands[2] = end_sequence ();
}")
;; Move a gpr value to FCC.
@@ -2329,8 +2326,7 @@
const0_rtx),
gen_rtx_SET (int_op0, const0_rtx)));
- operands[2] = get_insns ();
- end_sequence ();
+ operands[2] = end_sequence ();
}")
(define_split
@@ -2357,8 +2353,7 @@
if (! ICR_P (REGNO (operands[0])))
emit_insn (gen_movcc_ccr (operands[0], icr));
- operands[2] = get_insns ();
- end_sequence ();
+ operands[2] = end_sequence ();
}")
diff --git a/gcc/config/gcn/gcn-devices.def b/gcc/config/gcn/gcn-devices.def
index af14203..426acf0 100644
--- a/gcc/config/gcn/gcn-devices.def
+++ b/gcc/config/gcn/gcn-devices.def
@@ -171,6 +171,28 @@ GCN_DEVICE(gfx90c, GFX90C, 0x32, ISA_GCN5,
/* Generic Name */ GFX9_GENERIC
)
+GCN_DEVICE(gfx942, GFX942, 0x4c, ISA_CDNA3,
+ /* XNACK default */ HSACO_ATTR_ANY,
+ /* SRAM_ECC default */ HSACO_ATTR_ANY,
+ /* WAVE64 mode */ HSACO_ATTR_UNSUPPORTED,
+ /* CU mode */ HSACO_ATTR_UNSUPPORTED,
+ /* Max ISA VGPRs */ 512,
+ /* Generic code obj version */ 0, /* non-generic */
+ /* Architecture Family */ GFX9,
+ /* Generic Name */ NONE
+ )
+
+GCN_DEVICE(gfx950, GFX950, 0x4f, ISA_CDNA3,
+ /* XNACK default */ HSACO_ATTR_ANY,
+ /* SRAM_ECC default */ HSACO_ATTR_ANY,
+ /* WAVE64 mode */ HSACO_ATTR_UNSUPPORTED,
+ /* CU mode */ HSACO_ATTR_UNSUPPORTED,
+ /* Max ISA VGPRs */ 512,
+ /* Generic code obj version */ 0, /* non-generic */
+ /* Architecture Family */ GFX9,
+ /* Generic Name */ NONE
+ )
+
GCN_DEVICE(gfx9-generic, GFX9_GENERIC, 0x051, ISA_GCN5,
/* XNACK default */ HSACO_ATTR_ANY,
/* SRAM_ECC default */ HSACO_ATTR_UNSUPPORTED,
@@ -182,6 +204,17 @@ GCN_DEVICE(gfx9-generic, GFX9_GENERIC, 0x051, ISA_GCN5,
/* Generic Name */ NONE
)
+GCN_DEVICE(gfx9-4-generic, GFX9_4_GENERIC, 0x05f, ISA_CDNA3,
+ /* XNACK default */ HSACO_ATTR_ANY,
+ /* SRAM_ECC default */ HSACO_ATTR_UNSUPPORTED,
+ /* WAVE64 mode */ HSACO_ATTR_UNSUPPORTED,
+ /* CU mode */ HSACO_ATTR_UNSUPPORTED,
+ /* Max ISA VGPRs */ 256,
+ /* Generic code obj version */ 1,
+ /* Architecture Family */ GFX9,
+ /* Generic Name */ NONE
+ )
+
/* GCN GFX10.3 (RDNA 2) */
GCN_DEVICE(gfx1030, GFX1030, 0x36, ISA_RDNA2,
diff --git a/gcc/config/gcn/gcn-opts.h b/gcc/config/gcn/gcn-opts.h
index 88f562d..0287400 100644
--- a/gcc/config/gcn/gcn-opts.h
+++ b/gcc/config/gcn/gcn-opts.h
@@ -33,7 +33,8 @@ extern enum gcn_isa {
ISA_RDNA2,
ISA_RDNA3,
ISA_CDNA1,
- ISA_CDNA2
+ ISA_CDNA2,
+ ISA_CDNA3
} gcn_isa;
#define TARGET_GCN5 (gcn_isa == ISA_GCN5)
@@ -41,6 +42,8 @@ extern enum gcn_isa {
#define TARGET_CDNA1_PLUS (gcn_isa >= ISA_CDNA1)
#define TARGET_CDNA2 (gcn_isa == ISA_CDNA2)
#define TARGET_CDNA2_PLUS (gcn_isa >= ISA_CDNA2)
+#define TARGET_CDNA3 (gcn_isa == ISA_CDNA3)
+#define TARGET_CDNA3_PLUS (gcn_isa >= ISA_CDNA3)
#define TARGET_RDNA2 (gcn_isa == ISA_RDNA2)
#define TARGET_RDNA2_PLUS (gcn_isa >= ISA_RDNA2 && gcn_isa < ISA_CDNA1)
#define TARGET_RDNA3 (gcn_isa == ISA_RDNA3)
@@ -79,20 +82,34 @@ enum hsaco_attr_type
#define TARGET_DPP_FULL !TARGET_RDNA2_PLUS
#define TARGET_DPP16 TARGET_RDNA2_PLUS
#define TARGET_DPP8 TARGET_RDNA2_PLUS
+/* Device requires no manually inserted wait states; that's the
+   case for RDNA 2, 3 and 3.5 (but not for RDNA 4).  */
+#define TARGET_NO_MANUAL_NOPS TARGET_RDNA2_PLUS
/* Device requires CDNA1-style manually inserted wait states for AVGPRs. */
#define TARGET_AVGPR_CDNA1_NOPS TARGET_CDNA1
+/* Device requires CDNA3-style manually inserted wait states. */
+#define TARGET_CDNA3_NOPS TARGET_CDNA3
+/* Whether to use the 'globally coherent' (glc) or the 'scope' (sc0) flag
+   for non-scalar memory operations.  The string intentionally starts with
+   a space.
+   Note: for scalar memory operations (i.e. 's_...'), 'glc' is still used.
+   Note: on atomics, glc/sc0 denotes whether the pre-op value should be
+   returned.
+   CDNA3 also uses 'nt' instead of 'slc' and 'sc1' instead of 'scc'; however,
+   there is no non-scalar user so far.  */
+#define TARGET_GLC_NAME (TARGET_CDNA3 ? " sc0" : " glc")
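A minimal sketch of how this macro is consumed (mirroring the print_operand 'g' handling later in this patch; illustrative only, not part of the change):

#include <stdbool.h>
#include <stdio.h>

/* Append the cache-coherence flag for a volatile flat access;
   TARGET_GLC_NAME already carries the leading space.  */
static void
print_coherence_flag (FILE *file, bool volatile_p)
{
  if (volatile_p)
    fputs (TARGET_GLC_NAME, file);	/* " glc", or " sc0" on CDNA3.  */
}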
/* The metadata on different devices need different granularity. */
#define TARGET_VGPR_GRANULARITY \
(TARGET_RDNA3 ? 12 \
: TARGET_RDNA2_PLUS || TARGET_CDNA2_PLUS ? 8 \
: 4)
/* This mostly affects the metadata. */
-#define TARGET_ARCHITECTED_FLAT_SCRATCH TARGET_RDNA3
+#define TARGET_ARCHITECTED_FLAT_SCRATCH (TARGET_RDNA3 || TARGET_CDNA3)
 /* Device has Sub-DWord Addressing instructions.  */
#define TARGET_SDWA (!TARGET_RDNA3)
 /* Different devices use different cache control instructions.  */
-#define TARGET_WBINVL1_CACHE (!TARGET_RDNA2_PLUS)
+#define TARGET_WBINVL1_CACHE (!TARGET_RDNA2_PLUS && !TARGET_CDNA3)
#define TARGET_GLn_CACHE TARGET_RDNA2_PLUS
+#define TARGET_TARGET_SC_CACHE TARGET_CDNA3
/* Some devices have TGSPLIT, which needs at least metadata. */
#define TARGET_TGSPLIT TARGET_CDNA2_PLUS
diff --git a/gcc/config/gcn/gcn-tables.opt b/gcc/config/gcn/gcn-tables.opt
index 96ce9bd..4a381b3 100644
--- a/gcc/config/gcn/gcn-tables.opt
+++ b/gcc/config/gcn/gcn-tables.opt
@@ -49,9 +49,18 @@ EnumValue
Enum(gpu_type) String(gfx90c) Value(PROCESSOR_GFX90C)
EnumValue
+Enum(gpu_type) String(gfx942) Value(PROCESSOR_GFX942)
+
+EnumValue
+Enum(gpu_type) String(gfx950) Value(PROCESSOR_GFX950)
+
+EnumValue
Enum(gpu_type) String(gfx9-generic) Value(PROCESSOR_GFX9_GENERIC)
EnumValue
+Enum(gpu_type) String(gfx9-4-generic) Value(PROCESSOR_GFX9_4_GENERIC)
+
+EnumValue
Enum(gpu_type) String(gfx1030) Value(PROCESSOR_GFX1030)
EnumValue
diff --git a/gcc/config/gcn/gcn-valu.md b/gcc/config/gcn/gcn-valu.md
index 977ad88..a34d2e3 100644
--- a/gcc/config/gcn/gcn-valu.md
+++ b/gcc/config/gcn/gcn-valu.md
@@ -811,7 +811,7 @@
[(set_attr "type" "vop3a")
(set_attr "length" "8")
(set_attr "exec" "none")
- (set_attr "laneselect" "yes")])
+ (set_attr "laneselect" "write")])
; FIXME: 64bit operations really should be splitters, but I am not sure how
; to represent vertical subregs.
@@ -828,7 +828,7 @@
[(set_attr "type" "vmult")
(set_attr "length" "16")
(set_attr "exec" "none")
- (set_attr "laneselect" "yes")])
+ (set_attr "laneselect" "write")])
(define_expand "vec_set<mode>"
[(set (match_operand:V_MOV 0 "register_operand")
@@ -854,7 +854,7 @@
[(set_attr "type" "vop3a")
(set_attr "length" "8")
(set_attr "exec" "none")
- (set_attr "laneselect" "yes")])
+ (set_attr "laneselect" "write")])
(define_insn "*vec_set<mode>_1"
[(set (match_operand:V_2REG 0 "register_operand" "=v")
@@ -871,7 +871,7 @@
[(set_attr "type" "vmult")
(set_attr "length" "16")
(set_attr "exec" "none")
- (set_attr "laneselect" "yes")])
+ (set_attr "laneselect" "write")])
(define_insn "vec_duplicate<mode><exec>"
[(set (match_operand:V_1REG 0 "register_operand" "=v")
@@ -910,7 +910,7 @@
[(set_attr "type" "vop3a")
(set_attr "length" "8")
(set_attr "exec" "none")
- (set_attr "laneselect" "yes")])
+ (set_attr "laneselect" "read")])
(define_insn "vec_extract<mode><scalar_mode>"
[(set (match_operand:<SCALAR_MODE> 0 "register_operand" "=&Sg")
@@ -922,7 +922,7 @@
[(set_attr "type" "vmult")
(set_attr "length" "16")
(set_attr "exec" "none")
- (set_attr "laneselect" "yes")])
+ (set_attr "laneselect" "read")])
(define_insn "vec_extract<mode><scalar_mode>"
[(set (match_operand:<SCALAR_MODE> 0 "register_operand" "=&Sg")
@@ -934,7 +934,7 @@
[(set_attr "type" "vmult")
(set_attr "length" "32")
(set_attr "exec" "none")
- (set_attr "laneselect" "yes")])
+ (set_attr "laneselect" "read")])
(define_insn "vec_extract<V_1REG:mode><V_1REG_ALT:mode>_nop"
[(set (match_operand:V_1REG_ALT 0 "register_operand" "=v,v")
@@ -1133,6 +1133,23 @@
DONE;
})
+(define_expand "gather_load<mode><vndi>"
+ [(match_operand:V_MOV 0 "register_operand")
+ (match_operand:DI 1 "register_operand")
+ (match_operand:<VnDI> 2 "register_operand")
+ (match_operand 3 "immediate_operand")
+ (match_operand:SI 4 "gcn_alu_operand")]
+ ""
+ {
+ rtx addr = gcn_expand_scaled_offsets (DEFAULT_ADDR_SPACE, operands[1],
+ operands[2], operands[4],
+ INTVAL (operands[3]), NULL);
+
+ emit_insn (gen_gather<mode>_insn_1offset (operands[0], addr, const0_rtx,
+ const0_rtx, const0_rtx));
+ DONE;
+ })
+
; Allow any address expression
(define_expand "gather<mode>_expr<exec>"
[(set (match_operand:V_MOV 0 "register_operand")
@@ -1161,7 +1178,7 @@
&& (((unsigned HOST_WIDE_INT)INTVAL(operands[2]) + 0x1000) < 0x2000))"
{
addr_space_t as = INTVAL (operands[3]);
- const char *glc = INTVAL (operands[4]) ? " glc" : "";
+ const char *glc = INTVAL (operands[4]) ? TARGET_GLC_NAME : "";
static char buf[200];
if (AS_FLAT_P (as))
@@ -1175,6 +1192,7 @@
return buf;
}
[(set_attr "type" "flat")
+ (set_attr "flatmemaccess" "load")
(set_attr "length" "12")
(set_attr "cdna" "*,cdna2,*,cdna2")
(set_attr "xnack" "off,off,on,on")])
@@ -1221,7 +1239,7 @@
&& (((unsigned HOST_WIDE_INT)INTVAL(operands[3]) + 0x1000) < 0x2000))"
{
addr_space_t as = INTVAL (operands[4]);
- const char *glc = INTVAL (operands[5]) ? " glc" : "";
+ const char *glc = INTVAL (operands[5]) ? TARGET_GLC_NAME : "";
static char buf[200];
if (AS_GLOBAL_P (as))
@@ -1233,6 +1251,7 @@
return buf;
}
[(set_attr "type" "flat")
+ (set_attr "flatmemaccess" "load")
(set_attr "length" "12")
(set_attr "cdna" "*,cdna2,*,cdna2")
(set_attr "xnack" "off,off,on,on")])
@@ -1259,6 +1278,23 @@
DONE;
})
+(define_expand "scatter_store<mode><vndi>"
+ [(match_operand:DI 0 "register_operand")
+ (match_operand:<VnDI> 1 "register_operand")
+ (match_operand 2 "immediate_operand")
+ (match_operand:SI 3 "gcn_alu_operand")
+ (match_operand:V_MOV 4 "register_operand")]
+ ""
+ {
+ rtx addr = gcn_expand_scaled_offsets (DEFAULT_ADDR_SPACE, operands[0],
+ operands[1], operands[3],
+ INTVAL (operands[2]), NULL);
+
+ emit_insn (gen_scatter<mode>_insn_1offset (addr, const0_rtx, operands[4],
+ const0_rtx, const0_rtx));
+ DONE;
+ })
+
; Allow any address expression
(define_expand "scatter<mode>_expr<exec_scatter>"
[(set (mem:BLK (scratch))
@@ -1288,7 +1324,7 @@
&& (((unsigned HOST_WIDE_INT)INTVAL(operands[1]) + 0x1000) < 0x2000))"
{
addr_space_t as = INTVAL (operands[3]);
- const char *glc = INTVAL (operands[4]) ? " glc" : "";
+ const char *glc = INTVAL (operands[4]) ? TARGET_GLC_NAME : "";
static char buf[200];
if (AS_FLAT_P (as))
@@ -1301,6 +1337,7 @@
return buf;
}
[(set_attr "type" "flat")
+ (set_attr "flatmemaccess" "store")
(set_attr "length" "12")
(set_attr "cdna" "*,cdna2")])
@@ -1345,7 +1382,7 @@
&& (((unsigned HOST_WIDE_INT)INTVAL(operands[2]) + 0x1000) < 0x2000))"
{
addr_space_t as = INTVAL (operands[4]);
- const char *glc = INTVAL (operands[5]) ? " glc" : "";
+ const char *glc = INTVAL (operands[5]) ? TARGET_GLC_NAME : "";
static char buf[200];
if (AS_GLOBAL_P (as))
@@ -1356,6 +1393,7 @@
return buf;
}
[(set_attr "type" "flat")
+ (set_attr "flatmemaccess" "store")
(set_attr "length" "12")
(set_attr "cdna" "*,cdna2")])
@@ -1455,28 +1493,26 @@
;; }}}
;; {{{ ALU special case: add/sub
-(define_insn "add<mode>3<exec_clobber>"
+(define_insn "add<mode>3<exec>"
[(set (match_operand:V_INT_1REG 0 "register_operand")
(plus:V_INT_1REG
(match_operand:V_INT_1REG 1 "register_operand")
- (match_operand:V_INT_1REG 2 "gcn_alu_operand")))
- (clobber (reg:DI VCC_REG))]
+ (match_operand:V_INT_1REG 2 "gcn_alu_operand")))]
""
{@ [cons: =0, %1, 2; attrs: type, length]
- [v,v,vSvA;vop2,4] v_add_co_u32\t%0, vcc, %2, %1
+ [v,v,vSvA;vop2,4] {v_add_u32|v_add_nc_u32}\t%0, %2, %1
[v,v,vSvB;vop2,8] ^
})
-(define_insn "add<mode>3_dup<exec_clobber>"
+(define_insn "add<mode>3_dup<exec>"
[(set (match_operand:V_INT_1REG 0 "register_operand")
(plus:V_INT_1REG
(vec_duplicate:V_INT_1REG
(match_operand:<SCALAR_MODE> 2 "gcn_alu_operand"))
- (match_operand:V_INT_1REG 1 "register_operand")))
- (clobber (reg:DI VCC_REG))]
+ (match_operand:V_INT_1REG 1 "register_operand")))]
""
{@ [cons: =0, 1, 2; attrs: type, length]
- [v,v,SvA;vop2,4] v_add_co_u32\t%0, vcc, %2, %1
+ [v,v,SvA;vop2,4] {v_add_u32|v_add_nc_u32}\t%0, %2, %1
[v,v,SvB;vop2,8] ^
})
@@ -1503,16 +1539,16 @@
(plus:V_SI
(vec_duplicate:V_SI
(match_operand:SI 1 "gcn_alu_operand"))
- (match_operand:V_SI 2 "register_operand")))
+ (match_operand:V_SI 2 "gcn_alu_operand")))
(set (match_operand:DI 3 "register_operand")
- (ltu:DI (plus:V_SI (vec_duplicate:V_SI (match_dup 2))
- (match_dup 1))
- (vec_duplicate:V_SI (match_dup 2))))]
+ (ltu:DI (plus:V_SI (vec_duplicate:V_SI (match_dup 1))
+ (match_dup 2))
+ (match_dup 2)))]
""
{@ [cons: =0, 1, 2, =3; attrs: type, length]
- [v,SvA,v,cV;vop2 ,4] v_add_co_u32\t%0, %3, %1, %2
- [v,SvB,v,cV;vop2 ,8] ^
- [v,SvA,v,Sg;vop3b,8] ^
+ [v,SvA,vA,cV;vop2 ,4] v_add_co_u32\t%0, %3, %1, %2
+ [v,SvB,vA,cV;vop2 ,8] ^
+ [v,SvA,vA,Sg;vop3b,8] ^
})
; v_addc does not accept an SGPR because the VCC read already counts as an
@@ -1551,16 +1587,15 @@
[(set_attr "type" "vop2,vop3b")
(set_attr "length" "4,8")])
-(define_insn "sub<mode>3<exec_clobber>"
+(define_insn "sub<mode>3<exec>"
[(set (match_operand:V_INT_1REG 0 "register_operand" "= v, v")
(minus:V_INT_1REG
(match_operand:V_INT_1REG 1 "gcn_alu_operand" "vSvB, v")
- (match_operand:V_INT_1REG 2 "gcn_alu_operand" " v,vSvB")))
- (clobber (reg:DI VCC_REG))]
+ (match_operand:V_INT_1REG 2 "gcn_alu_operand" " v,vSvB")))]
""
"@
- v_sub_co_u32\t%0, vcc, %1, %2
- v_subrev_co_u32\t%0, vcc, %2, %1"
+ {v_sub_u32|v_sub_nc_u32}\t%0, %1, %2
+ {v_subrev_u32|v_subrev_nc_u32}\t%0, %2, %1"
[(set_attr "type" "vop2")
(set_attr "length" "8,8")])
@@ -1648,6 +1683,39 @@
[(set_attr "type" "vmult")
(set_attr "length" "8")])
+(define_insn_and_split "add<mode>3_dup"
+ [(set (match_operand:V_DI 0 "register_operand" "= v")
+ (plus:V_DI
+ (vec_duplicate:V_DI
+ (match_operand:DI 1 "register_operand" "SvB"))
+ (match_operand:V_DI 2 "gcn_alu_operand" "vDb")))
+ (clobber (reg:DI VCC_REG))
+ (clobber (match_scratch:<VnSI> 3 "=&v"))]
+ ""
+ "#"
+ "gcn_can_split_p (<MODE>mode, operands[0])
+ && gcn_can_split_p (<MODE>mode, operands[1])
+ && gcn_can_split_p (<MODE>mode, operands[2])"
+ [(const_int 0)]
+ {
+ rtx vcc = gen_rtx_REG (DImode, VCC_REG);
+ emit_insn (gen_add<vnsi>3_vcc_dup
+ (gcn_operand_part (<MODE>mode, operands[0], 0),
+ gcn_operand_part (DImode, operands[1], 0),
+ gcn_operand_part (<MODE>mode, operands[2], 0),
+ vcc));
+ emit_insn (gen_vec_duplicate<vnsi> (operands[3],
+ gcn_operand_part (DImode, operands[1], 1)));
+ emit_insn (gen_addc<vnsi>3
+ (gcn_operand_part (<MODE>mode, operands[0], 1),
+ operands[3],
+ gcn_operand_part (<MODE>mode, operands[2], 1),
+ vcc, vcc));
+ DONE;
+ }
+ [(set_attr "type" "vmult")
+ (set_attr "length" "8")])
+
(define_insn_and_split "add<mode>3_exec"
[(set (match_operand:V_DI 0 "register_operand" "= v")
(vec_merge:V_DI
@@ -1685,6 +1753,49 @@
[(set_attr "type" "vmult")
(set_attr "length" "8")])
+(define_insn_and_split "add<mode>3_dup_exec"
+ [(set (match_operand:V_DI 0 "register_operand" "= v")
+ (vec_merge:V_DI
+ (plus:V_DI
+ (vec_duplicate:V_DI
+ (match_operand:DI 1 "register_operand" "SvB"))
+ (match_operand:V_DI 2 "gcn_alu_operand" "vDb"))
+ (match_operand:V_DI 3 "gcn_register_or_unspec_operand" " U0")
+ (match_operand:DI 4 "gcn_exec_reg_operand" " e")))
+ (clobber (reg:DI VCC_REG))
+ (clobber (match_scratch:<VnSI> 5 "=&v"))]
+ ""
+ "#"
+ "gcn_can_split_p (<MODE>mode, operands[0])
+ && gcn_can_split_p (<MODE>mode, operands[1])
+ && gcn_can_split_p (<MODE>mode, operands[2])
+ && gcn_can_split_p (<MODE>mode, operands[4])"
+ [(const_int 0)]
+ {
+ rtx vcc = gen_rtx_REG (DImode, VCC_REG);
+ emit_insn (gen_add<vnsi>3_vcc_dup_exec
+ (gcn_operand_part (<MODE>mode, operands[0], 0),
+ gcn_operand_part (DImode, operands[1], 0),
+ gcn_operand_part (<MODE>mode, operands[2], 0),
+ vcc,
+ gcn_operand_part (<MODE>mode, operands[3], 0),
+ operands[4]));
+ emit_insn (gen_vec_duplicate<vnsi>_exec (operands[5],
+ gcn_operand_part (DImode, operands[1], 1),
+ gcn_gen_undef (<VnSI>mode),
+ operands[4]));
+ emit_insn (gen_addc<vnsi>3_exec
+ (gcn_operand_part (<MODE>mode, operands[0], 1),
+ operands[5],
+ gcn_operand_part (<MODE>mode, operands[2], 1),
+ vcc, vcc,
+ gcn_operand_part (<MODE>mode, operands[3], 1),
+ operands[4]));
+ DONE;
+ }
+ [(set_attr "type" "vmult")
+ (set_attr "length" "8")])
+
(define_insn_and_split "sub<mode>3"
[(set (match_operand:V_DI 0 "register_operand" "= v, v")
(minus:V_DI
@@ -1827,7 +1938,7 @@
(ltu:DI (plus:V_DI
(zero_extend:V_DI (vec_duplicate:<VnSI> (match_dup 1)))
(match_dup 2))
- (match_dup 1)))]
+ (match_dup 2)))]
""
{@ [cons: =0, 1, 2, =3]
[v,ASv,v,&Sg] #
@@ -1878,7 +1989,7 @@
(ltu:DI (plus:V_DI
(zero_extend:V_DI (vec_duplicate:<VnSI> (match_dup 1)))
(match_dup 2))
- (match_dup 1))
+ (match_dup 2))
(match_dup 5)))]
""
{@ [cons: =0, 1, 2, =3, 4, 5]
@@ -1932,7 +2043,7 @@
(ltu:DI (plus:V_DI
(zero_extend:V_DI (match_dup 1))
(vec_duplicate:V_DI (match_dup 2)))
- (match_dup 1)))]
+ (vec_duplicate:V_DI (match_dup 2))))]
""
{@ [cons: =0, 1, 2, =3]
[v,v,DbSv,&cV] #
@@ -1981,7 +2092,7 @@
(ltu:DI (plus:V_DI
(zero_extend:V_DI (match_dup 1))
(vec_duplicate:V_DI (match_dup 2)))
- (match_dup 1))
+ (vec_duplicate:V_DI (match_dup 2)))
(match_dup 5)))]
""
{@ [cons: =0, 1, 2, =3, 4, 5]
@@ -2190,6 +2301,22 @@
[(set_attr "type" "vop3a")
(set_attr "length" "8")])
+(define_insn "<su>mul<mode>3_highpart_dup<exec>"
+ [(set (match_operand:V_SI 0 "register_operand" "= v")
+ (truncate:V_SI
+ (lshiftrt:<VnDI>
+ (mult:<VnDI>
+ (any_extend:<VnDI>
+ (vec_duplicate:V_SI
+ (match_operand:SI 1 "gcn_alu_operand" "SvA")))
+ (any_extend:<VnDI>
+ (match_operand:V_SI 2 "gcn_alu_operand" " vA")))
+ (const_int 32))))]
+ ""
+ "v_mul_hi<sgnsuffix>0\t%0, %2, %1"
+ [(set_attr "type" "vop3a")
+ (set_attr "length" "8")])
+
(define_insn "mul<mode>3<exec>"
[(set (match_operand:V_INT_1REG 0 "register_operand" "= v")
(mult:V_INT_1REG
@@ -2201,11 +2328,11 @@
(set_attr "length" "8")])
(define_insn "mul<mode>3_dup<exec>"
- [(set (match_operand:V_INT_1REG 0 "register_operand" "= v")
+ [(set (match_operand:V_INT_1REG 0 "register_operand" "= v")
(mult:V_INT_1REG
- (match_operand:V_INT_1REG 1 "gcn_alu_operand" "%vSvA")
(vec_duplicate:V_INT_1REG
- (match_operand:<SCALAR_MODE> 2 "gcn_alu_operand" " SvA"))))]
+ (match_operand:<SCALAR_MODE> 1 "gcn_alu_operand" "SvA"))
+ (match_operand:V_INT_1REG 2 "gcn_alu_operand" " vA")))]
""
"v_mul_lo_u32\t%0, %1, %2"
[(set_attr "type" "vop3a")
@@ -2241,6 +2368,37 @@
DONE;
})
+(define_insn_and_split "mul<mode>3_dup"
+ [(set (match_operand:V_DI 0 "register_operand" "=&v")
+ (mult:V_DI
+ (vec_duplicate:V_DI
+ (match_operand:DI 1 "gcn_alu_operand" " Sv"))
+ (match_operand:V_DI 2 "gcn_alu_operand" "vDA")))
+ (clobber (match_scratch:<VnSI> 3 "=&v"))]
+ ""
+ "#"
+ "reload_completed"
+ [(const_int 0)]
+ {
+ rtx out_lo = gcn_operand_part (<MODE>mode, operands[0], 0);
+ rtx out_hi = gcn_operand_part (<MODE>mode, operands[0], 1);
+ rtx left_lo = gcn_operand_part (DImode, operands[1], 0);
+ rtx left_hi = gcn_operand_part (DImode, operands[1], 1);
+ rtx right_lo = gcn_operand_part (<MODE>mode, operands[2], 0);
+ rtx right_hi = gcn_operand_part (<MODE>mode, operands[2], 1);
+ rtx tmp = operands[3];
+
+ emit_insn (gen_mul<vnsi>3_dup (out_lo, left_lo, right_lo));
+ emit_insn (gen_umul<vnsi>3_highpart_dup (out_hi, left_lo, right_lo));
+ emit_insn (gen_mul<vnsi>3_dup (tmp, left_hi, right_lo));
+ emit_insn (gen_add<vnsi>3 (out_hi, out_hi, tmp));
+ emit_insn (gen_mul<vnsi>3_dup (tmp, left_lo, right_hi));
+ emit_insn (gen_add<vnsi>3 (out_hi, out_hi, tmp));
+ emit_insn (gen_mul<vnsi>3_dup (tmp, left_hi, right_hi));
+ emit_insn (gen_add<vnsi>3 (out_hi, out_hi, tmp));
+ DONE;
+ })
+
(define_insn_and_split "mul<mode>3_exec"
[(set (match_operand:V_DI 0 "register_operand" "=&v")
(vec_merge:V_DI
@@ -2289,6 +2447,56 @@
DONE;
})
+(define_insn_and_split "mul<mode>3_dup_exec"
+ [(set (match_operand:V_DI 0 "register_operand" "=&v")
+ (vec_merge:V_DI
+ (mult:V_DI
+ (vec_duplicate:V_DI
+ (match_operand:DI 1 "gcn_alu_operand" " Sv"))
+ (match_operand:V_DI 2 "gcn_alu_operand" "vDA"))
+ (match_operand:V_DI 3 "gcn_register_or_unspec_operand" " U0")
+ (match_operand:DI 4 "gcn_exec_reg_operand" " e")))
+ (clobber (match_scratch:<VnSI> 5 "=&v"))]
+ ""
+ "#"
+ "reload_completed"
+ [(const_int 0)]
+ {
+ rtx out_lo = gcn_operand_part (<MODE>mode, operands[0], 0);
+ rtx out_hi = gcn_operand_part (<MODE>mode, operands[0], 1);
+ rtx left_lo = gcn_operand_part (DImode, operands[1], 0);
+ rtx left_hi = gcn_operand_part (DImode, operands[1], 1);
+ rtx right_lo = gcn_operand_part (<MODE>mode, operands[2], 0);
+ rtx right_hi = gcn_operand_part (<MODE>mode, operands[2], 1);
+ rtx exec = operands[4];
+ rtx tmp = operands[5];
+
+ rtx old_lo, old_hi;
+ if (GET_CODE (operands[3]) == UNSPEC)
+ {
+ old_lo = old_hi = gcn_gen_undef (<VnSI>mode);
+ }
+ else
+ {
+ old_lo = gcn_operand_part (<MODE>mode, operands[3], 0);
+ old_hi = gcn_operand_part (<MODE>mode, operands[3], 1);
+ }
+
+ rtx undef = gcn_gen_undef (<VnSI>mode);
+
+ emit_insn (gen_mul<vnsi>3_dup_exec (out_lo, left_lo, right_lo, old_lo,
+ exec));
+ emit_insn (gen_umul<vnsi>3_highpart_dup_exec (out_hi, left_lo, right_lo,
+ old_hi, exec));
+ emit_insn (gen_mul<vnsi>3_dup_exec (tmp, left_hi, right_lo, undef, exec));
+ emit_insn (gen_add<vnsi>3_exec (out_hi, out_hi, tmp, out_hi, exec));
+ emit_insn (gen_mul<vnsi>3_dup_exec (tmp, left_lo, right_hi, undef, exec));
+ emit_insn (gen_add<vnsi>3_exec (out_hi, out_hi, tmp, out_hi, exec));
+ emit_insn (gen_mul<vnsi>3_dup_exec (tmp, left_hi, right_hi, undef, exec));
+ emit_insn (gen_add<vnsi>3_exec (out_hi, out_hi, tmp, out_hi, exec));
+ DONE;
+ })
+
(define_insn_and_split "mul<mode>3_zext"
[(set (match_operand:V_DI 0 "register_operand" "=&v")
(mult:V_DI
@@ -3056,7 +3264,8 @@
"flag_unsafe_math_optimizations"
"v_sqrt%i0\t%0, %1"
[(set_attr "type" "vop1")
- (set_attr "length" "8")])
+ (set_attr "length" "8")
+ (set_attr "transop" "yes")])
(define_insn "sqrt<mode>2"
[(set (match_operand:FP 0 "register_operand" "= v")
@@ -3065,7 +3274,8 @@
"flag_unsafe_math_optimizations"
"v_sqrt%i0\t%0, %1"
[(set_attr "type" "vop1")
- (set_attr "length" "8")])
+ (set_attr "length" "8")
+ (set_attr "transop" "yes")])
; These FP unops have f64, f32 and f16 versions.
(define_int_iterator MATH_UNOP_1OR2REG
@@ -3355,7 +3565,8 @@
""
"v_rcp%i0\t%0, %1"
[(set_attr "type" "vop1")
- (set_attr "length" "8")])
+ (set_attr "length" "8")
+ (set_attr "transop" "yes")])
;; v_div_scale takes a numerator (op2) and denominator (op1) and returns the
;; one that matches op3 adjusted for best results in reciprocal division.
@@ -3727,6 +3938,7 @@
v_cmpx%E1\t%2, %3
v_cmpx%E1\t%2, %3"
[(set_attr "type" "vopc,vopc,vopc,vopc,vop3a,vop3a,vopc,vopc")
+ (set_attr "vcmp" "vcmp,vcmp,vcmpx,vcmpx,vcmp,vcmp,vcmpx,vcmpx")
(set_attr "length" "4,8,4,8,8,8,4,8")
(set_attr "rdna" "*,*,no,no,*,*,yes,yes")])
@@ -3781,6 +3993,7 @@
v_cmpx%E1\t%2, %3
v_cmpx%E1\t%2, %3"
[(set_attr "type" "vopc,vopc,vopc,vopc,vop3a,vop3a,vopc,vopc")
+ (set_attr "vcmp" "vcmp,vcmp,vcmpx,vcmpx,vcmp,vcmp,vcmpx,vcmpx")
(set_attr "length" "4,8,4,8,8,8,4,8")
(set_attr "rdna" "*,*,no,no,*,*,yes,yes")])
@@ -3795,9 +4008,9 @@
/* Unsigned comparisons use the same patterns as signed comparisons,
except that they use unsigned operators (e.g. LTU vs LT).
The '%E1' directive then does the Right Thing. */
- emit_insn (gen_vec_cmpu<mode>di_exec (operands[0], operands[1],
- operands[2], operands[3],
- operands[4]));
+ emit_insn (gen_vec_cmp<mode>di_exec (operands[0], operands[1],
+ operands[2], operands[3],
+ operands[4]));
DONE;
})
@@ -3839,6 +4052,7 @@
v_cmpx%E1\t%2, %3
v_cmpx%E1\t%2, %3"
[(set_attr "type" "vopc,vopc,vopc,vopc,vop3a,vopc,vopc")
+ (set_attr "vcmp" "vcmp,vcmp,vcmpx,vcmpx,vcmp,vcmpx,vcmpx")
(set_attr "length" "4,8,4,8,8,4,8")
(set_attr "rdna" "*,*,no,no,*,yes,yes")])
@@ -3862,6 +4076,7 @@
v_cmpx%E1\t%2, %3
v_cmpx%E1\t%2, %3"
[(set_attr "type" "vopc,vopc,vopc,vopc,vop3a,vopc,vopc")
+ (set_attr "vcmp" "vcmp,vcmp,vcmpx,vcmpx,vcmp,vcmpx,vcmpx")
(set_attr "length" "4,8,4,8,8,4,8")
(set_attr "rdna" "*,*,no,no,*,yes,yes")])
@@ -4052,6 +4267,32 @@
DONE;
})
+(define_expand "mask_gather_load<mode><vndi>"
+ [(set:V_MOV (match_operand:V_MOV 0 "register_operand")
+ (unspec:V_MOV
+ [(match_operand:DI 1 "register_operand")
+ (match_operand:<VnDI> 2 "register_operand")
+ (match_operand 3 "immediate_operand")
+ (match_operand:SI 4 "gcn_alu_operand")
+ (match_operand:DI 5 "")
+ (match_operand:V_MOV 6 "maskload_else_operand")]
+ UNSPEC_GATHER))]
+ ""
+ {
+ rtx exec = force_reg (DImode, operands[5]);
+
+ rtx addr = gcn_expand_scaled_offsets (DEFAULT_ADDR_SPACE, operands[1],
+ operands[2], operands[4],
+ INTVAL (operands[3]), exec);
+
+ emit_insn (gen_gather<mode>_insn_1offset_exec (operands[0], addr,
+ const0_rtx, const0_rtx,
+ const0_rtx,
+ gcn_gen_undef (<MODE>mode),
+ exec));
+ DONE;
+ })
+
(define_expand "mask_scatter_store<mode><vnsi>"
[(match_operand:DI 0 "register_operand")
(match_operand:<VnSI> 1 "register_operand")
@@ -4080,6 +4321,27 @@
DONE;
})
+(define_expand "mask_scatter_store<mode><vndi>"
+ [(match_operand:DI 0 "register_operand")
+ (match_operand:<VnDI> 1 "register_operand")
+ (match_operand 2 "immediate_operand")
+ (match_operand:SI 3 "gcn_alu_operand")
+ (match_operand:V_MOV 4 "register_operand")
+ (match_operand:DI 5 "")]
+ ""
+ {
+ rtx exec = force_reg (DImode, operands[5]);
+
+ rtx addr = gcn_expand_scaled_offsets (DEFAULT_ADDR_SPACE, operands[0],
+ operands[1], operands[3],
+ INTVAL (operands[2]), exec);
+
+ emit_insn (gen_scatter<mode>_insn_1offset_exec (addr, const0_rtx,
+ operands[4], const0_rtx,
+ const0_rtx, exec));
+ DONE;
+ })
+
(define_code_iterator cond_op [plus minus mult])
(define_expand "cond_<expander><mode>"
@@ -4400,7 +4662,7 @@
rtx tmp = gen_reg_rtx (<MODE>mode);
rtx v1 = gen_rtx_REG (<MODE>mode, VGPR_REGNO (1));
- emit_insn (gen_mul<mode>3_dup (tmp, v1, operands[2]));
+ emit_insn (gen_mul<mode>3_dup (tmp, operands[2], v1));
emit_insn (gen_add<mode>3_dup (operands[0], tmp, operands[1]));
DONE;
})
diff --git a/gcc/config/gcn/gcn.cc b/gcc/config/gcn/gcn.cc
index d59e87b..5ffeb23 100644
--- a/gcc/config/gcn/gcn.cc
+++ b/gcc/config/gcn/gcn.cc
@@ -54,6 +54,7 @@
#include "gimple.h"
#include "cgraph.h"
#include "case-cfn-macros.h"
+#include "opts.h"
/* This file should be included last. */
#include "target-def.h"
@@ -183,6 +184,11 @@ gcn_option_override (void)
if (flag_sram_ecc == HSACO_ATTR_DEFAULT)
flag_sram_ecc = gcn_devices[gcn_arch].sramecc_default;
+
+  /* TODO: This seems to produce tighter loops, but the testsuite expects it
+     to be set to '2', so I'll leave it at the default for now.
+ SET_OPTION_IF_UNSET (&global_options, &global_options_set,
+ param_vect_partial_vector_usage, 1); */
}
/* }}} */
@@ -585,9 +591,8 @@ gcn_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
case XNACK_MASK_HI_REG:
case TBA_HI_REG:
case TMA_HI_REG:
- return mode == SImode;
case VCC_HI_REG:
- return false;
+ return mode == SImode;
case EXEC_HI_REG:
return mode == SImode /*|| mode == V32BImode */ ;
case SCC_REG:
@@ -1276,13 +1281,13 @@ gen_##PREFIX##vN##SUFFIX (PARAMS) \
}
#define GEN_VNM_NOEXEC(PREFIX, SUFFIX, PARAMS, ARGS) \
-GEN_VN_NOEXEC (PREFIX, qi##SUFFIX, A(PARAMS), A(ARGS)) \
-GEN_VN_NOEXEC (PREFIX, hi##SUFFIX, A(PARAMS), A(ARGS)) \
-GEN_VN_NOEXEC (PREFIX, hf##SUFFIX, A(PARAMS), A(ARGS)) \
+USE_QHF (GEN_VN_NOEXEC (PREFIX, qi##SUFFIX, A(PARAMS), A(ARGS))) \
+USE_QHF (GEN_VN_NOEXEC (PREFIX, hi##SUFFIX, A(PARAMS), A(ARGS))) \
+USE_QHF (GEN_VN_NOEXEC (PREFIX, hf##SUFFIX, A(PARAMS), A(ARGS))) \
GEN_VN_NOEXEC (PREFIX, si##SUFFIX, A(PARAMS), A(ARGS)) \
-GEN_VN_NOEXEC (PREFIX, sf##SUFFIX, A(PARAMS), A(ARGS)) \
+USE_QHF (GEN_VN_NOEXEC (PREFIX, sf##SUFFIX, A(PARAMS), A(ARGS))) \
GEN_VN_NOEXEC (PREFIX, di##SUFFIX, A(PARAMS), A(ARGS)) \
-GEN_VN_NOEXEC (PREFIX, df##SUFFIX, A(PARAMS), A(ARGS)) \
+USE_QHF (GEN_VN_NOEXEC (PREFIX, df##SUFFIX, A(PARAMS), A(ARGS))) \
static rtx \
gen_##PREFIX##vNm##SUFFIX (PARAMS) \
{ \
@@ -1290,13 +1295,13 @@ gen_##PREFIX##vNm##SUFFIX (PARAMS) \
\
switch (mode) \
{ \
- case E_QImode: return gen_##PREFIX##vNqi##SUFFIX (ARGS); \
- case E_HImode: return gen_##PREFIX##vNhi##SUFFIX (ARGS); \
- case E_HFmode: return gen_##PREFIX##vNhf##SUFFIX (ARGS); \
+ USE_QHF (case E_QImode: return gen_##PREFIX##vNqi##SUFFIX (ARGS);) \
+ USE_QHF (case E_HImode: return gen_##PREFIX##vNhi##SUFFIX (ARGS);) \
+ USE_QHF (case E_HFmode: return gen_##PREFIX##vNhf##SUFFIX (ARGS);) \
case E_SImode: return gen_##PREFIX##vNsi##SUFFIX (ARGS); \
- case E_SFmode: return gen_##PREFIX##vNsf##SUFFIX (ARGS); \
+ USE_QHF (case E_SFmode: return gen_##PREFIX##vNsf##SUFFIX (ARGS);) \
case E_DImode: return gen_##PREFIX##vNdi##SUFFIX (ARGS); \
- case E_DFmode: return gen_##PREFIX##vNdf##SUFFIX (ARGS); \
+ USE_QHF (case E_DFmode: return gen_##PREFIX##vNdf##SUFFIX (ARGS);) \
default: \
break; \
} \
@@ -1341,13 +1346,13 @@ gen_##PREFIX##vN##SUFFIX (PARAMS, rtx merge_src=NULL, rtx exec=NULL) \
}
#define GEN_VNM(PREFIX, SUFFIX, PARAMS, ARGS) \
-GEN_VN (PREFIX, qi##SUFFIX, A(PARAMS), A(ARGS)) \
-GEN_VN (PREFIX, hi##SUFFIX, A(PARAMS), A(ARGS)) \
-GEN_VN (PREFIX, hf##SUFFIX, A(PARAMS), A(ARGS)) \
+USE_QHF (GEN_VN (PREFIX, qi##SUFFIX, A(PARAMS), A(ARGS))) \
+USE_QHF (GEN_VN (PREFIX, hi##SUFFIX, A(PARAMS), A(ARGS))) \
+USE_QHF (GEN_VN (PREFIX, hf##SUFFIX, A(PARAMS), A(ARGS))) \
GEN_VN (PREFIX, si##SUFFIX, A(PARAMS), A(ARGS)) \
-GEN_VN (PREFIX, sf##SUFFIX, A(PARAMS), A(ARGS)) \
+USE_QHF (GEN_VN (PREFIX, sf##SUFFIX, A(PARAMS), A(ARGS))) \
GEN_VN (PREFIX, di##SUFFIX, A(PARAMS), A(ARGS)) \
-GEN_VN (PREFIX, df##SUFFIX, A(PARAMS), A(ARGS)) \
+USE_QHF (GEN_VN (PREFIX, df##SUFFIX, A(PARAMS), A(ARGS))) \
USE_TI (GEN_VN (PREFIX, ti##SUFFIX, A(PARAMS), A(ARGS))) \
static rtx \
gen_##PREFIX##vNm##SUFFIX (PARAMS, rtx merge_src=NULL, rtx exec=NULL) \
@@ -1356,15 +1361,22 @@ gen_##PREFIX##vNm##SUFFIX (PARAMS, rtx merge_src=NULL, rtx exec=NULL) \
\
switch (mode) \
{ \
- case E_QImode: return gen_##PREFIX##vNqi##SUFFIX (ARGS, merge_src, exec); \
- case E_HImode: return gen_##PREFIX##vNhi##SUFFIX (ARGS, merge_src, exec); \
- case E_HFmode: return gen_##PREFIX##vNhf##SUFFIX (ARGS, merge_src, exec); \
- case E_SImode: return gen_##PREFIX##vNsi##SUFFIX (ARGS, merge_src, exec); \
- case E_SFmode: return gen_##PREFIX##vNsf##SUFFIX (ARGS, merge_src, exec); \
- case E_DImode: return gen_##PREFIX##vNdi##SUFFIX (ARGS, merge_src, exec); \
- case E_DFmode: return gen_##PREFIX##vNdf##SUFFIX (ARGS, merge_src, exec); \
- case E_TImode: \
- USE_TI (return gen_##PREFIX##vNti##SUFFIX (ARGS, merge_src, exec);) \
+ USE_QHF (case E_QImode: \
+ return gen_##PREFIX##vNqi##SUFFIX (ARGS, merge_src, exec);) \
+ USE_QHF (case E_HImode: \
+ return gen_##PREFIX##vNhi##SUFFIX (ARGS, merge_src, exec);) \
+ USE_QHF (case E_HFmode: \
+ return gen_##PREFIX##vNhf##SUFFIX (ARGS, merge_src, exec);) \
+ case E_SImode: \
+ return gen_##PREFIX##vNsi##SUFFIX (ARGS, merge_src, exec); \
+ USE_QHF (case E_SFmode: \
+ return gen_##PREFIX##vNsf##SUFFIX (ARGS, merge_src, exec);) \
+ case E_DImode: \
+ return gen_##PREFIX##vNdi##SUFFIX (ARGS, merge_src, exec); \
+ USE_QHF (case E_DFmode: \
+ return gen_##PREFIX##vNdf##SUFFIX (ARGS, merge_src, exec);) \
+ USE_TI (case E_TImode: \
+ return gen_##PREFIX##vNti##SUFFIX (ARGS, merge_src, exec);) \
default: \
break; \
} \
@@ -1373,7 +1385,8 @@ gen_##PREFIX##vNm##SUFFIX (PARAMS, rtx merge_src=NULL, rtx exec=NULL) \
return NULL_RTX; \
}
-/* These have TImode support. */
+/* These support everything. */
+#define USE_QHF(ARGS) ARGS
#define USE_TI(ARGS) ARGS
GEN_VNM (mov,, A(rtx dest, rtx src), A(dest, src))
GEN_VNM (vec_duplicate,, A(rtx dest, rtx src), A(dest, src))
@@ -1383,6 +1396,7 @@ GEN_VNM (vec_duplicate,, A(rtx dest, rtx src), A(dest, src))
#define USE_TI(ARGS)
GEN_VNM (add,3, A(rtx dest, rtx src1, rtx src2), A(dest, src1, src2))
GEN_VN (add,si3_dup, A(rtx dest, rtx src1, rtx src2), A(dest, src1, src2))
+GEN_VN (add,di3_dup, A(rtx dest, rtx src1, rtx src2), A(dest, src1, src2))
GEN_VN (add,si3_vcc_dup, A(rtx dest, rtx src1, rtx src2, rtx vcc),
A(dest, src1, src2, vcc))
GEN_VN (add,di3_sext_dup2, A(rtx dest, rtx src1, rtx src2), A(dest, src1, src2))
@@ -1394,15 +1408,20 @@ GEN_VN (add,di3_vcc_zext_dup2, A(rtx dest, rtx src1, rtx src2, rtx vcc),
GEN_VN (addc,si3, A(rtx dest, rtx src1, rtx src2, rtx vccout, rtx vccin),
A(dest, src1, src2, vccout, vccin))
GEN_VN (and,si3, A(rtx dest, rtx src1, rtx src2), A(dest, src1, src2))
-GEN_VN (ashl,si3, A(rtx dest, rtx src, rtx shift), A(dest, src, shift))
GEN_VNM_NOEXEC (ds_bpermute,, A(rtx dest, rtx addr, rtx src, rtx exec),
A(dest, addr, src, exec))
GEN_VNM (gather,_expr, A(rtx dest, rtx addr, rtx as, rtx vol),
A(dest, addr, as, vol))
-GEN_VN (mul,si3_dup, A(rtx dest, rtx src1, rtx src2), A(dest, src1, src2))
GEN_VN (sub,si3, A(rtx dest, rtx src1, rtx src2), A(dest, src1, src2))
GEN_VN_NOEXEC (vec_series,si, A(rtx dest, rtx x, rtx c), A(dest, x, c))
+/* These do not have QI, HI, or any FP support. */
+#undef USE_QHF
+#define USE_QHF(ARGS)
+GEN_VNM (ashl,3, A(rtx dest, rtx src, rtx shift), A(dest, src, shift))
+GEN_VNM (mul,3_dup, A(rtx dest, rtx src1, rtx src2), A(dest, src1, src2))
+
+#undef USE_QHF
#undef USE_TI
#undef GEN_VNM
#undef GEN_VN
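To make the macro machinery concrete, here is a hedged, hand-expanded sketch of what GEN_VNM (ashl,3, ...) yields once USE_QHF expands to nothing (simplified; the inner-mode dispatch is assumed to follow the GEN_VNM definition above):

static rtx
gen_ashlvNm3 (rtx dest, rtx src, rtx shift, rtx merge_src = NULL,
	      rtx exec = NULL)
{
  /* Only the SI and DI cases survive when USE_QHF (ARGS) is empty.  */
  machine_mode mode = GET_MODE_INNER (GET_MODE (dest));
  switch (mode)
    {
    case E_SImode: return gen_ashlvNsi3 (dest, src, shift, merge_src, exec);
    case E_DImode: return gen_ashlvNdi3 (dest, src, shift, merge_src, exec);
    default: break;
    }
  gcc_unreachable ();
  return NULL_RTX;
}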
@@ -1996,8 +2015,8 @@ gcn_expand_vector_init (rtx op0, rtx vec)
rtx addr = gen_reg_rtx (addrmode);
int unit_size = GET_MODE_SIZE (GET_MODE_INNER (GET_MODE (op0)));
- emit_insn (gen_mulvNsi3_dup (ramp, gen_rtx_REG (offsetmode, VGPR_REGNO (1)),
- GEN_INT (unit_size)));
+ emit_insn (gen_mulvNsi3_dup (ramp, GEN_INT (unit_size),
+ gen_rtx_REG (offsetmode, VGPR_REGNO (1))));
bool simple_repeat = true;
@@ -2294,36 +2313,46 @@ gcn_expand_scalar_to_vector_address (machine_mode mode, rtx exec, rtx mem,
Return values.
ADDR_SPACE_FLAT - return VnDImode vector of absolute addresses.
- ADDR_SPACE_GLOBAL - return VnSImode vector of offsets. */
+ ADDR_SPACE_GLOBAL - return VnSImode vector of offsets.
+ 64-bit offsets - return VnDImode vector of absolute addresses. */
rtx
gcn_expand_scaled_offsets (addr_space_t as, rtx base, rtx offsets, rtx scale,
bool unsigned_p, rtx exec)
{
int vf = GET_MODE_NUNITS (GET_MODE (offsets));
- rtx tmpsi = gen_reg_rtx (VnMODE (vf, SImode));
- rtx tmpdi = gen_reg_rtx (VnMODE (vf, DImode));
+ rtx scaled_offsets = gen_reg_rtx (GET_MODE (offsets));
+ rtx abs_addr = gen_reg_rtx (VnMODE (vf, DImode));
+ bool use_di = GET_MODE_INNER (GET_MODE (scaled_offsets)) == DImode;
if (CONST_INT_P (scale)
&& INTVAL (scale) > 0
&& exact_log2 (INTVAL (scale)) >= 0)
- emit_insn (gen_ashlvNsi3 (tmpsi, offsets,
- GEN_INT (exact_log2 (INTVAL (scale))),
- NULL, exec));
+ emit_insn (gen_ashlvNm3 (scaled_offsets, offsets,
+ GEN_INT (exact_log2 (INTVAL (scale))),
+ NULL, exec));
else
- emit_insn (gen_mulvNsi3_dup (tmpsi, offsets, scale, NULL, exec));
+ emit_insn (gen_mulvNm3_dup (scaled_offsets, scale, offsets, NULL, exec));
+ /* No instructions support DImode offsets. */
+ if (use_di)
+ {
+ emit_insn (gen_addvNdi3_dup (abs_addr, base, scaled_offsets, NULL, exec));
+ return abs_addr;
+ }
/* "Global" instructions do not support negative register offsets. */
- if (as == ADDR_SPACE_FLAT || !unsigned_p)
+ else if (as == ADDR_SPACE_FLAT || !unsigned_p)
{
if (unsigned_p)
- emit_insn (gen_addvNdi3_zext_dup2 (tmpdi, tmpsi, base, NULL, exec));
+ emit_insn (gen_addvNdi3_zext_dup2 (abs_addr, scaled_offsets, base,
+ NULL, exec));
else
- emit_insn (gen_addvNdi3_sext_dup2 (tmpdi, tmpsi, base, NULL, exec));
- return tmpdi;
+ emit_insn (gen_addvNdi3_sext_dup2 (abs_addr, scaled_offsets, base,
+ NULL, exec));
+ return abs_addr;
}
else if (as == ADDR_SPACE_GLOBAL)
- return tmpsi;
+ return scaled_offsets;
gcc_unreachable ();
}
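Per lane, the rewritten function computes the following (a hedged scalar sketch of the vector semantics; the helper name and flag parameter are illustrative):

#include <stdint.h>

/* One lane of gcn_expand_scaled_offsets: scale the offset (a shift when
   the scale is a power of two), then either add the base to form an
   absolute 64-bit address (DImode offsets, FLAT, or signed offsets) or
   return the scaled 32-bit offset as-is (GLOBAL, unsigned).  */
static uint64_t
scaled_offset_lane (uint64_t base, int64_t offset, int64_t scale,
		    int want_absolute_address)
{
  int64_t scaled = offset * scale;
  if (want_absolute_address)
    return base + (uint64_t) scaled;
  return (uint32_t) scaled;
}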
@@ -3096,8 +3125,7 @@ move_callee_saved_registers (rtx sp, machine_function *offsets,
saved_scalars++;
}
- rtx move_scalars = get_insns ();
- end_sequence ();
+ rtx move_scalars = end_sequence ();
start_sequence ();
/* Ensure that all vector lanes are moved. */
@@ -3232,8 +3260,7 @@ move_callee_saved_registers (rtx sp, machine_function *offsets,
offset += size;
}
- rtx move_vectors = get_insns ();
- end_sequence ();
+ rtx move_vectors = end_sequence ();
if (prologue)
{
@@ -3360,8 +3387,7 @@ gcn_expand_prologue ()
+ offsets->callee_saves))));
}
- rtx_insn *seq = get_insns ();
- end_sequence ();
+ rtx_insn *seq = end_sequence ();
emit_insn (seq);
}
@@ -5319,8 +5345,12 @@ gcn_preferred_vector_alignment (const_tree type)
static bool
gcn_vectorize_support_vector_misalignment (machine_mode ARG_UNUSED (mode),
const_tree type, int misalignment,
- bool is_packed)
+ bool is_packed,
+ bool is_gather_scatter)
{
+ if (is_gather_scatter)
+ return true;
+
if (is_packed)
return false;
@@ -5765,6 +5795,16 @@ gcn_libc_has_function (enum function_class fn_class,
return bsd_libc_has_function (fn_class, type);
}
+/* Implement TARGET_VECTORIZE_PREFER_GATHER_SCATTER. */
+
+static bool
+gcn_prefer_gather_scatter (machine_mode ARG_UNUSED (mode),
+ int ARG_UNUSED (scale),
+ unsigned int ARG_UNUSED (group_size))
+{
+ return true;
+}
+
/* }}} */
/* {{{ md_reorg pass. */
@@ -5860,8 +5900,7 @@ gcn_restore_exec (rtx_insn *insn, rtx_insn *last_exec_def, int64_t curr_exec,
{
start_sequence ();
emit_move_insn (exec_save_reg, exec_reg);
- rtx_insn *seq = get_insns ();
- end_sequence ();
+ rtx_insn *seq = end_sequence ();
emit_insn_after (seq, last_exec_def);
if (dump_file && (dump_flags & TDF_DETAILS))
@@ -5877,8 +5916,7 @@ gcn_restore_exec (rtx_insn *insn, rtx_insn *last_exec_def, int64_t curr_exec,
/* Restore EXEC register before the usage. */
start_sequence ();
emit_move_insn (exec_reg, exec);
- rtx_insn *seq = get_insns ();
- end_sequence ();
+ rtx_insn *seq = end_sequence ();
emit_insn_before (seq, insn);
if (dump_file && (dump_flags & TDF_DETAILS))
@@ -6039,8 +6077,7 @@ gcn_md_reorg (void)
{
start_sequence ();
emit_move_insn (exec_reg, GEN_INT (new_exec));
- rtx_insn *seq = get_insns ();
- end_sequence ();
+ rtx_insn *seq = end_sequence ();
emit_insn_before (seq, insn);
if (dump_file && (dump_flags & TDF_DETAILS))
@@ -6131,12 +6168,22 @@ gcn_md_reorg (void)
detects the missed cases, and inserts the documented number of NOPs
required for correct execution. */
+  /* RDNA4 (not yet implemented) differs from RDNA 2/3/3.5 and requires some
+     s_nop insertions; see sections 5.7 and esp. 5.7.2 in its ISA manual.
+ The assert here is a reminder to add those. */
+ STATIC_ASSERT (ISA_CDNA1 - ISA_RDNA3 == 1);
+
+ if (TARGET_NO_MANUAL_NOPS)
+ return;
+
const int max_waits = 5;
struct ilist
{
rtx_insn *insn;
attr_unit unit;
- attr_delayeduse delayeduse;
+ attr_type type;
+ attr_flatmemaccess flatmemaccess;
+ bool delayeduse;
HARD_REG_SET writes;
HARD_REG_SET reads;
int age;
@@ -6157,7 +6204,29 @@ gcn_md_reorg (void)
attr_type itype = get_attr_type (insn);
attr_unit iunit = get_attr_unit (insn);
- attr_delayeduse idelayeduse = get_attr_delayeduse (insn);
+ attr_flatmemaccess iflatmemaccess = get_attr_flatmemaccess (insn);
+ bool delayeduse;
+ if (TARGET_CDNA3_NOPS)
+ switch (iflatmemaccess)
+ {
+ case FLATMEMACCESS_STORE:
+ case FLATMEMACCESS_STOREX34:
+ case FLATMEMACCESS_ATOMIC:
+ case FLATMEMACCESS_CMPSWAPX2:
+ delayeduse = true;
+ break;
+ case FLATMEMACCESS_LOAD:
+ case FLATMEMACCESS_ATOMICWAIT:
+ case FLATMEMACCESS_NO:
+ delayeduse = false;
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ else
+ delayeduse = (iflatmemaccess == FLATMEMACCESS_CMPSWAPX2
+ || iflatmemaccess == FLATMEMACCESS_STOREX34);
+
int ivccwait = get_attr_vccwait (insn);
HARD_REG_SET ireads, iwrites;
CLEAR_HARD_REG_SET (ireads);
@@ -6202,16 +6271,26 @@ gcn_md_reorg (void)
&& TEST_HARD_REG_BIT (ireads, VCCZ_REG))))
nops_rqd = 5 - prev_insn->age;
- /* VALU writes SGPR/VCC followed by v_{read,write}lane using
- SGPR/VCC as lane select requires 4 wait states. */
+ /* VALU writes SGPR/VCC followed by
+ - v_{read,write}lane using SGPR/VCC as lane select requires
+ 4 wait states
+ - [CDNA3] VALU reads SGPR as constant requires 1 wait state
+ - [CDNA3] VALU reads SGPR as carry-in requires no wait states */
if ((prev_insn->age + nops_rqd) < 4
&& prev_insn->unit == UNIT_VECTOR
- && get_attr_laneselect (insn) == LANESELECT_YES
+ && get_attr_laneselect (insn) != LANESELECT_NO
&& (hard_reg_set_intersect_p
(depregs, reg_class_contents[(int) SGPR_REGS])
|| hard_reg_set_intersect_p
(depregs, reg_class_contents[(int) VCC_CONDITIONAL_REG])))
nops_rqd = 4 - prev_insn->age;
+ else if (TARGET_CDNA3_NOPS
+ && (prev_insn->age + nops_rqd) < 1
+ && prev_insn->unit == UNIT_VECTOR
+ && iunit == UNIT_VECTOR
+ && hard_reg_set_intersect_p
+ (depregs, reg_class_contents[(int) SGPR_REGS]))
+ nops_rqd = 1 - prev_insn->age;
/* VALU writes VGPR followed by VALU_DPP reading that VGPR
requires 2 wait states. */
@@ -6224,22 +6303,128 @@ gcn_md_reorg (void)
nops_rqd = 2 - prev_insn->age;
}
+      /* VALU writes EXEC followed by VALU DPP op requires 5 nops.  */
+ if ((prev_insn->age + nops_rqd) < 5
+ && itype == TYPE_VOP_DPP
+ && prev_insn->unit == UNIT_VECTOR
+ && TEST_HARD_REG_BIT (prev_insn->writes, EXECZ_REG))
+ nops_rqd = 5 - prev_insn->age;
+
/* Store that requires input registers are not overwritten by
- following instruction. */
- if ((prev_insn->age + nops_rqd) < 1
- && prev_insn->delayeduse == DELAYEDUSE_YES
+ following instruction.
+	 For CDNA3 only, VALU writes require 2 nops instead of 1.
+	 CDNA3 additionally requires 1 or 2 nops for global & scratch
+	 store/atomic.  */
+ if (TARGET_CDNA3_NOPS
+ && (prev_insn->age + nops_rqd) < 2
+ && prev_insn->delayeduse
+ && iunit == UNIT_VECTOR
+ && ((hard_reg_set_intersect_p
+ (prev_insn->reads, iwrites))))
+ nops_rqd = 2 - prev_insn->age;
+ else if ((prev_insn->age + nops_rqd) < 1
+ && prev_insn->delayeduse
&& ((hard_reg_set_intersect_p
(prev_insn->reads, iwrites))))
nops_rqd = 1 - prev_insn->age;
- /* Instruction that requires VCC is not written too close before
- using it. */
+ /* Instruction (such as v_div_fmas) that requires VCC is not written
+	 too close before using it.  */
if (prev_insn->age < ivccwait
&& (hard_reg_set_intersect_p
(prev_insn->writes,
reg_class_contents[(int)VCC_CONDITIONAL_REG])))
nops_rqd = ivccwait - prev_insn->age;
+      /* NOTE: The following condition for adding a wait state exists, but
+ GCC does not access the special registers using their SGPR#.
+ Thus, no action is required here. The following wait-state
+ condition exists at least for VEGA/gfx900+ to CDNA3:
+ Mixed use of VCC: alias vs. SGPR# - v_readlane,
+ v_readfirstlane, v_cmp, v_add_*i/u, v_sub_*i/u, v_div_*scale
+ followed by VALU reads VCC as constant requires 1 wait state.
+ (As carry-in, it requires none.)
+ [VCC can be accessed by name or logical SGPR that holds it.] */
+
+ /* Testing indicates that CDNA3 requires an s_nop between
+ e.g. 'v_cmp_eq_u64 vcc, v[4:5], v[8:9]' and 'v_mov_b32 v0, vcc_lo'.
+ Thus: add it between v_cmp writing VCC and VALU read of VCC. */
+ if (TARGET_CDNA3_NOPS
+ && (prev_insn->age + nops_rqd) < 1
+ && iunit == UNIT_VECTOR
+ && (hard_reg_set_intersect_p
+ (depregs, reg_class_contents[(int)VCC_CONDITIONAL_REG]))
+ && get_attr_vcmp (prev_insn->insn) == VCMP_VCMP)
+ nops_rqd = 1 - prev_insn->age;
+
+ /* CDNA3: VALU writes SGPR/VCC: v_readlane, v_readfirstlane, v_cmp,
+ v_add_*i/u, v_sub_*i/u, v_div_*scale - followed by:
+	 - VALU reads SGPR as constant requires 1 wait state
+	 - VALU reads SGPR as carry-in requires no wait state
+ - v_readlane/v_writelane reads SGPR as lane select requires 4 wait
+ states. */
+ if (TARGET_CDNA3_NOPS
+ && (prev_insn->age + nops_rqd) < 4
+ && iunit == UNIT_VECTOR
+ && prev_insn->unit == UNIT_VECTOR
+ && hard_reg_set_intersect_p
+ (depregs, reg_class_contents[(int) SGPR_SRC_REGS]))
+ {
+ if (get_attr_laneselect (insn) != LANESELECT_NO)
+ nops_rqd = 4 - prev_insn->age;
+ else if ((prev_insn->age + nops_rqd) < 1)
+ nops_rqd = 1 - prev_insn->age;
+ }
+
+ /* CDNA3: v_cmpx followed by
+ - V_readlane, v_readfirstlane, v_writelane requires 4 wait states
+ - VALU reads EXEC as constant requires 2 wait states
+ - other VALU requires no wait state */
+ if (TARGET_CDNA3_NOPS
+ && (prev_insn->age + nops_rqd) < 4
+ && get_attr_vcmp (prev_insn->insn) == VCMP_VCMPX
+ && get_attr_laneselect (insn) != LANESELECT_NO)
+ nops_rqd = 4 - prev_insn->age;
+ else if (TARGET_CDNA3_NOPS
+ && (prev_insn->age + nops_rqd) < 2
+ && iunit == UNIT_VECTOR
+ && get_attr_vcmp (prev_insn->insn) == VCMP_VCMPX
+ && TEST_HARD_REG_BIT (ireads, EXECZ_REG))
+ nops_rqd = 2 - prev_insn->age;
+
+ /* CDNA3: VALU writes VGPR followed by v_readlane vsrc0 reads VGPRn
+ requires 1 wait state. */
+ if (TARGET_CDNA3_NOPS
+ && (prev_insn->age + nops_rqd) < 1
+ && prev_insn->unit == UNIT_VECTOR
+ && prev_insn->flatmemaccess != FLATMEMACCESS_LOAD
+ && get_attr_laneselect (insn) == LANESELECT_READ
+ && hard_reg_set_intersect_p
+ (depregs, reg_class_contents[(int) VGPR_REGS]))
+ nops_rqd = 1 - prev_insn->age;
+
+      /* CDNA3: VALU op which uses OPSEL or SDWA that changes the result's
+	 bit position, followed by a VALU op that consumes the result of
+	 that op, requires 1 wait state.
+ FIXME: Handle OPSEL, once used. */
+ if (TARGET_CDNA3_NOPS
+ && (prev_insn->age + nops_rqd) < 1
+ && prev_insn->unit == UNIT_VECTOR
+ && prev_insn->type == TYPE_VOP_SDWA
+ && !hard_reg_set_empty_p (depregs))
+ nops_rqd = 1 - prev_insn->age;
+
+      /* CDNA3: VALU trans op (such as v_rcp_f64) followed by a non-trans
+	 VALU op that consumes the result of that op requires 1 wait state. */
+ if (TARGET_CDNA3_NOPS
+ && (prev_insn->age + nops_rqd) < 1
+ && prev_insn->unit == UNIT_VECTOR
+ && iunit == UNIT_VECTOR
+ && get_attr_transop (prev_insn->insn) == TRANSOP_YES
+ && get_attr_transop (insn) == TRANSOP_NO
+ && !hard_reg_set_empty_p (depregs))
+ nops_rqd = 1 - prev_insn->age;
+
/* CDNA1: write VGPR before v_accvgpr_write reads it. */
if (TARGET_AVGPR_CDNA1_NOPS
&& (prev_insn->age + nops_rqd) < 2
@@ -6271,8 +6456,8 @@ gcn_md_reorg (void)
}
/* Insert the required number of NOPs. */
- for (int i = nops_rqd; i > 0; i--)
- emit_insn_after (gen_nop (), last_insn);
+ if (nops_rqd > 0)
+	emit_insn_after (gen_nops (GEN_INT (nops_rqd - 1)), last_insn);
/* Age the previous instructions. We can also ignore writes to
registers subsequently overwritten. */
@@ -6295,7 +6480,9 @@ gcn_md_reorg (void)
/* Track the current instruction as a previous instruction. */
back[oldest].insn = insn;
back[oldest].unit = iunit;
- back[oldest].delayeduse = idelayeduse;
+ back[oldest].type = itype;
+ back[oldest].flatmemaccess = iflatmemaccess;
+ back[oldest].delayeduse = delayeduse;
back[oldest].writes = iwrites;
back[oldest].reads = ireads;
back[oldest].age = 0;
@@ -6587,8 +6774,8 @@ gcn_hsa_declare_function_name (FILE *file, const char *name,
if (avgpr % vgpr_block_size)
avgpr += vgpr_block_size - (avgpr % vgpr_block_size);
- fputs ("\t.rodata\n"
- "\t.p2align\t6\n"
+ switch_to_section (readonly_data_section);
+ fputs ("\t.p2align\t6\n"
"\t.amdhsa_kernel\t", file);
assemble_name (file, name);
fputs ("\n", file);
@@ -6707,7 +6894,7 @@ gcn_hsa_declare_function_name (FILE *file, const char *name,
fputs (" .end_amdgpu_metadata\n", file);
#endif
- fputs ("\t.text\n", file);
+ switch_to_section (current_function_section ());
fputs ("\t.align\t256\n", file);
fputs ("\t.type\t", file);
assemble_name (file, name);
@@ -7108,11 +7295,19 @@ print_operand_address (FILE *file, rtx mem)
E - print conditional code for v_cmp (eq_u64/ne_u64...)
A - print address in formatting suitable for given address space.
O - print offset:n for data share operations.
- g - print "glc", if appropriate for given MEM
+ G - print "glc" (or for gfx94x: sc0) unconditionally [+ indep. of regnum]
+ g - print "glc" (or for gfx94x: sc0), if appropriate for given MEM
+       NOTE: Do not use 'G' or 'g' with scalar memory access ('s_...') as
+       those require "glc" even on gfx94x.
L - print low-part of a multi-reg value
H - print second part of a multi-reg value (high-part of 2-reg value)
J - print third part of a multi-reg value
K - print fourth part of a multi-reg value
+   R - print a scalar register number as an integer.  Temporary hack.
+   V - print a vector register number as an integer.  Temporary hack.
+
+   Additionally, the standard built-in codes c, n, a, and l exist; see gccint's
+ "Output Templates and Operand Substitution" for details.
*/
void
@@ -7724,10 +7919,13 @@ print_operand (FILE *file, rtx x, int code)
else
output_addr_const (file, x);
return;
+ case 'G':
+ fputs (TARGET_GLC_NAME, file);
+ return;
case 'g':
gcc_assert (xcode == MEM);
if (MEM_VOLATILE_P (x))
- fputs (" glc", file);
+ fputs (TARGET_GLC_NAME, file);
return;
default:
output_operand_lossage ("invalid %%xn code");
@@ -7908,8 +8106,6 @@ gcn_dwarf_register_span (rtx rtl)
#define TARGET_LEGITIMATE_CONSTANT_P gcn_legitimate_constant_p
#undef TARGET_LIBC_HAS_FUNCTION
#define TARGET_LIBC_HAS_FUNCTION gcn_libc_has_function
-#undef TARGET_LRA_P
-#define TARGET_LRA_P hook_bool_void_true
#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG gcn_md_reorg
#undef TARGET_MEMORY_MOVE_COST
@@ -7960,6 +8156,8 @@ gcn_dwarf_register_span (rtx rtl)
gcn_vectorize_builtin_vectorized_function
#undef TARGET_VECTORIZE_GET_MASK_MODE
#define TARGET_VECTORIZE_GET_MASK_MODE gcn_vectorize_get_mask_mode
+#undef TARGET_VECTORIZE_PREFER_GATHER_SCATTER
+#define TARGET_VECTORIZE_PREFER_GATHER_SCATTER gcn_prefer_gather_scatter
#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE gcn_vectorize_preferred_simd_mode
#undef TARGET_VECTORIZE_PREFERRED_VECTOR_ALIGNMENT
diff --git a/gcc/config/gcn/gcn.h b/gcc/config/gcn/gcn.h
index 5198fbc..3d42de3 100644
--- a/gcc/config/gcn/gcn.h
+++ b/gcc/config/gcn/gcn.h
@@ -43,6 +43,8 @@ extern const struct gcn_device_def {
builtin_define ("__CDNA1__"); \
else if (TARGET_CDNA2) \
builtin_define ("__CDNA2__"); \
+ else if (TARGET_CDNA3) \
+ builtin_define ("__CDNA3__"); \
else if (TARGET_RDNA2) \
builtin_define ("__RDNA2__"); \
else if (TARGET_RDNA3) \
diff --git a/gcc/config/gcn/gcn.md b/gcc/config/gcn/gcn.md
index e0fb735..4130cf6 100644
--- a/gcc/config/gcn/gcn.md
+++ b/gcc/config/gcn/gcn.md
@@ -219,7 +219,7 @@
; flags: offen, idxen, glc, lds, slc, tfe
;
; flat - flat or global memory operations
-; flags: glc, slc
+; flags: {CDNA3: sc0, nt, sc1 | otherwise: glc, slc, scc}
; addr: vgpr0-255
; data: vgpr0-255
; vdst: vgpr0-255
@@ -312,18 +312,33 @@
; We need to be able to identify v_readlane and v_writelane with
; SGPR lane selection in order to handle "Manually Inserted Wait States".
-(define_attr "laneselect" "yes,no" (const_string "no"))
+(define_attr "laneselect" "write,read,no" (const_string "no"))
-; Identify instructions that require a "Manually Inserted Wait State" if
-; their inputs are overwritten by subsequent instructions.
+; Classify flat/global memory accesses: a store or load followed by a
+; waitcnt, or a flat/global atomic access, possibly followed by a waitcnt.
+; 'storex34' denotes FLAT_STORE_X{3,4}.
+; 'cmpswapx2' denotes FLAT_ATOMIC_{F}CMPSWAP_X2.
+; Used to handle "Manually Inserted Wait State".
-(define_attr "delayeduse" "yes,no" (const_string "no"))
+(define_attr "flatmemaccess"
+ "store,storex34,load,atomic,atomicwait,cmpswapx2,no"
+ (const_string "no"))
+
+; Identify v_cmp and v_cmpx instructions for "Manually Inserted Wait State"
+; handling.
+
+(define_attr "vcmp" "vcmp,vcmpx,no" (const_string "no"))
; Identify instructions that require "Manually Inserted Wait State" if
; a previous instruction writes to VCC. The number gives the number of NOPs.
(define_attr "vccwait" "" (const_int 0))
+; Mark trans ops such as v_{exp,rsq,sqrt,sin,cos,log,...}_F{16,32,64}
+; for later conditional s_nop insertion.
+
+(define_attr "transop" "yes,no" (const_string "no"))
+
;; }}}
;; {{{ Iterators useful across the whole machine description
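For each define_attr, GCC generates a get_attr_<name> accessor plus an
enum of its values; a sketch (not the patch's exact logic) of how the
md-reorg pass could classify insns via the new attribute:

    /* Sketch: enum constants follow GCC's generated FLATMEMACCESS_*
       naming; "delayeduse" mirrors the tracking seen in gcn.cc above.  */
    switch (get_attr_flatmemaccess (insn))
      {
      case FLATMEMACCESS_STOREX34:
      case FLATMEMACCESS_CMPSWAPX2:
        /* Wide store/cmpswap: inputs must not be overwritten too soon.  */
        delayeduse = true;
        break;
      case FLATMEMACCESS_ATOMICWAIT:
        /* Atomic access already followed by a waitcnt.  */
        break;
      default:
        break;
      }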
@@ -414,6 +429,15 @@
"s_nop\t0x0"
[(set_attr "type" "sopp")])
+; Variant of 'nop' that accepts a count argument.
+; s_nop accepts 0x0 to 0xf for 1 to 16 nops; however, as %0 prints in
+; decimal, only 0 to 9 (= 1 to 10 nops) can be used.
+(define_insn "nops"
+ [(match_operand 0 "const_int_operand")]
+ ""
+ "s_nop\t0x%0"
+ [(set_attr "type" "sopp")])
+
; FIXME: What should the value of the immediate be? Zero is disallowed, so
; pick 1 for now.
(define_insn "trap"
@@ -555,9 +579,12 @@
}
[(set_attr "type" "sop1,vop1,vop3a,sopk,vopc,mult,smem,smem,smem,flat,flat,
flat,flat,flat,flat")
+ (set_attr "flatmemaccess" "*,*,*,*,*,*,*,*,*,load,load,store,load,load,store")
+ (set_attr "vcmp" "*,*,*,*,vcmp,*,*,*,*,*,*,*,*,*,*")
(set_attr "exec" "*,*,none,*,*,*,*,*,*,*,*,*,*,*,*")
(set_attr "length" "4,4,4,4,4,8,12,12,12,12,12,12,12,12,12")
- (set_attr "xnack" "*,*,*,*,*,*,off,on,*,off,on,*,off,on,*")])
+ (set_attr "xnack" "*,*,*,*,*,*,off,on,*,off,on,*,off,on,*")
+ (set_attr "laneselect" "*,*,read,*,*,*,*,*,*,*,*,*,*,*,*")])
; 32bit move pattern
@@ -565,38 +592,38 @@
[(set (match_operand:SISF 0 "nonimmediate_operand")
(match_operand:SISF 1 "gcn_load_operand"))]
""
- {@ [cons: =0, 1; attrs: type, exec, length, cdna, xnack]
- [SD ,SSA ;sop1 ,* ,4 ,* ,* ] s_mov_b32\t%0, %1
- [SD ,J ;sopk ,* ,4 ,* ,* ] s_movk_i32\t%0, %1
- [SD ,B ;sop1 ,* ,8 ,* ,* ] s_mov_b32\t%0, %1
- [SD ,RB ;smem ,* ,12,* ,off] s_buffer_load%s0\t%0, s[0:3], %1\;s_waitcnt\tlgkmcnt(0)
- [&SD ,RB ;smem ,* ,12,* ,on ] ^
- [RB ,Sm ;smem ,* ,12,* ,* ] s_buffer_store%s1\t%1, s[0:3], %0
- [Sm ,RS ;smem ,* ,12,* ,off] s_load_dword\t%0, %A1\;s_waitcnt\tlgkmcnt(0)
- [&Sm ,RS ;smem ,* ,12,* ,on ] ^
- [RS ,Sm ;smem ,* ,12,* ,* ] s_store_dword\t%1, %A0
- [v ,v ;vop1 ,* ,4 ,* ,* ] v_mov_b32\t%0, %1
- [Sg ,v ;vop3a,none,8 ,* ,* ] v_readlane_b32\t%0, %1, 0
- [v ,Sv ;vop3a,none,8 ,* ,* ] v_writelane_b32\t%0, %1, 0
- [v ,^a ;vop3p_mai,*,8,* ,* ] v_accvgpr_read_b32\t%0, %1
- [a ,v ;vop3p_mai,*,8,* ,* ] v_accvgpr_write_b32\t%0, %1
- [a ,a ;vop1 ,* ,4,cdna2,* ] v_accvgpr_mov_b32\t%0, %1
- [v ,RF ;flat ,* ,12,* ,off] flat_load_dword\t%0, %A1%O1%g1\;s_waitcnt\t0
- [&v ,RF ;flat ,* ,12,* ,on ] ^
- [^a ,RF ;flat ,* ,12,cdna2,off] ^
- [&^a ,RF ;flat ,* ,12,cdna2,on ] ^
- [RF ,v ;flat ,* ,12,* ,* ] flat_store_dword\t%A0, %1%O0%g0
- [RF ,a ;flat ,* ,12,cdna2,* ] ^
- [v ,B ;vop1 ,* ,8 ,* ,* ] v_mov_b32\t%0, %1
- [RLRG,v ;ds ,* ,12,* ,* ] ds_write_b32\t%A0, %1%O0\;s_waitcnt\tlgkmcnt(0)
- [v ,RLRG;ds ,* ,12,* ,* ] ds_read_b32\t%0, %A1%O1\;s_waitcnt\tlgkmcnt(0)
- [SD ,Y ;sop1 ,* ,8 ,* ,* ] s_mov_b32\t%0, %1
- [v ,RM ;flat ,* ,12,* ,off] global_load_dword\t%0, %A1%O1%g1\;s_waitcnt\tvmcnt(0)
- [&v ,RM ;flat ,* ,12,* ,on ] ^
- [^a ,RM ;flat ,* ,12,cdna2,off] ^
- [&^a ,RM ;flat ,* ,12,cdna2,on ] ^
- [RM ,v ;flat ,* ,12,* ,* ] global_store_dword\t%A0, %1%O0%g0
- [RM ,a ;flat ,* ,12,cdna2,* ] ^
+ {@ [cons: =0, 1; attrs: type, exec, length, cdna, xnack, laneselect, flatmemaccess]
+ [SD ,SSA ;sop1 ,* ,4 ,* ,* ,* ,* ] s_mov_b32\t%0, %1
+ [SD ,J ;sopk ,* ,4 ,* ,* ,* ,* ] s_movk_i32\t%0, %1
+ [SD ,B ;sop1 ,* ,8 ,* ,* ,* ,* ] s_mov_b32\t%0, %1
+ [SD ,RB ;smem ,* ,12,* ,off,* ,* ] s_buffer_load%s0\t%0, s[0:3], %1\;s_waitcnt\tlgkmcnt(0)
+ [&SD ,RB ;smem ,* ,12,* ,on ,* ,* ] ^
+ [RB ,Sm ;smem ,* ,12,* ,* ,* ,* ] s_buffer_store%s1\t%1, s[0:3], %0
+ [Sm ,RS ;smem ,* ,12,* ,off,* ,* ] s_load_dword\t%0, %A1\;s_waitcnt\tlgkmcnt(0)
+ [&Sm ,RS ;smem ,* ,12,* ,on ,* ,* ] ^
+ [RS ,Sm ;smem ,* ,12,* ,* ,* ,* ] s_store_dword\t%1, %A0
+ [v ,v ;vop1 ,* ,4 ,* ,* ,* ,* ] v_mov_b32\t%0, %1
+ [Sg ,v ;vop3a,none,8 ,* ,* ,read ,* ] v_readlane_b32\t%0, %1, 0
+ [v ,Sv ;vop3a,none,8 ,* ,* ,write,* ] v_writelane_b32\t%0, %1, 0
+ [v ,^a ;vop3p_mai,*,8,* ,* ,* ,* ] v_accvgpr_read_b32\t%0, %1
+ [a ,v ;vop3p_mai,*,8,* ,* ,* ,* ] v_accvgpr_write_b32\t%0, %1
+ [a ,a ;vop1 ,* ,4,cdna2,* ,* ,* ] v_accvgpr_mov_b32\t%0, %1
+ [v ,RF ;flat ,* ,12,* ,off,* ,load ] flat_load_dword\t%0, %A1%O1%g1\;s_waitcnt\t0
+ [&v ,RF ;flat ,* ,12,* ,on ,* ,load ] ^
+ [^a ,RF ;flat ,* ,12,cdna2,off,* ,load ] ^
+ [&^a ,RF ;flat ,* ,12,cdna2,on ,* ,load ] ^
+ [RF ,v ;flat ,* ,12,* ,* ,* ,store] flat_store_dword\t%A0, %1%O0%g0
+ [RF ,a ;flat ,* ,12,cdna2,* ,* ,store] ^
+ [v ,B ;vop1 ,* ,8 ,* ,* ,* ,* ] v_mov_b32\t%0, %1
+ [RLRG,v ;ds ,* ,12,* ,* ,* ,* ] ds_write_b32\t%A0, %1%O0\;s_waitcnt\tlgkmcnt(0)
+ [v ,RLRG;ds ,* ,12,* ,* ,* ,* ] ds_read_b32\t%0, %A1%O1\;s_waitcnt\tlgkmcnt(0)
+ [SD ,Y ;sop1 ,* ,8 ,* ,* ,* ,* ] s_mov_b32\t%0, %1
+ [v ,RM ;flat ,* ,12,* ,off,* ,load ] global_load_dword\t%0, %A1%O1%g1\;s_waitcnt\tvmcnt(0)
+ [&v ,RM ;flat ,* ,12,* ,on ,* ,load ] ^
+ [^a ,RM ;flat ,* ,12,cdna2,off,* ,load ] ^
+ [&^a ,RM ;flat ,* ,12,cdna2,on ,* ,load ] ^
+ [RM ,v ;flat ,* ,12,* ,* ,* ,store] global_store_dword\t%A0, %1%O0%g0
+ [RM ,a ;flat ,* ,12,cdna2,* ,* ,store] ^
})
; 8/16bit move pattern
@@ -606,31 +633,31 @@
[(set (match_operand:QIHI 0 "nonimmediate_operand")
(match_operand:QIHI 1 "gcn_load_operand"))]
"gcn_valid_move_p (<MODE>mode, operands[0], operands[1])"
- {@ [cons: =0, 1; attrs: type, exec, length, cdna, xnack]
- [SD ,SSA ;sop1 ,* ,4 ,* ,* ] s_mov_b32\t%0, %1
- [SD ,J ;sopk ,* ,4 ,* ,* ] s_movk_i32\t%0, %1
- [SD ,B ;sop1 ,* ,8 ,* ,* ] s_mov_b32\t%0, %1
- [v ,v ;vop1 ,* ,4 ,* ,* ] v_mov_b32\t%0, %1
- [Sg ,v ;vop3a,none,4 ,* ,* ] v_readlane_b32\t%0, %1, 0
- [v ,Sv ;vop3a,none,4 ,* ,* ] v_writelane_b32\t%0, %1, 0
- [v ,^a ;vop3p_mai,*,8,* ,* ] v_accvgpr_read_b32\t%0, %1
- [a ,v ;vop3p_mai,*,8,* ,* ] v_accvgpr_write_b32\t%0, %1
- [a ,a ;vop1 ,* ,8,cdna2,* ] v_accvgpr_mov_b32\t%0, %1
- [v ,RF ;flat ,* ,12,* ,off] flat_load%o1\t%0, %A1%O1%g1\;s_waitcnt\t0
- [&v ,RF ;flat ,* ,12,* ,on ] ^
- [^a ,RF ;flat ,* ,12,cdna2,off] ^
- [&^a ,RF ;flat ,* ,12,cdna2,on ] ^
- [RF ,v ;flat ,* ,12,* ,* ] flat_store%s0\t%A0, %1%O0%g0
- [RF ,a ;flat ,* ,12,cdna2,* ] ^
- [v ,B ;vop1 ,* ,8 ,* ,* ] v_mov_b32\t%0, %1
- [RLRG,v ;ds ,* ,12,* ,* ] ds_write%b0\t%A0, %1%O0\;s_waitcnt\tlgkmcnt(0)
- [v ,RLRG;ds ,* ,12,* ,* ] ds_read%u1\t%0, %A1%O1\;s_waitcnt\tlgkmcnt(0)
- [v ,RM ;flat ,* ,12,* ,off] global_load%o1\t%0, %A1%O1%g1\;s_waitcnt\tvmcnt(0)
- [&v ,RM ;flat ,* ,12,* ,on ] ^
- [^a ,RM ;flat ,* ,12,cdna2,off] ^
- [&^a ,RM ;flat ,* ,12,cdna2,on ] ^
- [RM ,v ;flat ,* ,12,* ,* ] global_store%s0\t%A0, %1%O0%g0
- [RM ,a ;flat ,* ,12,cdna2,* ] ^
+ {@ [cons: =0, 1; attrs: type, exec, length, cdna, xnack, laneselect, flatmemaccess]
+ [SD ,SSA ;sop1 ,* ,4 ,* ,* ,* ,* ] s_mov_b32\t%0, %1
+ [SD ,J ;sopk ,* ,4 ,* ,* ,* ,* ] s_movk_i32\t%0, %1
+ [SD ,B ;sop1 ,* ,8 ,* ,* ,* ,* ] s_mov_b32\t%0, %1
+ [v ,v ;vop1 ,* ,4 ,* ,* ,* ,* ] v_mov_b32\t%0, %1
+ [Sg ,v ;vop3a,none,4 ,* ,* ,read ,* ] v_readlane_b32\t%0, %1, 0
+ [v ,Sv ;vop3a,none,4 ,* ,* ,write,* ] v_writelane_b32\t%0, %1, 0
+ [v ,^a ;vop3p_mai,*,8,* ,* ,* ,* ] v_accvgpr_read_b32\t%0, %1
+ [a ,v ;vop3p_mai,*,8,* ,* ,* ,* ] v_accvgpr_write_b32\t%0, %1
+ [a ,a ;vop1 ,* ,8,cdna2,* ,* ,* ] v_accvgpr_mov_b32\t%0, %1
+ [v ,RF ;flat ,* ,12,* ,off,* ,load ] flat_load%o1\t%0, %A1%O1%g1\;s_waitcnt\t0
+ [&v ,RF ;flat ,* ,12,* ,on ,* ,load ] ^
+ [^a ,RF ;flat ,* ,12,cdna2,off,* ,load ] ^
+ [&^a ,RF ;flat ,* ,12,cdna2,on ,* ,load ] ^
+ [RF ,v ;flat ,* ,12,* ,* ,* ,store] flat_store%s0\t%A0, %1%O0%g0
+ [RF ,a ;flat ,* ,12,cdna2,* ,* ,store] ^
+ [v ,B ;vop1 ,* ,8 ,* ,* ,* ,* ] v_mov_b32\t%0, %1
+ [RLRG,v ;ds ,* ,12,* ,* ,* ,* ] ds_write%b0\t%A0, %1%O0\;s_waitcnt\tlgkmcnt(0)
+ [v ,RLRG;ds ,* ,12,* ,* ,* ,* ] ds_read%u1\t%0, %A1%O1\;s_waitcnt\tlgkmcnt(0)
+ [v ,RM ;flat ,* ,12,* ,off,* ,load ] global_load%o1\t%0, %A1%O1%g1\;s_waitcnt\tvmcnt(0)
+ [&v ,RM ;flat ,* ,12,* ,on ,* ,load ] ^
+ [^a ,RM ;flat ,* ,12,cdna2,off,* ,load ] ^
+ [&^a ,RM ;flat ,* ,12,cdna2,on ,* ,load ] ^
+ [RM ,v ;flat ,* ,12,* ,* ,* ,store] global_store%s0\t%A0, %1%O0%g0
+ [RM ,a ;flat ,* ,12,cdna2,* ,* ,store] ^
})
; 64bit move pattern
@@ -639,34 +666,34 @@
[(set (match_operand:DIDF 0 "nonimmediate_operand")
(match_operand:DIDF 1 "general_operand"))]
"GET_CODE(operands[1]) != SYMBOL_REF"
- {@ [cons: =0, 1; attrs: type, length, cdna, xnack]
- [SD ,SSA ;sop1 ,4 ,* ,* ] s_mov_b64\t%0, %1
- [SD ,C ;sop1 ,8 ,* ,* ] ^
- [SD ,DB ;mult ,* ,* ,* ] #
- [RS ,Sm ;smem ,12,* ,* ] s_store_dwordx2\t%1, %A0
- [Sm ,RS ;smem ,12,* ,off] s_load_dwordx2\t%0, %A1\;s_waitcnt\tlgkmcnt(0)
- [&Sm ,RS ;smem ,12,* ,on ] ^
- [v ,v ;vmult,* ,* ,* ] #
- [v ,DB ;vmult,* ,* ,* ] #
- [Sg ,v ;vmult,* ,* ,* ] #
- [v ,Sv ;vmult,* ,* ,* ] #
- [v ,^a ;vmult,* ,* ,* ] #
- [a ,v ;vmult,* ,* ,* ] #
- [a ,a ;vmult,* ,cdna2,* ] #
- [v ,RF ;flat ,12,* ,off] flat_load_dwordx2\t%0, %A1%O1%g1\;s_waitcnt\t0
- [&v ,RF ;flat ,12,* ,on ] ^
- [^a ,RF ;flat ,12,cdna2,off] ^
- [&^a ,RF ;flat ,12,cdna2,on ] ^
- [RF ,v ;flat ,12,* ,* ] flat_store_dwordx2\t%A0, %1%O0%g0
- [RF ,a ;flat ,12,cdna2,* ] ^
- [RLRG,v ;ds ,12,* ,* ] ds_write_b64\t%A0, %1%O0\;s_waitcnt\tlgkmcnt(0)
- [v ,RLRG;ds ,12,* ,* ] ds_read_b64\t%0, %A1%O1\;s_waitcnt\tlgkmcnt(0)
- [v ,RM ;flat ,12,* ,off] global_load_dwordx2\t%0, %A1%O1%g1\;s_waitcnt\tvmcnt(0)
- [&v ,RM ;flat ,12,* ,on ] ^
- [^a ,RM ;flat ,12,cdna2,off] ^
- [&^a ,RM ;flat ,12,cdna2,on ] ^
- [RM ,v ;flat ,12,* ,* ] global_store_dwordx2\t%A0, %1%O0%g0
- [RM ,a ;flat ,12,cdna2,* ] ^
+ {@ [cons: =0, 1; attrs: type, length, cdna, xnack, flatmemaccess]
+ [SD ,SSA ;sop1 ,4 ,* ,* ,* ] s_mov_b64\t%0, %1
+ [SD ,C ;sop1 ,8 ,* ,* ,* ] ^
+ [SD ,DB ;mult ,* ,* ,* ,* ] #
+ [RS ,Sm ;smem ,12,* ,* ,* ] s_store_dwordx2\t%1, %A0
+ [Sm ,RS ;smem ,12,* ,off,* ] s_load_dwordx2\t%0, %A1\;s_waitcnt\tlgkmcnt(0)
+ [&Sm ,RS ;smem ,12,* ,on ,* ] ^
+ [v ,v ;vmult,* ,* ,* ,* ] #
+ [v ,DB ;vmult,* ,* ,* ,* ] #
+ [Sg ,v ;vmult,* ,* ,* ,* ] #
+ [v ,Sv ;vmult,* ,* ,* ,* ] #
+ [v ,^a ;vmult,* ,* ,* ,* ] #
+ [a ,v ;vmult,* ,* ,* ,* ] #
+ [a ,a ;vmult,* ,cdna2,* ,* ] #
+ [v ,RF ;flat ,12,* ,off,load ] flat_load_dwordx2\t%0, %A1%O1%g1\;s_waitcnt\t0
+ [&v ,RF ;flat ,12,* ,on ,load ] ^
+ [^a ,RF ;flat ,12,cdna2,off,load ] ^
+ [&^a ,RF ;flat ,12,cdna2,on ,load ] ^
+ [RF ,v ;flat ,12,* ,* ,store] flat_store_dwordx2\t%A0, %1%O0%g0
+ [RF ,a ;flat ,12,cdna2,* ,store] ^
+ [RLRG,v ;ds ,12,* ,* ,* ] ds_write_b64\t%A0, %1%O0\;s_waitcnt\tlgkmcnt(0)
+ [v ,RLRG;ds ,12,* ,* ,* ] ds_read_b64\t%0, %A1%O1\;s_waitcnt\tlgkmcnt(0)
+ [v ,RM ;flat ,12,* ,off,load ] global_load_dwordx2\t%0, %A1%O1%g1\;s_waitcnt\tvmcnt(0)
+ [&v ,RM ;flat ,12,* ,on ,load ] ^
+ [^a ,RM ;flat ,12,cdna2,off,load ] ^
+ [&^a ,RM ;flat ,12,cdna2,on ,load ] ^
+ [RM ,v ;flat ,12,* ,* ,store] global_store_dwordx2\t%A0, %1%O0%g0
+ [RM ,a ;flat ,12,cdna2,* ,store] ^
}
"reload_completed
&& ((!MEM_P (operands[0]) && !MEM_P (operands[1])
@@ -704,31 +731,31 @@
[(set (match_operand:TI 0 "nonimmediate_operand")
(match_operand:TI 1 "general_operand" ))]
""
- {@ [cons: =0, 1; attrs: type, delayeduse, length, cdna, xnack]
- [SD ,SSB;mult ,* ,* ,* ,* ] #
- [RS ,Sm ;smem ,* ,12,* ,* ] s_store_dwordx4\t%1, %A0
- [Sm ,RS ;smem ,yes,12,* ,off] s_load_dwordx4\t%0, %A1\;s_waitcnt\tlgkmcnt(0)
- [&Sm,RS ;smem ,yes,12,* ,on ] ^
- [RF ,v ;flat ,* ,12,* ,* ] flat_store_dwordx4\t%A0, %1%O0%g0
- [RF ,a ;flat ,* ,12,cdna2,* ] ^
- [v ,RF ;flat ,* ,12,* ,off] flat_load_dwordx4\t%0, %A1%O1%g1\;s_waitcnt\t0
- [&v ,RF ;flat ,* ,12,* ,on ] ^
- [^a ,RF ;flat ,* ,12,cdna2,off] ^
- [&^a,RF ;flat ,* ,12,cdna2,on ] ^
- [v ,v ;vmult,* ,* ,* ,* ] #
- [v ,Sv ;vmult,* ,* ,* ,* ] #
- [SD ,v ;vmult,* ,* ,* ,* ] #
- [RM ,v ;flat ,yes,12,* ,* ] global_store_dwordx4\t%A0, %1%O0%g0
- [RM ,a ;flat ,yes,12,cdna2,* ] ^
- [v ,RM ;flat ,* ,12,* ,off] global_load_dwordx4\t%0, %A1%O1%g1\;s_waitcnt\tvmcnt(0)
- [&v ,RM ;flat ,* ,12,* ,on ] ^
- [^a ,RM ;flat ,* ,12,cdna2,off] ^
- [&^a,RM ;flat ,* ,12,cdna2,on ] ^
- [RL ,v ;ds ,* ,12,* ,* ] ds_write_b128\t%A0, %1%O0\;s_waitcnt\tlgkmcnt(0)
- [v ,RL ;ds ,* ,12,* ,* ] ds_read_b128\t%0, %A1%O1\;s_waitcnt\tlgkmcnt(0)
- [v ,^a ;vmult,* ,* ,* ,* ] #
- [a ,v ;vmult,* ,* ,* ,* ] #
- [a ,a ;vmult,* ,* ,cdna2,* ] #
+ {@ [cons: =0, 1; attrs: type, length, cdna, xnack, flatmemaccess]
+ [SD ,SSB;mult ,* ,* ,* ,* ] #
+ [RS ,Sm ;smem ,12,* ,* ,* ] s_store_dwordx4\t%1, %A0
+ [Sm ,RS ;smem ,12,* ,off,* ] s_load_dwordx4\t%0, %A1\;s_waitcnt\tlgkmcnt(0)
+ [&Sm,RS ;smem ,12,* ,on ,* ] ^
+ [RF ,v ;flat ,12,* ,* ,storex34] flat_store_dwordx4\t%A0, %1%O0%g0
+ [RF ,a ;flat ,12,cdna2,* ,storex34] ^
+ [v ,RF ;flat ,12,* ,off,load ] flat_load_dwordx4\t%0, %A1%O1%g1\;s_waitcnt\t0
+ [&v ,RF ;flat ,12,* ,on ,load ] ^
+ [^a ,RF ;flat ,12,cdna2,off,load ] ^
+ [&^a,RF ;flat ,12,cdna2,on ,load ] ^
+ [v ,v ;vmult,* ,* ,* ,* ] #
+ [v ,Sv ;vmult,* ,* ,* ,* ] #
+ [SD ,v ;vmult,* ,* ,* ,* ] #
+ [RM ,v ;flat ,12,* ,* ,storex34] global_store_dwordx4\t%A0, %1%O0%g0
+ [RM ,a ;flat ,12,cdna2,* ,storex34] ^
+ [v ,RM ;flat ,12,* ,off,load ] global_load_dwordx4\t%0, %A1%O1%g1\;s_waitcnt\tvmcnt(0)
+ [&v ,RM ;flat ,12,* ,on ,load ] ^
+ [^a ,RM ;flat ,12,cdna2,off,load ] ^
+ [&^a,RM ;flat ,12,cdna2,on ,load ] ^
+ [RL ,v ;ds ,12,* ,* ,* ] ds_write_b128\t%A0, %1%O0\;s_waitcnt\tlgkmcnt(0)
+ [v ,RL ;ds ,12,* ,* ,* ] ds_read_b128\t%0, %A1%O1\;s_waitcnt\tlgkmcnt(0)
+ [v ,^a ;vmult,* ,* ,* ,* ] #
+ [a ,v ;vmult,* ,* ,* ,* ] #
+ [a ,a ;vmult,* ,cdna2,* ,* ] #
}
"reload_completed
&& REG_P (operands[0])
@@ -1077,6 +1104,7 @@
s_cmp%D1\t%2, %3
v_cmp%E1\tvcc, %2, %3"
[(set_attr "type" "sopc,vopc")
+ (set_attr "vcmp" "vcmp")
(set_attr "length" "8")])
(define_insn "cstoredi4_vector"
@@ -1087,6 +1115,7 @@
""
"v_cmp%E1\tvcc, %2, %3"
[(set_attr "type" "vopc")
+ (set_attr "vcmp" "vcmp")
(set_attr "length" "8")])
(define_expand "cbranchdi4"
@@ -1113,6 +1142,7 @@
""
"v_cmp%E1\tvcc, %2, %3"
[(set_attr "type" "vopc")
+ (set_attr "vcmp" "vcmp")
(set_attr "length" "8")])
(define_expand "cbranch<mode>4"
@@ -1136,14 +1166,13 @@
[(set (match_operand:SI 0 "register_operand" "= Sg, Sg, Sg, v")
(plus:SI (match_operand:SI 1 "gcn_alu_operand" "%SgA, 0,SgA, v")
(match_operand:SI 2 "gcn_alu_operand" " SgA,SgJ, B,vBSv")))
- (clobber (match_scratch:BI 3 "= cs, cs, cs, X"))
- (clobber (match_scratch:DI 4 "= X, X, X, cV"))]
+ (clobber (match_scratch:BI 3 "= cs, cs, cs, X"))]
""
"@
s_add_i32\t%0, %1, %2
s_addk_i32\t%0, %2
s_add_i32\t%0, %1, %2
- v_add_co_u32\t%0, vcc, %2, %1"
+ {v_add_u32|v_add_nc_u32}\t%0, %2, %1"
[(set_attr "type" "sop2,sopk,sop2,vop2")
(set_attr "length" "4,4,8,8")])
@@ -1151,8 +1180,7 @@
[(parallel [(set (match_operand:SI 0 "register_operand")
(plus:SI (match_operand:SI 1 "gcn_alu_operand")
(match_operand:SI 2 "gcn_alu_operand")))
- (clobber (reg:BI SCC_REG))
- (clobber (scratch:DI))])]
+ (clobber (reg:BI SCC_REG))])]
""
{})
@@ -1332,14 +1360,13 @@
[(set (match_operand:SI 0 "register_operand" "=Sg, Sg, v, v")
(minus:SI (match_operand:SI 1 "gcn_alu_operand" "SgA,SgA, v,vBSv")
(match_operand:SI 2 "gcn_alu_operand" "SgA, B, vBSv, v")))
- (clobber (match_scratch:BI 3 "=cs, cs, X, X"))
- (clobber (match_scratch:DI 4 "= X, X, cV, cV"))]
+ (clobber (match_scratch:BI 3 "=cs, cs, X, X"))]
""
"@
s_sub_i32\t%0, %1, %2
s_sub_i32\t%0, %1, %2
- v_subrev_co_u32\t%0, vcc, %2, %1
- v_sub_co_u32\t%0, vcc, %1, %2"
+ {v_subrev_u32|v_subrev_nc_u32}\t%0, %2, %1
+ {v_sub_u32|v_sub_nc_u32}\t%0, %1, %2"
[(set_attr "type" "sop2,sop2,vop2,vop2")
(set_attr "length" "4,8,8,8")])
@@ -1569,8 +1596,7 @@
(mult:DI (match_operand:DI 1 "register_operand" "%Sg, Sg, v, v")
(match_operand:DI 2 "nonmemory_operand" "Sg, i,vSv, A")))
(clobber (match_scratch:SI 3 "=&Sg,&Sg,&v,&v"))
- (clobber (match_scratch:BI 4 "=cs, cs, X, X"))
- (clobber (match_scratch:DI 5 "=X, X,cV,cV"))]
+ (clobber (match_scratch:BI 4 "=cs, cs, X, X"))]
""
"#"
"reload_completed"
@@ -1585,15 +1611,13 @@
emit_insn (gen_umulsidi3 (operands[0], op1lo, op2lo));
emit_insn (gen_mulsi3 (tmp, op1lo, op2hi));
rtx add = gen_rtx_SET (dsthi, gen_rtx_PLUS (SImode, dsthi, tmp));
- rtx clob1 = gen_rtx_CLOBBER (VOIDmode, operands[4]);
- rtx clob2 = gen_rtx_CLOBBER (VOIDmode, operands[5]);
- add = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (3, add, clob1, clob2));
+ rtx clob = gen_rtx_CLOBBER (VOIDmode, operands[4]);
+ add = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, add, clob));
emit_insn (add);
emit_insn (gen_mulsi3 (tmp, op1hi, op2lo));
add = gen_rtx_SET (dsthi, gen_rtx_PLUS (SImode, dsthi, tmp));
- clob1 = gen_rtx_CLOBBER (VOIDmode, operands[4]);
- clob2 = gen_rtx_CLOBBER (VOIDmode, operands[5]);
- add = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (3, add, clob1, clob2));
+ clob = gen_rtx_CLOBBER (VOIDmode, operands[4]);
+ add = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, add, clob));
emit_insn (add);
DONE;
})
@@ -1964,6 +1988,14 @@
[(set_attr "type" "mult")
(set_attr "length" "8")])
+(define_insn "*memory_barrier"
+ [(set (match_operand:BLK 0)
+ (unspec:BLK [(match_dup 0)] UNSPEC_MEMORY_BARRIER))]
+ "TARGET_TARGET_SC_CACHE"
+ "buffer_inv sc1"
+ [(set_attr "type" "mubuf")
+ (set_attr "length" "4")])
+
; FIXME: These patterns have been disabled as they do not seem to work
; reliably - they can cause hangs or incorrect results.
; TODO: flush caches according to memory model
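A sketch of the source-level construct the *memory_barrier pattern above
serves: a sequentially consistent fence, which on a gfx94x-style
(TARGET_TARGET_SC_CACHE) target can lower to the single cache invalidate:

    /* Sketch: full fence; matched to the memory_barrier pattern and
       emitted as "buffer_inv sc1" on CDNA3-class devices.  */
    __atomic_thread_fence (__ATOMIC_SEQ_CST);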
@@ -1980,9 +2012,10 @@
"0 /* Disabled. */"
"@
s_atomic_<bare_mnemonic><X>\t%0, %1, %2 glc\;s_waitcnt\tlgkmcnt(0)
- flat_atomic_<bare_mnemonic><X>\t%0, %1, %2 glc\;s_waitcnt\t0
- global_atomic_<bare_mnemonic><X>\t%0, %A1, %2%O1 glc\;s_waitcnt\tvmcnt(0)"
+ flat_atomic_<bare_mnemonic><X>\t%0, %1, %2 %G2\;s_waitcnt\t0
+ global_atomic_<bare_mnemonic><X>\t%0, %A1, %2%O1 %G2\;s_waitcnt\tvmcnt(0)"
[(set_attr "type" "smem,flat,flat")
+ (set_attr "flatmemaccess" "*,atomicwait,atomicwait")
(set_attr "length" "12")])
; FIXME: These patterns are disabled because the instructions don't
@@ -2004,6 +2037,7 @@
flat_atomic_<bare_mnemonic><X>\t%0, %1\;s_waitcnt\t0
global_atomic_<bare_mnemonic><X>\t%A0, %1%O0\;s_waitcnt\tvmcnt(0)"
[(set_attr "type" "smem,flat,flat")
+ (set_attr "flatmemaccess" "*,atomicwait,atomicwait")
(set_attr "length" "12")])
(define_mode_attr x2 [(SI "DI") (DI "TI")])
@@ -2047,11 +2081,11 @@
""
"@
s_atomic_cmpswap<X>\t%0, %1, %2 glc\;s_waitcnt\tlgkmcnt(0)
- flat_atomic_cmpswap<X>\t%0, %1, %2 glc\;s_waitcnt\t0
- global_atomic_cmpswap<X>\t%0, %A1, %2%O1 glc\;s_waitcnt\tvmcnt(0)"
+ flat_atomic_cmpswap<X>\t%0, %1, %2 %G2\;s_waitcnt\t0
+ global_atomic_cmpswap<X>\t%0, %A1, %2%O1 %G2\;s_waitcnt\tvmcnt(0)"
[(set_attr "type" "smem,flat,flat")
(set_attr "length" "12")
- (set_attr "delayeduse" "*,yes,yes")])
+ (set_attr "flatmemaccess" "*,cmpswapx2,cmpswapx2")])
(define_insn "sync_compare_and_swap<mode>_lds_insn"
[(set (match_operand:SIDI 0 "register_operand" "= v")
@@ -2091,12 +2125,12 @@
return "s_load%o0\t%0, %A1 glc\;s_waitcnt\tlgkmcnt(0)";
case 1:
return (TARGET_RDNA2 /* Not GFX11. */
- ? "flat_load%o0\t%0, %A1%O1 glc dlc\;s_waitcnt\t0"
- : "flat_load%o0\t%0, %A1%O1 glc\;s_waitcnt\t0");
+ ? "flat_load%o0\t%0, %A1%O1 %G1 dlc\;s_waitcnt\t0"
+ : "flat_load%o0\t%0, %A1%O1 %G1\;s_waitcnt\t0");
case 2:
return (TARGET_RDNA2 /* Not GFX11. */
- ? "global_load%o0\t%0, %A1%O1 glc dlc\;s_waitcnt\tvmcnt(0)"
- : "global_load%o0\t%0, %A1%O1 glc\;s_waitcnt\tvmcnt(0)");
+ ? "global_load%o0\t%0, %A1%O1 %G1 dlc\;s_waitcnt\tvmcnt(0)"
+ : "global_load%o0\t%0, %A1%O1 %G1\;s_waitcnt\tvmcnt(0)");
}
break;
case MEMMODEL_CONSUME:
@@ -2109,21 +2143,27 @@
"s_dcache_wb_vol";
case 1:
return (TARGET_RDNA2
- ? "flat_load%o0\t%0, %A1%O1 glc dlc\;s_waitcnt\t0\;"
+ ? "flat_load%o0\t%0, %A1%O1 %G1 dlc\;s_waitcnt\t0\;"
"buffer_gl1_inv\;buffer_gl0_inv"
: TARGET_RDNA3
- ? "flat_load%o0\t%0, %A1%O1 glc\;s_waitcnt\t0\;"
+ ? "flat_load%o0\t%0, %A1%O1 %G1\;s_waitcnt\t0\;"
"buffer_gl1_inv\;buffer_gl0_inv"
- : "flat_load%o0\t%0, %A1%O1 glc\;s_waitcnt\t0\;"
+ : TARGET_TARGET_SC_CACHE
+ ? "flat_load%o0\t%0, %A1%O1 %G1\;s_waitcnt\t0\;"
+ "buffer_inv sc1"
+ : "flat_load%o0\t%0, %A1%O1 %G1\;s_waitcnt\t0\;"
"buffer_wbinvl1_vol");
case 2:
return (TARGET_RDNA2
- ? "global_load%o0\t%0, %A1%O1 glc dlc\;s_waitcnt\tvmcnt(0)\;"
+ ? "global_load%o0\t%0, %A1%O1 %G1 dlc\;s_waitcnt\tvmcnt(0)\;"
"buffer_gl1_inv\;buffer_gl0_inv"
: TARGET_RDNA3
- ? "global_load%o0\t%0, %A1%O1 glc\;s_waitcnt\tvmcnt(0)\;"
+ ? "global_load%o0\t%0, %A1%O1 %G1\;s_waitcnt\tvmcnt(0)\;"
"buffer_gl1_inv\;buffer_gl0_inv"
- : "global_load%o0\t%0, %A1%O1 glc\;s_waitcnt\tvmcnt(0)\;"
+ : TARGET_TARGET_SC_CACHE
+ ? "global_load%o0\t%0, %A1%O1 %G1\;s_waitcnt\tvmcnt(0)\;"
+ "buffer_inv sc1"
+ : "global_load%o0\t%0, %A1%O1 %G1\;s_waitcnt\tvmcnt(0)\;"
"buffer_wbinvl1_vol");
}
break;
@@ -2137,21 +2177,27 @@
"s_waitcnt\tlgkmcnt(0)\;s_dcache_inv_vol";
case 1:
return (TARGET_RDNA2
- ? "buffer_gl1_inv\;buffer_gl0_inv\;flat_load%o0\t%0, %A1%O1 glc dlc\;"
+ ? "buffer_gl1_inv\;buffer_gl0_inv\;flat_load%o0\t%0, %A1%O1 %G1 dlc\;"
"s_waitcnt\t0\;buffer_gl1_inv\;buffer_gl0_inv"
: TARGET_RDNA3
- ? "buffer_gl1_inv\;buffer_gl0_inv\;flat_load%o0\t%0, %A1%O1 glc\;"
+ ? "buffer_gl1_inv\;buffer_gl0_inv\;flat_load%o0\t%0, %A1%O1 %G1\;"
"s_waitcnt\t0\;buffer_gl1_inv\;buffer_gl0_inv"
- : "buffer_wbinvl1_vol\;flat_load%o0\t%0, %A1%O1 glc\;"
+ : TARGET_TARGET_SC_CACHE
+ ? "buffer_wbl2\tsc0\;s_waitcnt\t0\;flat_load%o0\t%0, %A1%O1 %G1\;"
+ "s_waitcnt\t0\;buffer_inv sc1"
+ : "buffer_wbinvl1_vol\;flat_load%o0\t%0, %A1%O1 %G1\;"
"s_waitcnt\t0\;buffer_wbinvl1_vol");
case 2:
return (TARGET_RDNA2
- ? "buffer_gl1_inv\;buffer_gl0_inv\;global_load%o0\t%0, %A1%O1 glc dlc\;"
+ ? "buffer_gl1_inv\;buffer_gl0_inv\;global_load%o0\t%0, %A1%O1 %G1 dlc\;"
"s_waitcnt\tvmcnt(0)\;buffer_gl1_inv\;buffer_gl0_inv"
: TARGET_RDNA3
- ? "buffer_gl1_inv\;buffer_gl0_inv\;global_load%o0\t%0, %A1%O1 glc\;"
+ ? "buffer_gl1_inv\;buffer_gl0_inv\;global_load%o0\t%0, %A1%O1 %G1\;"
"s_waitcnt\tvmcnt(0)\;buffer_gl1_inv\;buffer_gl0_inv"
- : "buffer_wbinvl1_vol\;global_load%o0\t%0, %A1%O1 glc\;"
+ : TARGET_TARGET_SC_CACHE
+ ? "buffer_wbl2\tsc0\;s_waitcnt\tvmcnt(0)\;global_load%o0\t%0, %A1%O1 %G1\;"
+ "s_waitcnt\tvmcnt(0)\;buffer_inv sc1"
+ : "buffer_wbinvl1_vol\;global_load%o0\t%0, %A1%O1 %G1\;"
"s_waitcnt\tvmcnt(0)\;buffer_wbinvl1_vol");
}
break;
@@ -2159,6 +2205,7 @@
gcc_unreachable ();
}
[(set_attr "type" "smem,flat,flat")
+ (set_attr "flatmemaccess" "*,load,load")
(set_attr "length" "28")
(set_attr "rdna" "no,*,*")])
@@ -2178,9 +2225,9 @@
case 0:
return "s_store%o1\t%1, %A0 glc\;s_waitcnt\tlgkmcnt(0)";
case 1:
- return "flat_store%o1\t%A0, %1%O0 glc\;s_waitcnt\t0";
+ return "flat_store%o1\t%A0, %1%O0 %G1\;s_waitcnt\t0";
case 2:
- return "global_store%o1\t%A0, %1%O0 glc\;s_waitcnt\tvmcnt(0)";
+ return "global_store%o1\t%A0, %1%O0 %G1\;s_waitcnt\tvmcnt(0)";
}
break;
case MEMMODEL_RELEASE:
@@ -2191,15 +2238,19 @@
return "s_dcache_wb_vol\;s_store%o1\t%1, %A0 glc";
case 1:
return (TARGET_GLn_CACHE
- ? "buffer_gl1_inv\;buffer_gl0_inv\;flat_store%o1\t%A0, %1%O0 glc"
+ ? "buffer_gl1_inv\;buffer_gl0_inv\;flat_store%o1\t%A0, %1%O0 %G1"
: TARGET_WBINVL1_CACHE
- ? "buffer_wbinvl1_vol\;flat_store%o1\t%A0, %1%O0 glc"
+ ? "buffer_wbinvl1_vol\;flat_store%o1\t%A0, %1%O0 %G1"
+ : TARGET_TARGET_SC_CACHE
+ ? "buffer_wbl2\tsc0\;s_waitcnt\t0\;flat_store%o1\t%A0, %1%O0 %G1"
: "error: cache architectire unspecified");
case 2:
return (TARGET_GLn_CACHE
- ? "buffer_gl1_inv\;buffer_gl0_inv\;global_store%o1\t%A0, %1%O0 glc"
+ ? "buffer_gl1_inv\;buffer_gl0_inv\;global_store%o1\t%A0, %1%O0 %G1"
: TARGET_WBINVL1_CACHE
- ? "buffer_wbinvl1_vol\;global_store%o1\t%A0, %1%O0 glc"
+ ? "buffer_wbinvl1_vol\;global_store%o1\t%A0, %1%O0 %G1"
+ : TARGET_TARGET_SC_CACHE
+ ? "buffer_wbl2\tsc0\;s_waitcnt\tvmcnt(0)\;global_store%o1\t%A0, %1%O0 %G1"
: "error: cache architecture unspecified");
}
break;
@@ -2213,19 +2264,27 @@
"s_waitcnt\tlgkmcnt(0)\;s_dcache_inv_vol";
case 1:
return (TARGET_GLn_CACHE
- ? "buffer_gl1_inv\;buffer_gl0_inv\;flat_store%o1\t%A0, %1%O0 glc\;"
+ ? "buffer_gl1_inv\;buffer_gl0_inv\;flat_store%o1\t%A0, %1%O0 %G1\;"
"s_waitcnt\t0\;buffer_gl1_inv\;buffer_gl0_inv"
: TARGET_WBINVL1_CACHE
- ? "buffer_wbinvl1_vol\;flat_store%o1\t%A0, %1%O0 glc\;"
+ ? "buffer_wbinvl1_vol\;flat_store%o1\t%A0, %1%O0 %G1\;"
"s_waitcnt\t0\;buffer_wbinvl1_vol"
+ : TARGET_TARGET_SC_CACHE
+ ? "buffer_wbl2\tsc0\;s_waitcnt\t0\;"
+ "flat_store%o1\t%A0, %1%O0 %G1\;"
+ "s_waitcnt\t0\;buffer_inv sc1"
: "error: cache architecture unspecified");
case 2:
return (TARGET_GLn_CACHE
- ? "buffer_gl1_inv\;buffer_gl0_inv\;global_store%o1\t%A0, %1%O0 glc\;"
+ ? "buffer_gl1_inv\;buffer_gl0_inv\;global_store%o1\t%A0, %1%O0 %G1\;"
"s_waitcnt\tvmcnt(0)\;buffer_gl1_inv\;buffer_gl0_inv"
: TARGET_WBINVL1_CACHE
- ? "buffer_wbinvl1_vol\;global_store%o1\t%A0, %1%O0 glc\;"
+ ? "buffer_wbinvl1_vol\;global_store%o1\t%A0, %1%O0 %G1\;"
"s_waitcnt\tvmcnt(0)\;buffer_wbinvl1_vol"
+ : TARGET_TARGET_SC_CACHE
+ ? "buffer_wbl2\tsc0\;s_waitcnt\tvmcnt(0)\;"
+ "global_store%o1\t%A0, %1%O0 %G1\;"
+ "s_waitcnt\tvmcnt(0)\;buffer_inv sc1"
: "error: cache architecture unspecified");
}
break;
@@ -2233,6 +2292,7 @@
gcc_unreachable ();
}
[(set_attr "type" "smem,flat,flat")
+ (set_attr "flatmemaccess" "*,store,store")
(set_attr "length" "28")
(set_attr "rdna" "no,*,*")])
@@ -2254,9 +2314,9 @@
case 0:
return "s_atomic_swap<X>\t%0, %1, %2 glc\;s_waitcnt\tlgkmcnt(0)";
case 1:
- return "flat_atomic_swap<X>\t%0, %1, %2 glc\;s_waitcnt\t0";
+ return "flat_atomic_swap<X>\t%0, %1, %2 %G1\;s_waitcnt\t0";
case 2:
- return "global_atomic_swap<X>\t%0, %A1, %2%O1 glc\;"
+ return "global_atomic_swap<X>\t%0, %A1, %2%O1 %G1\;"
"s_waitcnt\tvmcnt(0)";
}
break;
@@ -2270,19 +2330,25 @@
"s_dcache_wb_vol\;s_dcache_inv_vol";
case 1:
return (TARGET_GLn_CACHE
- ? "flat_atomic_swap<X>\t%0, %1, %2 glc\;s_waitcnt\t0\;"
+ ? "flat_atomic_swap<X>\t%0, %1, %2 %G1\;s_waitcnt\t0\;"
"buffer_gl1_inv\;buffer_gl0_inv"
: TARGET_WBINVL1_CACHE
- ? "flat_atomic_swap<X>\t%0, %1, %2 glc\;s_waitcnt\t0\;"
+ ? "flat_atomic_swap<X>\t%0, %1, %2 %G1\;s_waitcnt\t0\;"
"buffer_wbinvl1_vol"
+ : TARGET_TARGET_SC_CACHE
+ ? "flat_atomic_swap<X>\t%0, %1, %2 %G1\;s_waitcnt\t0\;"
+ "buffer_inv sc1"
: "error: cache architecture unspecified");
case 2:
return (TARGET_GLn_CACHE
- ? "global_atomic_swap<X>\t%0, %A1, %2%O1 glc\;"
+ ? "global_atomic_swap<X>\t%0, %A1, %2%O1 %G1\;"
"s_waitcnt\tvmcnt(0)\;buffer_gl1_inv\;buffer_gl0_inv"
: TARGET_WBINVL1_CACHE
- ? "global_atomic_swap<X>\t%0, %A1, %2%O1 glc\;"
+ ? "global_atomic_swap<X>\t%0, %A1, %2%O1 %G1\;"
"s_waitcnt\tvmcnt(0)\;buffer_wbinvl1_vol"
+ : TARGET_TARGET_SC_CACHE
+ ? "global_atomic_swap<X>\t%0, %A1, %2%O1 %G1\;"
+ "s_waitcnt\tvmcnt(0)\;buffer_inv sc1"
: "error: cache architecture unspecified");
}
break;
@@ -2295,20 +2361,27 @@
"s_waitcnt\tlgkmcnt(0)";
case 1:
return (TARGET_GLn_CACHE
- ? "buffer_gl1_inv\;buffer_gl0_inv\;flat_atomic_swap<X>\t%0, %1, %2 glc\;"
+ ? "buffer_gl1_inv\;buffer_gl0_inv\;flat_atomic_swap<X>\t%0, %1, %2 %G1\;"
"s_waitcnt\t0"
: TARGET_WBINVL1_CACHE
- ? "buffer_wbinvl1_vol\;flat_atomic_swap<X>\t%0, %1, %2 glc\;"
+ ? "buffer_wbinvl1_vol\;flat_atomic_swap<X>\t%0, %1, %2 %G1\;"
+ "s_waitcnt\t0"
+ : TARGET_TARGET_SC_CACHE
+ ? "buffer_wbl2\tsc0\;s_waitcnt\t0\;flat_atomic_swap<X>\t%0, %1, %2 %G1\;"
"s_waitcnt\t0"
: "error: cache architecture unspecified");
case 2:
return (TARGET_GLn_CACHE
? "buffer_gl1_inv\;buffer_gl0_inv\;"
- "global_atomic_swap<X>\t%0, %A1, %2%O1 glc\;"
+ "global_atomic_swap<X>\t%0, %A1, %2%O1 %G1\;"
"s_waitcnt\tvmcnt(0)"
: TARGET_WBINVL1_CACHE
? "buffer_wbinvl1_vol\;"
- "global_atomic_swap<X>\t%0, %A1, %2%O1 glc\;"
+ "global_atomic_swap<X>\t%0, %A1, %2%O1 %G1\;"
+ "s_waitcnt\tvmcnt(0)"
+ : TARGET_TARGET_SC_CACHE
+ ? "buffer_wbl2\tsc0\;s_waitcnt\tvmcnt(0)\;"
+ "global_atomic_swap<X>\t%0, %A1, %2%O1 %G1\;"
"s_waitcnt\tvmcnt(0)"
: "error: cache architecture unspecified");
}
@@ -2323,21 +2396,28 @@
"s_waitcnt\tlgkmcnt(0)\;s_dcache_inv_vol";
case 1:
return (TARGET_GLn_CACHE
- ? "buffer_gl1_inv\;buffer_gl0_inv\;flat_atomic_swap<X>\t%0, %1, %2 glc\;"
+ ? "buffer_gl1_inv\;buffer_gl0_inv\;flat_atomic_swap<X>\t%0, %1, %2 %G1\;"
"s_waitcnt\t0\;buffer_gl1_inv\;buffer_gl0_inv"
: TARGET_WBINVL1_CACHE
- ? "buffer_wbinvl1_vol\;flat_atomic_swap<X>\t%0, %1, %2 glc\;"
+ ? "buffer_wbinvl1_vol\;flat_atomic_swap<X>\t%0, %1, %2 %G1\;"
"s_waitcnt\t0\;buffer_wbinvl1_vol"
+ : TARGET_TARGET_SC_CACHE
+ ? "buffer_wbl2\tsc0\;s_waitcnt\t0\;flat_atomic_swap<X>\t%0, %1, %2 %G1\;"
+ "s_waitcnt\t0\;buffer_inv sc1"
: "error: cache architecture unspecified");
case 2:
return (TARGET_GLn_CACHE
? "buffer_gl1_inv\;buffer_gl0_inv\;"
- "global_atomic_swap<X>\t%0, %A1, %2%O1 glc\;"
+ "global_atomic_swap<X>\t%0, %A1, %2%O1 %G1\;"
"s_waitcnt\tvmcnt(0)\;buffer_gl1_inv\;buffer_gl0_inv"
: TARGET_WBINVL1_CACHE
? "buffer_wbinvl1_vol\;"
- "global_atomic_swap<X>\t%0, %A1, %2%O1 glc\;"
+ "global_atomic_swap<X>\t%0, %A1, %2%O1 %G1\;"
"s_waitcnt\tvmcnt(0)\;buffer_wbinvl1_vol"
+ : TARGET_TARGET_SC_CACHE
+ ? "buffer_wbl2\tsc0\;s_waitcnt\tvmcnt(0)\;"
+ "global_atomic_swap<X>\t%0, %A1, %2%O1 %G1\;"
+ "s_waitcnt\tvmcnt(0)\;buffer_inv sc1"
: "error: cache architecture unspecified");
}
break;
@@ -2345,6 +2425,7 @@
gcc_unreachable ();
}
[(set_attr "type" "smem,flat,flat")
+ (set_attr "flatmemaccess" "*,atomicwait,atomicwait")
(set_attr "length" "28")
(set_attr "rdna" "no,*,*")])
diff --git a/gcc/config/h8300/addsub.md b/gcc/config/h8300/addsub.md
index 32eba9d..f153625 100644
--- a/gcc/config/h8300/addsub.md
+++ b/gcc/config/h8300/addsub.md
@@ -271,7 +271,7 @@
(match_operand:QHSI 2 "register_operand" "r"))
(match_dup 1)))
(set (match_operand:QHSI 0 "register_operand" "=r")
- (plus (match_dup 1) (match_dup 2)))
+ (plus:QHSI (match_dup 1) (match_dup 2)))
(clobber (reg:CC CC_REG))]
""
{
diff --git a/gcc/config/h8300/h8300.h b/gcc/config/h8300/h8300.h
index 6b2dd65..8018b63 100644
--- a/gcc/config/h8300/h8300.h
+++ b/gcc/config/h8300/h8300.h
@@ -610,6 +610,12 @@ struct cum_arg
#define DATA_SECTION_ASM_OP "\t.section .data"
#define BSS_SECTION_ASM_OP "\t.section .bss"
+/* Override default definitions from elfos.h. */
+#undef INIT_SECTION_ASM_OP
+#define INIT_SECTION_ASM_OP "\t.section\t.init,\"ax\""
+#undef FINI_SECTION_ASM_OP
+#define FINI_SECTION_ASM_OP "\t.section\t.fini,\"ax\""
+
#undef DO_GLOBAL_CTORS_BODY
#define DO_GLOBAL_CTORS_BODY \
{ \
@@ -647,19 +653,11 @@ struct cum_arg
/* Globalizing directive for a label. */
#define GLOBAL_ASM_OP "\t.global "
+/* Override default definition from elfos.h. */
+#undef ASM_DECLARE_FUNCTION_NAME
#define ASM_DECLARE_FUNCTION_NAME(FILE, NAME, DECL) \
ASM_OUTPUT_FUNCTION_LABEL (FILE, NAME, DECL)
-/* This is how to store into the string LABEL
- the symbol_ref name of an internal numbered label where
- PREFIX is the class of label and NUM is the number within the class.
- This is suitable for output with `assemble_name'.
-
- N.B.: The h8300.md branch_true and branch_false patterns also know
- how to generate internal labels. */
-#define ASM_GENERATE_INTERNAL_LABEL(LABEL, PREFIX, NUM) \
- sprintf (LABEL, "*.%s%lu", PREFIX, (unsigned long)(NUM))
-
/* This is how to output an insn to push a register on the stack.
It need not be very fast code. */
@@ -690,9 +688,6 @@ struct cum_arg
if ((LOG) != 0) \
fprintf (FILE, "\t.align %d\n", (LOG))
-#define ASM_OUTPUT_SKIP(FILE, SIZE) \
- fprintf (FILE, "\t.space %d\n", (int)(SIZE))
-
/* This says how to output an assembler line
to define a global common symbol. */
diff --git a/gcc/config/h8300/jumpcall.md b/gcc/config/h8300/jumpcall.md
index 4e63408..44847e4 100644
--- a/gcc/config/h8300/jumpcall.md
+++ b/gcc/config/h8300/jumpcall.md
@@ -156,7 +156,7 @@
"#"
"&& reload_completed"
[(set (reg:CCZ CC_REG)
- (eq (zero_extract:HSI (match_dup 1) (const_int 1) (match_dup 2))
+ (eq:CCZ (zero_extract:HSI (match_dup 1) (const_int 1) (match_dup 2))
(const_int 0)))
(set (pc)
(if_then_else (match_op_dup 3 [(reg:CCZ CC_REG) (const_int 0)])
@@ -181,7 +181,7 @@
(lshiftrt:SI (match_dup 1) (const_int 16))))
(clobber (reg:CC CC_REG))])
(set (reg:CCZ CC_REG)
- (eq (zero_extract:SI (match_dup 4) (const_int 1) (match_dup 2))
+ (eq:CCZ (zero_extract:SI (match_dup 4) (const_int 1) (match_dup 2))
(const_int 0)))
(set (pc)
(if_then_else (match_op_dup 3 [(reg:CCZ CC_REG) (const_int 0)])
@@ -288,7 +288,7 @@
})
(define_insn "call_insn_<mode>"
- [(call (mem:QI (match_operand 0 "call_insn_operand" "Cr"))
+ [(call (mem:QI (match_operand:P 0 "call_insn_operand" "Cr"))
(match_operand:P 1 "general_operand" "g"))]
"!SIBLING_CALL_P (insn)"
{
@@ -326,7 +326,7 @@
(define_insn "call_value_insn_<mode>"
[(set (match_operand 0 "" "=r")
- (call (mem:QI (match_operand 1 "call_insn_operand" "Cr"))
+ (call (mem:QI (match_operand:P 1 "call_insn_operand" "Cr"))
(match_operand:P 2 "general_operand" "g")))]
"!SIBLING_CALL_P (insn)"
{
@@ -358,7 +358,7 @@
})
(define_insn "sibcall_insn_<mode>"
- [(call (mem:QI (match_operand 0 "call_insn_operand" "Cr"))
+ [(call (mem:QI (match_operand:P 0 "call_insn_operand" "Cr"))
(match_operand:P 1 "general_operand" "g"))]
"SIBLING_CALL_P (insn)"
{
@@ -396,7 +396,7 @@
(define_insn "sibcall_value_insn_<mode>"
[(set (match_operand 0 "" "=r")
- (call (mem:QI (match_operand 1 "call_insn_operand" "Cr"))
+ (call (mem:QI (match_operand:P 1 "call_insn_operand" "Cr"))
(match_operand:P 2 "general_operand" "g")))]
"SIBLING_CALL_P (insn)"
{
diff --git a/gcc/config/h8300/testcompare.md b/gcc/config/h8300/testcompare.md
index 694c9e6..3b43381 100644
--- a/gcc/config/h8300/testcompare.md
+++ b/gcc/config/h8300/testcompare.md
@@ -28,7 +28,7 @@
;;
(define_insn ""
[(set (reg:CCZ CC_REG)
- (eq (zero_extract:HSI (match_operand:HSI 0 "register_operand" "r")
+ (eq:CCZ (zero_extract:HSI (match_operand:HSI 0 "register_operand" "r")
(const_int 1)
(match_operand 1 "const_int_operand" "n"))
(const_int 0)))]
@@ -54,7 +54,7 @@
(define_insn "*tsthi_upper"
[(set (reg:CCZN CC_REG)
- (compare (and:HI (match_operand:HI 0 "register_operand" "r")
+ (compare:CCZN (and:HI (match_operand:HI 0 "register_operand" "r")
(const_int -256))
(const_int 0)))]
"reload_completed"
@@ -63,7 +63,7 @@
(define_insn "*tsthi_upper_z"
[(set (reg:CCZ CC_REG)
- (compare (and:HI (match_operand:HI 0 "register_operand" "r")
+ (compare:CCZ (and:HI (match_operand:HI 0 "register_operand" "r")
(const_int -256))
(const_int 0)))]
"reload_completed"
@@ -72,7 +72,7 @@
(define_insn "*tstsi_upper"
[(set (reg:CCZN CC_REG)
- (compare (and:SI (match_operand:SI 0 "register_operand" "r")
+ (compare:CCZN (and:SI (match_operand:SI 0 "register_operand" "r")
(const_int -65536))
(const_int 0)))]
"reload_completed"
@@ -81,7 +81,7 @@
(define_insn "*cmp<mode>_c"
[(set (reg:CCC CC_REG)
- (ltu (match_operand:QHSI 0 "h8300_dst_operand" "rQ")
+ (ltu:CCC (match_operand:QHSI 0 "h8300_dst_operand" "rQ")
(match_operand:QHSI 1 "h8300_src_operand" "rQi")))]
"reload_completed"
{
@@ -97,7 +97,7 @@
(define_insn "*cmpqi_z"
[(set (reg:CCZ CC_REG)
- (eq (match_operand:QI 0 "h8300_dst_operand" "rQ")
+ (eq:CCZ (match_operand:QI 0 "h8300_dst_operand" "rQ")
(match_operand:QI 1 "h8300_src_operand" "rQi")))]
"reload_completed"
{ return "cmp.b %X1,%X0"; }
@@ -105,7 +105,7 @@
(define_insn "*cmphi_z"
[(set (reg:CCZ CC_REG)
- (eq (match_operand:HI 0 "h8300_dst_operand" "rQ")
+ (eq:CCZ (match_operand:HI 0 "h8300_dst_operand" "rQ")
(match_operand:HI 1 "h8300_src_operand" "rQi")))]
"reload_completed"
{ return "cmp.w %T1,%T0"; }
@@ -113,7 +113,7 @@
(define_insn "*cmpsi_z"
[(set (reg:CCZ CC_REG)
- (eq (match_operand:SI 0 "h8300_dst_operand" "rQ")
+ (eq:CCZ (match_operand:SI 0 "h8300_dst_operand" "rQ")
(match_operand:SI 1 "h8300_src_operand" "rQi")))]
"reload_completed"
{ return "cmp.l %S1,%S0"; }
@@ -121,7 +121,7 @@
(define_insn "*cmpqi"
[(set (reg:CC CC_REG)
- (compare (match_operand:QI 0 "h8300_dst_operand" "rQ")
+ (compare:CC (match_operand:QI 0 "h8300_dst_operand" "rQ")
(match_operand:QI 1 "h8300_src_operand" "rQi")))]
"reload_completed"
"cmp.b %X1,%X0"
@@ -129,7 +129,7 @@
(define_insn "*cmphi"
[(set (reg:CC CC_REG)
- (compare (match_operand:HI 0 "h8300_dst_operand" "rU,rQ")
+ (compare:CC (match_operand:HI 0 "h8300_dst_operand" "rU,rQ")
(match_operand:HI 1 "h8300_src_operand" "P3>X,rQi")))]
"reload_completed"
{
@@ -150,7 +150,7 @@
(define_insn "cmpsi"
[(set (reg:CC CC_REG)
- (compare (match_operand:SI 0 "h8300_dst_operand" "r,rQ")
+ (compare:CC (match_operand:SI 0 "h8300_dst_operand" "r,rQ")
(match_operand:SI 1 "h8300_src_operand" "P3>X,rQi")))]
"reload_completed"
{
@@ -176,7 +176,7 @@
(define_peephole2
[(match_scratch:QHSI 1 "r")
(set (reg:CC CC_REG)
- (compare (match_operand:QHSI 0 "memory_operand" "")
+ (compare:CC (match_operand:QHSI 0 "memory_operand" "")
(const_int 0)))]
"!mode_dependent_address_p (XEXP (operands[0], 0), MEM_ADDR_SPACE (operands[0]))"
[(parallel [(set (reg:CCZN CC_REG) (compare:CCZN (match_dup 0) (const_int 0)))
@@ -187,7 +187,7 @@
(define_peephole2
[(match_scratch:QHSI 1 "r")
(set (reg:CC CC_REG)
- (compare (match_operand:QHSI 0 "memory_operand" "")
+ (compare:CC (match_operand:QHSI 0 "memory_operand" "")
(const_int 0)))]
"mode_dependent_address_p (XEXP (operands[0], 0), MEM_ADDR_SPACE (operands[0]))"
[(parallel [(set (match_dup 1) (match_dup 0)) (clobber (reg:CC CC_REG))])
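All of the h8300 hunks above add an explicit machine mode to an operator
whose SET destination already fixes one (eq:CCZ, compare:CC, ltu:CCC,
plus:QHSI, and the :P iterator on call addresses); a C++ sketch of the
invariant, with op0/op1 as hypothetical operands:

    /* Sketch: spell the comparison mode out; a VOIDmode EQ inside a
       SET of reg:CCZ would trip stricter RTL mode checking.  */
    rtx cc = gen_rtx_REG (CCZmode, CC_REG);    /* h8300's CC register  */
    rtx cmp = gen_rtx_EQ (CCZmode, op0, op1);  /* eq:CCZ, not bare eq  */
    emit_insn (gen_rtx_SET (cc, cmp));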
diff --git a/gcc/config/i386/avx10_2-512bf16intrin.h b/gcc/config/i386/avx10_2-512bf16intrin.h
deleted file mode 100644
index 21e4b36..0000000
--- a/gcc/config/i386/avx10_2-512bf16intrin.h
+++ /dev/null
@@ -1,681 +0,0 @@
-/* Copyright (C) 2024-2025 Free Software Foundation, Inc.
-
- This file is part of GCC.
-
- GCC is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 3, or (at your option)
- any later version.
-
- GCC is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- Under Section 7 of GPL version 3, you are granted additional
- permissions described in the GCC Runtime Library Exception, version
- 3.1, as published by the Free Software Foundation.
-
- You should have received a copy of the GNU General Public License and
- a copy of the GCC Runtime Library Exception along with this program;
- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
- <http://www.gnu.org/licenses/>. */
-
-#ifndef _IMMINTRIN_H_INCLUDED
-#error "Never use <avx10_2-512bf16intrin.h> directly; include <immintrin.h> instead."
-#endif
-
-#ifndef _AVX10_2_512BF16INTRIN_H_INCLUDED
-#define _AVX10_2_512BF16INTRIN_H_INCLUDED
-
-#if !defined (__AVX10_2__)
-#pragma GCC push_options
-#pragma GCC target("avx10.2")
-#define __DISABLE_AVX10_2__
-#endif /* __AVX10_2__ */
-
-extern __inline__ __m512bh
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_add_pbh (__m512bh __A, __m512bh __B)
-{
- return (__m512bh) __builtin_ia32_addbf16512 (__A, __B);
-}
-
-extern __inline__ __m512bh
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_add_pbh (__m512bh __W, __mmask32 __U,
- __m512bh __A, __m512bh __B)
-{
- return (__m512bh)
- __builtin_ia32_addbf16512_mask (__A, __B, __W, __U);
-}
-
-extern __inline__ __m512bh
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_add_pbh (__mmask32 __U, __m512bh __A, __m512bh __B)
-{
- return (__m512bh)
- __builtin_ia32_addbf16512_mask (__A, __B,
- (__v32bf) _mm512_setzero_si512 (),
- __U);
-}
-
-extern __inline__ __m512bh
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_sub_pbh (__m512bh __A, __m512bh __B)
-{
- return (__m512bh) __builtin_ia32_subbf16512 (__A, __B);
-}
-
-extern __inline__ __m512bh
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_sub_pbh (__m512bh __W, __mmask32 __U,
- __m512bh __A, __m512bh __B)
-{
- return (__m512bh)
- __builtin_ia32_subbf16512_mask (__A, __B, __W, __U);
-}
-
-extern __inline__ __m512bh
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_sub_pbh (__mmask32 __U, __m512bh __A, __m512bh __B)
-{
- return (__m512bh)
- __builtin_ia32_subbf16512_mask (__A, __B,
- (__v32bf) _mm512_setzero_si512 (),
- __U);
-}
-
-extern __inline__ __m512bh
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mul_pbh (__m512bh __A, __m512bh __B)
-{
- return (__m512bh) __builtin_ia32_mulbf16512 (__A, __B);
-}
-
-extern __inline__ __m512bh
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_mul_pbh (__m512bh __W, __mmask32 __U,
- __m512bh __A, __m512bh __B)
-{
- return (__m512bh)
- __builtin_ia32_mulbf16512_mask (__A, __B, __W, __U);
-}
-
-extern __inline__ __m512bh
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_mul_pbh (__mmask32 __U, __m512bh __A, __m512bh __B)
-{
- return (__m512bh)
- __builtin_ia32_mulbf16512_mask (__A, __B,
- (__v32bf) _mm512_setzero_si512 (),
- __U);
-}
-
-extern __inline__ __m512bh
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_div_pbh (__m512bh __A, __m512bh __B)
-{
- return (__m512bh) __builtin_ia32_divbf16512 (__A, __B);
-}
-
-extern __inline__ __m512bh
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_div_pbh (__m512bh __W, __mmask32 __U,
- __m512bh __A, __m512bh __B)
-{
- return (__m512bh)
- __builtin_ia32_divbf16512_mask (__A, __B, __W, __U);
-}
-
-extern __inline__ __m512bh
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_div_pbh (__mmask32 __U, __m512bh __A, __m512bh __B)
-{
- return (__m512bh)
- __builtin_ia32_divbf16512_mask (__A, __B,
- (__v32bf) _mm512_setzero_si512 (),
- __U);
-}
-
-extern __inline__ __m512bh
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_max_pbh (__m512bh __A, __m512bh __B)
-{
- return (__m512bh) __builtin_ia32_maxbf16512 (__A, __B);
-}
-
-extern __inline__ __m512bh
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_max_pbh (__m512bh __W, __mmask32 __U,
- __m512bh __A, __m512bh __B)
-{
- return (__m512bh)
- __builtin_ia32_maxbf16512_mask (__A, __B, __W, __U);
-}
-
-extern __inline__ __m512bh
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_max_pbh (__mmask32 __U, __m512bh __A, __m512bh __B)
-{
- return (__m512bh)
- __builtin_ia32_maxbf16512_mask (__A, __B,
- (__v32bf) _mm512_setzero_si512 (),
- __U);
-}
-
-extern __inline__ __m512bh
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_min_pbh (__m512bh __A, __m512bh __B)
-{
- return (__m512bh) __builtin_ia32_minbf16512 (__A, __B);
-}
-
-extern __inline__ __m512bh
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_min_pbh (__m512bh __W, __mmask32 __U,
- __m512bh __A, __m512bh __B)
-{
- return (__m512bh)
- __builtin_ia32_minbf16512_mask (__A, __B, __W, __U);
-}
-
-extern __inline__ __m512bh
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_min_pbh (__mmask32 __U, __m512bh __A, __m512bh __B)
-{
- return (__m512bh)
- __builtin_ia32_minbf16512_mask (__A, __B,
- (__v32bf) _mm512_setzero_si512 (),
- __U);
-}
-
-extern __inline__ __m512bh
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_scalef_pbh (__m512bh __A, __m512bh __B)
-{
- return (__m512bh) __builtin_ia32_scalefbf16512 (__A, __B);
-}
-
-extern __inline__ __m512bh
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_scalef_pbh (__m512bh __W, __mmask32 __U,
- __m512bh __A, __m512bh __B)
-{
- return (__m512bh)
- __builtin_ia32_scalefbf16512_mask (__A, __B, __W, __U);
-}
-
-extern __inline__ __m512bh
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_scalef_pbh (__mmask32 __U, __m512bh __A, __m512bh __B)
-{
- return (__m512bh)
- __builtin_ia32_scalefbf16512_mask (__A, __B,
- (__v32bf) _mm512_setzero_si512 (),
- __U);
-}
-
-extern __inline__ __m512bh
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_fmadd_pbh (__m512bh __A, __m512bh __B, __m512bh __C)
-{
- return (__m512bh)
- __builtin_ia32_fmaddbf16512_mask (__A, __B, __C, (__mmask32) -1);
-}
-
-extern __inline__ __m512bh
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_fmadd_pbh (__m512bh __A, __mmask32 __U,
- __m512bh __B, __m512bh __C)
-{
- return (__m512bh)
- __builtin_ia32_fmaddbf16512_mask (__A, __B, __C, __U);
-}
-
-extern __inline__ __m512bh
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask3_fmadd_pbh (__m512bh __A, __m512bh __B,
- __m512bh __C, __mmask32 __U)
-{
- return (__m512bh)
- __builtin_ia32_fmaddbf16512_mask3 (__A, __B, __C, __U);
-}
-
-extern __inline__ __m512bh
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_fmadd_pbh (__mmask32 __U, __m512bh __A,
- __m512bh __B, __m512bh __C)
-{
- return (__m512bh)
- __builtin_ia32_fmaddbf16512_maskz (__A, __B, __C, __U);
-}
-
-extern __inline__ __m512bh
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_fmsub_pbh (__m512bh __A, __m512bh __B, __m512bh __C)
-{
- return (__m512bh)
- __builtin_ia32_fmsubbf16512_mask (__A, __B, __C, (__mmask32) -1);
-}
-
-extern __inline__ __m512bh
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_fmsub_pbh (__m512bh __A, __mmask32 __U,
- __m512bh __B, __m512bh __C)
-{
- return (__m512bh)
- __builtin_ia32_fmsubbf16512_mask (__A, __B, __C, __U);
-}
-
-extern __inline__ __m512bh
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask3_fmsub_pbh (__m512bh __A, __m512bh __B,
- __m512bh __C, __mmask32 __U)
-{
- return (__m512bh)
- __builtin_ia32_fmsubbf16512_mask3 (__A, __B, __C, __U);
-}
-
-extern __inline__ __m512bh
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_fmsub_pbh (__mmask32 __U, __m512bh __A,
- __m512bh __B, __m512bh __C)
-{
- return (__m512bh)
- __builtin_ia32_fmsubbf16512_maskz (__A, __B, __C, __U);
-}
-
-extern __inline__ __m512bh
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_fnmadd_pbh (__m512bh __A, __m512bh __B, __m512bh __C)
-{
- return (__m512bh)
- __builtin_ia32_fnmaddbf16512_mask (__A, __B, __C, (__mmask32) -1);
-}
-
-extern __inline__ __m512bh
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_fnmadd_pbh (__m512bh __A, __mmask32 __U,
- __m512bh __B, __m512bh __C)
-{
- return (__m512bh)
- __builtin_ia32_fnmaddbf16512_mask (__A, __B, __C, __U);
-}
-
-extern __inline__ __m512bh
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask3_fnmadd_pbh (__m512bh __A, __m512bh __B,
- __m512bh __C, __mmask32 __U)
-{
- return (__m512bh)
- __builtin_ia32_fnmaddbf16512_mask3 (__A, __B, __C, __U);
-}
-
-extern __inline__ __m512bh
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_fnmadd_pbh (__mmask32 __U, __m512bh __A,
- __m512bh __B, __m512bh __C)
-{
- return (__m512bh)
- __builtin_ia32_fnmaddbf16512_maskz (__A, __B, __C, __U);
-}
-
-extern __inline__ __m512bh
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_fnmsub_pbh (__m512bh __A, __m512bh __B, __m512bh __C)
-{
- return (__m512bh)
- __builtin_ia32_fnmsubbf16512_mask (__A, __B, __C, (__mmask32) -1);
-}
-
-extern __inline__ __m512bh
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_fnmsub_pbh (__m512bh __A, __mmask32 __U,
- __m512bh __B, __m512bh __C)
-{
- return (__m512bh)
- __builtin_ia32_fnmsubbf16512_mask (__A, __B, __C, __U);
-}
-
-extern __inline__ __m512bh
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask3_fnmsub_pbh (__m512bh __A, __m512bh __B,
- __m512bh __C, __mmask32 __U)
-{
- return (__m512bh)
- __builtin_ia32_fnmsubbf16512_mask3 (__A, __B, __C, __U);
-}
-
-extern __inline__ __m512bh
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_fnmsub_pbh (__mmask32 __U, __m512bh __A,
- __m512bh __B, __m512bh __C)
-{
- return (__m512bh)
- __builtin_ia32_fnmsubbf16512_maskz (__A, __B, __C, __U);
-}
-
-extern __inline__ __m512bh
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_rsqrt_pbh (__m512bh __A)
-{
- return (__m512bh)
- __builtin_ia32_rsqrtbf16512_mask (__A,
- (__v32bf) _mm512_setzero_si512 (),
- (__mmask32) -1);
-
-}
-
-extern __inline__ __m512bh
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_rsqrt_pbh (__m512bh __W, __mmask32 __U, __m512bh __A)
-{
- return (__m512bh)
- __builtin_ia32_rsqrtbf16512_mask (__A, __W, __U);
-}
-
-extern __inline__ __m512bh
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_rsqrt_pbh (__mmask32 __U, __m512bh __A)
-{
- return (__m512bh)
- __builtin_ia32_rsqrtbf16512_mask (__A,
- (__v32bf) _mm512_setzero_si512 (),
- __U);
-}
-
-extern __inline__ __m512bh
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_sqrt_pbh (__m512bh __A)
-{
- return (__m512bh)
- __builtin_ia32_sqrtbf16512_mask (__A,
- (__v32bf) _mm512_setzero_si512 (),
- (__mmask32) -1);
-}
-
-extern __inline__ __m512bh
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_sqrt_pbh (__m512bh __W, __mmask32 __U, __m512bh __A)
-{
- return (__m512bh)
- __builtin_ia32_sqrtbf16512_mask (__A, __W, __U);
-}
-
-extern __inline__ __m512bh
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_sqrt_pbh (__mmask32 __U, __m512bh __A)
-{
- return (__m512bh)
- __builtin_ia32_sqrtbf16512_mask (__A,
- (__v32bf) _mm512_setzero_si512 (),
- __U);
-}
-
-extern __inline__ __m512bh
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_rcp_pbh (__m512bh __A)
-{
- return (__m512bh)
- __builtin_ia32_rcpbf16512_mask (__A,
- (__v32bf) _mm512_setzero_si512 (),
- (__mmask32) -1);
-}
-
-extern __inline__ __m512bh
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_rcp_pbh (__m512bh __W, __mmask32 __U, __m512bh __A)
-{
- return (__m512bh)
- __builtin_ia32_rcpbf16512_mask (__A, __W, __U);
-}
-
-extern __inline__ __m512bh
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_rcp_pbh (__mmask32 __U, __m512bh __A)
-{
- return (__m512bh)
- __builtin_ia32_rcpbf16512_mask (__A,
- (__v32bf) _mm512_setzero_si512 (),
- __U);
-}
-
-extern __inline__ __m512bh
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_getexp_pbh (__m512bh __A)
-{
- return (__m512bh)
- __builtin_ia32_getexpbf16512_mask (__A,
- (__v32bf) _mm512_setzero_si512 (),
- (__mmask32) -1);
-}
-
-extern __inline__ __m512bh
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_getexp_pbh (__m512bh __W, __mmask32 __U, __m512bh __A)
-{
- return (__m512bh) __builtin_ia32_getexpbf16512_mask (__A, __W, __U);
-}
-
-extern __inline__ __m512bh
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_getexp_pbh (__mmask32 __U, __m512bh __A)
-{
- return (__m512bh)
- __builtin_ia32_getexpbf16512_mask (__A,
- (__v32bf) _mm512_setzero_si512 (),
- __U);
-}
-
-/* Intrinsics vrndscalebf16. */
-#ifdef __OPTIMIZE__
-extern __inline__ __m512bh
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_roundscale_pbh (__m512bh __A, int B)
-{
- return (__m512bh)
- __builtin_ia32_rndscalebf16512_mask (__A, B,
- (__v32bf) _mm512_setzero_si512 (),
- (__mmask32) -1);
-}
-
-extern __inline__ __m512bh
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_roundscale_pbh (__m512bh __W, __mmask32 __U, __m512bh __A, int B)
-{
- return (__m512bh)
- __builtin_ia32_rndscalebf16512_mask (__A, B, __W, __U);
-}
-
-extern __inline__ __m512bh
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_roundscale_pbh (__mmask32 __U, __m512bh __A, int B)
-{
- return (__m512bh)
- __builtin_ia32_rndscalebf16512_mask (__A, B,
- (__v32bf) _mm512_setzero_si512 (),
- __U);
-}
-
-#else
-#define _mm512_roundscale_pbh(A, B) \
- (__builtin_ia32_rndscalebf16512_mask ((A), (B), \
- (__v32bf) _mm512_setzero_si512 (), \
- (__mmask32) -1))
-
-#define _mm512_mask_roundscale_pbh(A, B, C, D) \
- (__builtin_ia32_rndscalebf16512_mask ((C), (D), (A), (B)))
-
-#define _mm512_maskz_roundscale_pbh(A, B, C) \
- (__builtin_ia32_rndscalebf16512_mask ((B), (C), \
- (__v32bf) _mm512_setzero_si512 (), \
- (A)))
-
-#endif /* __OPTIMIZE__ */
-
-/* Intrinsics vreducebf16. */
-#ifdef __OPTIMIZE__
-extern __inline__ __m512bh
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_reduce_pbh (__m512bh __A, int B)
-{
- return (__m512bh)
- __builtin_ia32_reducebf16512_mask (__A, B,
- (__v32bf) _mm512_setzero_si512 (),
- (__mmask32) -1);
-}
-
-extern __inline__ __m512bh
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_reduce_pbh (__m512bh __W, __mmask32 __U,
- __m512bh __A, int B)
-{
- return (__m512bh)
- __builtin_ia32_reducebf16512_mask (__A, B, __W, __U);
-}
-
-extern __inline__ __m512bh
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_reduce_pbh (__mmask32 __U, __m512bh __A, int B)
-{
- return (__m512bh)
- __builtin_ia32_reducebf16512_mask (__A, B,
- (__v32bf) _mm512_setzero_si512 (),
- __U);
-}
-
-#else
-#define _mm512_reduce_pbh(A, B) \
- (__builtin_ia32_reducebf16512_mask ((A), (B), \
- (__v32bf) _mm512_setzero_si512 (), \
- (__mmask32) -1))
-
-#define _mm512_mask_reduce_pbh(A, B, C, D) \
- (__builtin_ia32_reducebf16512_mask ((C), (D), (A), (B)))
-
-#define _mm512_maskz_reduce_pbh(A, B, C) \
- (__builtin_ia32_reducebf16512_mask ((B), (C), \
- (__v32bf) _mm512_setzero_si512 (), \
- (A)))
-
-#endif /* __OPTIMIZE__ */
-
-/* Intrinsics vgetmantbf16. */
-#ifdef __OPTIMIZE__
-extern __inline__ __m512bh
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_getmant_pbh (__m512bh __A, _MM_MANTISSA_NORM_ENUM __B,
- _MM_MANTISSA_SIGN_ENUM __C)
-{
- return (__m512bh)
- __builtin_ia32_getmantbf16512_mask (__A, (int) (__C << 2) | __B,
- (__v32bf) _mm512_setzero_si512 (),
- (__mmask32) -1);
-}
-
-extern __inline__ __m512bh
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_getmant_pbh (__m512bh __W, __mmask32 __U, __m512bh __A,
- _MM_MANTISSA_NORM_ENUM __B,
- _MM_MANTISSA_SIGN_ENUM __C)
-{
- return (__m512bh)
- __builtin_ia32_getmantbf16512_mask (__A, (int) (__C << 2) | __B,
- __W, __U);
-}
-
-extern __inline__ __m512bh
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_getmant_pbh (__mmask32 __U, __m512bh __A,
- _MM_MANTISSA_NORM_ENUM __B,
- _MM_MANTISSA_SIGN_ENUM __C)
-{
- return (__m512bh)
- __builtin_ia32_getmantbf16512_mask (__A, (int) (__C << 2) | __B,
- (__v32bf) _mm512_setzero_si512 (),
- __U);
-}
-
-#else
-#define _mm512_getmant_pbh(A, B, C) \
- (__builtin_ia32_getmantbf16512_mask ((A), (int)(((C)<<2) | (B)), \
- (__v32bf) _mm512_setzero_si512 (), \
- (__mmask32) -1))
-
-#define _mm512_mask_getmant_pbh(A, B, C, D, E) \
- (__builtin_ia32_getmantbf16512_mask ((C), (int)(((D)<<2) | (E)), (A), (B)))
-
-#define _mm512_maskz_getmant_pbh(A, B, C, D) \
- (__builtin_ia32_getmantbf16512_mask ((B), (int)(((C)<<2) | (D)), \
- (__v32bf) _mm512_setzero_si512 (), \
- (A)))
-
-#endif /* __OPTIMIZE__ */
-
-/* Intrinsics vfpclassbf16. */
-#ifdef __OPTIMIZE__
-extern __inline __mmask32
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_fpclass_pbh_mask (__mmask32 __U, __m512bh __A,
- const int __imm)
-{
- return (__mmask32)
- __builtin_ia32_fpclassbf16512_mask (__A, __imm, __U);
-}
-
-extern __inline __mmask32
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_fpclass_pbh_mask (__m512bh __A, const int __imm)
-{
- return (__mmask32)
- __builtin_ia32_fpclassbf16512_mask (__A, __imm,
- (__mmask32) -1);
-}
-
-#else
-#define _mm512_mask_fpclass_pbh_mask(U, X, C) \
- ((__mmask32) __builtin_ia32_fpclassbf16512_mask ( \
- (__v32bf) (__m512bh) (X), (int) (C), (__mmask32) (U)))
-
-#define _mm512_fpclass_pbh_mask(X, C) \
- ((__mmask32) __builtin_ia32_fpclassbf16512_mask ( \
- (__v32bf) (__m512bh) (X), (int) (C), (__mmask32) (-1)))
-#endif /* __OPTIMIZE__ */
-
-/* Intrinsics vcmpbf16. */
-#ifdef __OPTIMIZE__
-extern __inline __mmask32
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cmp_pbh_mask (__mmask32 __U, __m512bh __A, __m512bh __B,
- const int __imm)
-{
- return (__mmask32)
- __builtin_ia32_cmpbf16512_mask (__A, __B, __imm, __U);
-}
-
-extern __inline __mmask32
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cmp_pbh_mask (__m512bh __A, __m512bh __B, const int __imm)
-{
- return (__mmask32)
- __builtin_ia32_cmpbf16512_mask (__A, __B, __imm,
- (__mmask32) -1);
-}
-
-#else
-#define _mm512_mask_cmp_pbh_mask(A, B, C, D) \
- ((__mmask32) __builtin_ia32_cmpbf16512_mask ((B), (C), (D), (A)))
-
-#define _mm512_cmp_pbh_mask(A, B, C) \
- ((__mmask32) __builtin_ia32_cmpbf16512_mask ((A), (B), (C), (-1)))
-
-#endif /* __OPTIMIZE__ */
-
-#ifdef __DISABLE_AVX10_2__
-#undef __DISABLE_AVX10_2__
-#pragma GCC pop_options
-#endif /* __DISABLE_AVX10_2__ */
-
-#endif /* _AVX10_2_512BF16INTRIN_H_INCLUDED */
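A minimal usage sketch (not part of the removed header) of the BF16 classification intrinsics deleted above, assuming a toolchain that still ships <avx10_2-512bf16intrin.h> and is built with -mavx10.2; the helper name is invented for the example.

#include <immintrin.h>

__mmask32
classify_lt (__m512bh __a, __m512bh __b)
{
  /* Normalize the mantissas of __a into [1, 2), keeping the source sign.  */
  __m512bh __m = _mm512_getmant_pbh (__a, _MM_MANT_NORM_1_2,
				     _MM_MANT_SIGN_src);
  /* Per-lane mask where the normalized value is ordered-less-than __b.  */
  return _mm512_cmp_pbh_mask (__m, __b, _CMP_LT_OS);
}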
diff --git a/gcc/config/i386/avx10_2-512convertintrin.h b/gcc/config/i386/avx10_2-512convertintrin.h
deleted file mode 100644
index 611a40d..0000000
--- a/gcc/config/i386/avx10_2-512convertintrin.h
+++ /dev/null
@@ -1,572 +0,0 @@
-/* Copyright (C) 2024-2025 Free Software Foundation, Inc.
-
- This file is part of GCC.
-
- GCC is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 3, or (at your option)
- any later version.
-
- GCC is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- Under Section 7 of GPL version 3, you are granted additional
- permissions described in the GCC Runtime Library Exception, version
- 3.1, as published by the Free Software Foundation.
-
- You should have received a copy of the GNU General Public License and
- a copy of the GCC Runtime Library Exception along with this program;
- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
- <http://www.gnu.org/licenses/>. */
-
-#ifndef _IMMINTRIN_H_INCLUDED
-#error "Never use <avx10_2-512convertintrin.h> directly; include <immintrin.h> instead."
-#endif // _IMMINTRIN_H_INCLUDED
-
-#ifndef __AVX10_2_512CONVERTINTRIN_H_INCLUDED
-#define __AVX10_2_512CONVERTINTRIN_H_INCLUDED
-
-#ifndef __AVX10_2__
-#pragma GCC push_options
-#pragma GCC target("avx10.2")
-#define __DISABLE_AVX10_2__
-#endif /* __AVX10_2__ */
-
-extern __inline __m512h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cvtx2ps_ph (__m512 __A, __m512 __B)
-{
- return (__m512h) __builtin_ia32_vcvt2ps2phx512_mask_round ((__v16sf) __A,
- (__v16sf) __B,
- (__v32hf)
- _mm512_setzero_ph (),
- (__mmask32) -1,
- _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvtx2ps_ph (__m512h __W, __mmask32 __U, __m512 __A,
- __m512 __B)
-{
- return (__m512h) __builtin_ia32_vcvt2ps2phx512_mask_round ((__v16sf) __A,
- (__v16sf) __B,
- (__v32hf) __W,
- (__mmask32) __U,
- _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_cvtx2ps_ph (__mmask32 __U, __m512 __A, __m512 __B)
-{
- return (__m512h) __builtin_ia32_vcvt2ps2phx512_mask_round ((__v16sf) __A,
- (__v16sf) __B,
- (__v32hf)
- _mm512_setzero_ph (),
- (__mmask32) __U,
- _MM_FROUND_CUR_DIRECTION);
-}
-
-#ifdef __OPTIMIZE__
-extern __inline __m512h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cvtx_round2ps_ph (__m512 __A, __m512 __B, const int __R)
-{
- return (__m512h) __builtin_ia32_vcvt2ps2phx512_mask_round ((__v16sf) __A,
- (__v16sf) __B,
- (__v32hf)
- _mm512_setzero_ph (),
- (__mmask32) -1,
- __R);
-}
-
-extern __inline __m512h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvtx_round2ps_ph (__m512h __W, __mmask32 __U, __m512 __A,
- __m512 __B, const int __R)
-{
- return (__m512h) __builtin_ia32_vcvt2ps2phx512_mask_round ((__v16sf) __A,
- (__v16sf) __B,
- (__v32hf) __W,
- (__mmask32) __U,
- __R);
-}
-
-extern __inline __m512h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_cvtx_round2ps_ph (__mmask32 __U, __m512 __A,
- __m512 __B, const int __R)
-{
- return (__m512h) __builtin_ia32_vcvt2ps2phx512_mask_round ((__v16sf) __A,
- (__v16sf) __B,
- (__v32hf)
- _mm512_setzero_ph (),
- (__mmask32) __U,
- __R);
-}
-
-#else
-#define _mm512_cvtx_round2ps_ph(A, B, R) \
- ((__m512h) __builtin_ia32_vcvt2ps2phx512_mask_round ((__v16sf) (A), \
- (__v16sf) (B), \
- (__v32hf) \
- (_mm512_setzero_ph ()), \
- (__mmask32) (-1), \
- (R)))
-#define _mm512_mask_cvtx_round2ps_ph(W, U, A, B, R) \
- ((__m512h) __builtin_ia32_vcvt2ps2phx512_mask_round ((__v16sf) (A), \
- (__v16sf) (B), \
- (__v32hf) (W), \
- (__mmask32) (U), \
- (R)))
-#define _mm512_maskz_cvtx_round2ps_ph(U, A, B, R) \
- ((__m512h) __builtin_ia32_vcvt2ps2phx512_mask_round ((__v16sf) (A), \
- (__v16sf) (B), \
- (__v32hf) \
- (_mm512_setzero_ph ()), \
- (__mmask32) (U), \
- (R)))
-#endif /* __OPTIMIZE__ */
-
-extern __inline__ __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cvtbiasph_bf8 (__m512i __A, __m512h __B)
-{
- return (__m256i) __builtin_ia32_vcvtbiasph2bf8512_mask ((__v64qi) __A,
- (__v32hf) __B,
- (__v32qi)(__m256i)
- _mm256_undefined_si256 (),
- (__mmask32) -1);
-}
-
-extern __inline__ __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvtbiasph_bf8 (__m256i __W, __mmask32 __U,
- __m512i __A, __m512h __B)
-{
- return (__m256i) __builtin_ia32_vcvtbiasph2bf8512_mask ((__v64qi) __A,
- (__v32hf) __B,
- (__v32qi)(__m256i) __W,
- (__mmask32) __U);
-}
-
-extern __inline__ __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_cvtbiasph_bf8 (__mmask32 __U, __m512i __A, __m512h __B)
-{
- return (__m256i) __builtin_ia32_vcvtbiasph2bf8512_mask ((__v64qi) __A,
- (__v32hf) __B,
- (__v32qi)(__m256i)
- _mm256_setzero_si256 (),
- (__mmask32) __U);
-}
-
-extern __inline__ __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cvts_biasph_bf8 (__m512i __A, __m512h __B)
-{
- return (__m256i) __builtin_ia32_vcvtbiasph2bf8s512_mask ((__v64qi) __A,
- (__v32hf) __B,
- (__v32qi)(__m256i)
- _mm256_undefined_si256 (),
- (__mmask32) -1);
-}
-
-extern __inline__ __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvts_biasph_bf8 (__m256i __W, __mmask32 __U,
- __m512i __A, __m512h __B)
-{
- return (__m256i) __builtin_ia32_vcvtbiasph2bf8s512_mask ((__v64qi) __A,
- (__v32hf) __B,
- (__v32qi)(__m256i) __W,
- (__mmask32) __U);
-}
-
-extern __inline__ __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_cvts_biasph_bf8 (__mmask32 __U, __m512i __A, __m512h __B)
-{
- return (__m256i) __builtin_ia32_vcvtbiasph2bf8s512_mask ((__v64qi) __A,
- (__v32hf) __B,
- (__v32qi)(__m256i)
- _mm256_setzero_si256 (),
- (__mmask32) __U);
-}
-
-extern __inline__ __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cvtbiasph_hf8 (__m512i __A, __m512h __B)
-{
- return (__m256i) __builtin_ia32_vcvtbiasph2hf8512_mask ((__v64qi) __A,
- (__v32hf) __B,
- (__v32qi)(__m256i)
- _mm256_undefined_si256 (),
- (__mmask32) -1);
-}
-
-extern __inline__ __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvtbiasph_hf8 (__m256i __W, __mmask32 __U, __m512i __A,
- __m512h __B)
-{
- return (__m256i) __builtin_ia32_vcvtbiasph2hf8512_mask ((__v64qi) __A,
- (__v32hf) __B,
- (__v32qi)(__m256i) __W,
- (__mmask32) __U);
-}
-
-extern __inline__ __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_cvtbiasph_hf8 (__mmask32 __U, __m512i __A, __m512h __B)
-{
- return (__m256i) __builtin_ia32_vcvtbiasph2hf8512_mask ((__v64qi) __A,
- (__v32hf) __B,
- (__v32qi)(__m256i)
- _mm256_setzero_si256 (),
- (__mmask32) __U);
-}
-
-extern __inline__ __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cvts_biasph_hf8 (__m512i __A, __m512h __B)
-{
- return (__m256i) __builtin_ia32_vcvtbiasph2hf8s512_mask ((__v64qi) __A,
- (__v32hf) __B,
- (__v32qi)(__m256i)
- _mm256_undefined_si256 (),
- (__mmask32) -1);
-}
-
-extern __inline__ __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvts_biasph_hf8 (__m256i __W, __mmask32 __U,
- __m512i __A, __m512h __B)
-{
- return (__m256i) __builtin_ia32_vcvtbiasph2hf8s512_mask ((__v64qi) __A,
- (__v32hf) __B,
- (__v32qi)(__m256i) __W,
- (__mmask32) __U);
-}
-
-extern __inline__ __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_cvts_biasph_hf8 (__mmask32 __U, __m512i __A, __m512h __B)
-{
- return (__m256i) __builtin_ia32_vcvtbiasph2hf8s512_mask ((__v64qi) __A,
- (__v32hf) __B,
- (__v32qi)(__m256i)
- _mm256_setzero_si256 (),
- (__mmask32) __U);
-}
-
-extern __inline__ __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cvt2ph_bf8 (__m512h __A, __m512h __B)
-{
- return (__m512i) __builtin_ia32_vcvt2ph2bf8512_mask ((__v32hf) __A,
- (__v32hf) __B,
- (__v64qi)
- _mm512_setzero_si512 (),
- (__mmask64) -1);
-}
-
-extern __inline__ __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvt2ph_bf8 (__m512i __W, __mmask64 __U,
- __m512h __A, __m512h __B)
-{
- return (__m512i) __builtin_ia32_vcvt2ph2bf8512_mask ((__v32hf) __A,
- (__v32hf) __B,
- (__v64qi) __W,
- (__mmask64) __U);
-}
-
-extern __inline__ __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_cvt2ph_bf8 (__mmask64 __U, __m512h __A, __m512h __B)
-{
- return (__m512i) __builtin_ia32_vcvt2ph2bf8512_mask ((__v32hf) __A,
- (__v32hf) __B,
- (__v64qi)
- _mm512_setzero_si512 (),
- (__mmask64) __U);
-}
-
-extern __inline__ __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cvts_2ph_bf8 (__m512h __A, __m512h __B)
-{
- return (__m512i) __builtin_ia32_vcvt2ph2bf8s512_mask ((__v32hf) __A,
- (__v32hf) __B,
- (__v64qi)
- _mm512_setzero_si512 (),
- (__mmask64) -1);
-}
-
-extern __inline__ __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvts_2ph_bf8 (__m512i __W, __mmask64 __U,
- __m512h __A, __m512h __B)
-{
- return (__m512i) __builtin_ia32_vcvt2ph2bf8s512_mask ((__v32hf) __A,
- (__v32hf) __B,
- (__v64qi) __W,
- (__mmask64) __U);
-}
-
-extern __inline__ __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_cvts_2ph_bf8 (__mmask64 __U, __m512h __A, __m512h __B)
-{
- return (__m512i) __builtin_ia32_vcvt2ph2bf8s512_mask ((__v32hf) __A,
- (__v32hf) __B,
- (__v64qi)
- _mm512_setzero_si512 (),
- (__mmask64) __U);
-}
-
-extern __inline__ __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cvt2ph_hf8 (__m512h __A, __m512h __B)
-{
- return (__m512i) __builtin_ia32_vcvt2ph2hf8512_mask ((__v32hf) __A,
- (__v32hf) __B,
- (__v64qi)
- _mm512_setzero_si512 (),
- (__mmask64) -1);
-}
-
-extern __inline__ __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvt2ph_hf8 (__m512i __W, __mmask64 __U,
- __m512h __A, __m512h __B)
-{
- return (__m512i) __builtin_ia32_vcvt2ph2hf8512_mask ((__v32hf) __A,
- (__v32hf) __B,
- (__v64qi) __W,
- (__mmask64) __U);
-}
-
-extern __inline__ __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_cvt2ph_hf8 (__mmask64 __U, __m512h __A, __m512h __B)
-{
- return (__m512i) __builtin_ia32_vcvt2ph2hf8512_mask ((__v32hf) __A,
- (__v32hf) __B,
- (__v64qi)
- _mm512_setzero_si512 (),
- (__mmask64) __U);
-}
-
-extern __inline__ __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cvts_2ph_hf8 (__m512h __A, __m512h __B)
-{
- return (__m512i) __builtin_ia32_vcvt2ph2hf8s512_mask ((__v32hf) __A,
- (__v32hf) __B,
- (__v64qi)
- _mm512_setzero_si512 (),
- (__mmask64) -1);
-}
-
-extern __inline__ __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvts_2ph_hf8 (__m512i __W, __mmask64 __U,
- __m512h __A, __m512h __B)
-{
- return (__m512i) __builtin_ia32_vcvt2ph2hf8s512_mask ((__v32hf) __A,
- (__v32hf) __B,
- (__v64qi) __W,
- (__mmask64) __U);
-}
-
-extern __inline__ __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_cvts_2ph_hf8 (__mmask64 __U, __m512h __A, __m512h __B)
-{
- return (__m512i) __builtin_ia32_vcvt2ph2hf8s512_mask ((__v32hf) __A,
- (__v32hf) __B,
- (__v64qi)
- _mm512_setzero_si512 (),
- (__mmask64) __U);
-}
-
-extern __inline__ __m512h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cvthf8_ph (__m256i __A)
-{
- return (__m512h) __builtin_ia32_vcvthf82ph512_mask ((__v32qi) __A,
- (__v32hf) (__m512h)
- _mm512_undefined_ph (),
- (__mmask32) -1);
-}
-
-extern __inline__ __m512h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvthf8_ph (__m512h __W, __mmask32 __U, __m256i __A)
-{
- return (__m512h) __builtin_ia32_vcvthf82ph512_mask ((__v32qi) __A,
- (__v32hf) (__m512h) __W,
- (__mmask32) __U);
-}
-
-extern __inline__ __m512h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_cvthf8_ph (__mmask32 __U, __m256i __A)
-{
- return (__m512h) __builtin_ia32_vcvthf82ph512_mask ((__v32qi) __A,
- (__v32hf) (__m512h)
- _mm512_setzero_ph (),
- (__mmask32) __U);
-}
-
-extern __inline__ __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cvtph_bf8 (__m512h __A)
-{
- return (__m256i) __builtin_ia32_vcvtph2bf8512_mask ((__v32hf) __A,
- (__v32qi) (__m256i)
- _mm256_undefined_si256 (),
- (__mmask32) -1);
-}
-
-extern __inline__ __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvtph_bf8 (__m256i __W, __mmask32 __U, __m512h __A)
-{
- return (__m256i) __builtin_ia32_vcvtph2bf8512_mask ((__v32hf) __A,
- (__v32qi) (__m256i) __W,
- (__mmask32) __U);
-}
-
-extern __inline__ __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_cvtph_bf8 (__mmask32 __U, __m512h __A)
-{
- return (__m256i) __builtin_ia32_vcvtph2bf8512_mask ((__v32hf) __A,
- (__v32qi) (__m256i)
- _mm256_setzero_si256 (),
- (__mmask32) __U);
-}
-
-extern __inline__ __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cvts_ph_bf8 (__m512h __A)
-{
- return (__m256i) __builtin_ia32_vcvtph2bf8s512_mask ((__v32hf) __A,
- (__v32qi) (__m256i)
- _mm256_undefined_si256 (),
- (__mmask32) -1);
-}
-
-extern __inline__ __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvts_ph_bf8 (__m256i __W, __mmask32 __U, __m512h __A)
-{
- return (__m256i) __builtin_ia32_vcvtph2bf8s512_mask ((__v32hf) __A,
- (__v32qi) (__m256i) __W,
- (__mmask32) __U);
-}
-
-extern __inline__ __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_cvts_ph_bf8 (__mmask32 __U, __m512h __A)
-{
- return (__m256i) __builtin_ia32_vcvtph2bf8s512_mask ((__v32hf) __A,
- (__v32qi) (__m256i)
- _mm256_setzero_si256 (),
- (__mmask32) __U);
-}
-
-extern __inline__ __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cvtph_hf8 (__m512h __A)
-{
- return (__m256i) __builtin_ia32_vcvtph2hf8512_mask ((__v32hf) __A,
- (__v32qi) (__m256i)
- _mm256_undefined_si256 (),
- (__mmask32) -1);
-}
-
-extern __inline__ __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvtph_hf8 (__m256i __W, __mmask32 __U, __m512h __A)
-{
- return (__m256i) __builtin_ia32_vcvtph2hf8512_mask ((__v32hf) __A,
- (__v32qi)(__m256i) __W,
- (__mmask32) __U);
-}
-
-extern __inline__ __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_cvtph_hf8 (__mmask32 __U, __m512h __A)
-{
- return (__m256i) __builtin_ia32_vcvtph2hf8512_mask ((__v32hf) __A,
- (__v32qi) (__m256i)
- _mm256_setzero_si256 (),
- (__mmask32) __U);
-}
-
-extern __inline__ __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cvts_ph_hf8 (__m512h __A)
-{
- return (__m256i) __builtin_ia32_vcvtph2hf8s512_mask ((__v32hf) __A,
- (__v32qi) (__m256i)
- _mm256_undefined_si256 (),
- (__mmask32) -1);
-}
-
-extern __inline__ __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvts_ph_hf8 (__m256i __W, __mmask32 __U, __m512h __A)
-{
- return (__m256i) __builtin_ia32_vcvtph2hf8s512_mask ((__v32hf) __A,
- (__v32qi) (__m256i) __W,
- (__mmask32) __U);
-}
-
-extern __inline__ __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_cvts_ph_hf8 (__mmask32 __U, __m512h __A)
-{
- return (__m256i) __builtin_ia32_vcvtph2hf8s512_mask ((__v32hf) __A,
- (__v32qi) (__m256i)
- _mm256_setzero_si256 (),
- (__mmask32) __U);
-}
-
-extern __inline __m512h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cvtbf8_ph (__m256i __A)
-{
- return (__m512h) _mm512_castsi512_ph ((__m512i) _mm512_slli_epi16 (
- (__m512i) _mm512_cvtepi8_epi16 (__A), 8));
-}
-
-extern __inline __m512h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvtbf8_ph (__m512h __S, __mmask32 __U, __m256i __A)
-{
- return (__m512h) _mm512_castsi512_ph ((__m512i) _mm512_mask_slli_epi16 (
- (__m512i) __S, __U, (__m512i) _mm512_cvtepi8_epi16 (__A), 8));
-}
-
-extern __inline __m512h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_cvtbf8_ph (__mmask32 __U, __m256i __A)
-{
- return (__m512h) _mm512_castsi512_ph ((__m512i) _mm512_slli_epi16 (
- (__m512i) _mm512_maskz_cvtepi8_epi16 (__U, __A), 8));
-}
-
-#ifdef __DISABLE_AVX10_2__
-#undef __DISABLE_AVX10_2__
-#pragma GCC pop_options
-#endif /* __DISABLE_AVX10_2__ */
-
-#endif /* __AVX10_2_512CONVERTINTRIN_H_INCLUDED */
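A brief sketch (illustrative, not from the removed file) chaining two of the conversion intrinsics deleted above: _mm512_cvtx2ps_ph fuses two float32 vectors into one 32-lane float16 vector, and _mm512_cvtph_bf8 then narrows it to 32 bf8 bytes. Assumes -mavx10.2 and a compiler that still provides this header; the function name is invented.

#include <immintrin.h>

__m256i
pack_ps_to_bf8 (__m512 __lo, __m512 __hi)
{
  /* Two 16-lane float32 inputs -> one 32-lane float16 vector.  */
  __m512h __h = _mm512_cvtx2ps_ph (__lo, __hi);
  /* 32 float16 lanes -> 32 bf8 bytes in a 256-bit vector.  */
  return _mm512_cvtph_bf8 (__h);
}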
diff --git a/gcc/config/i386/avx10_2-512mediaintrin.h b/gcc/config/i386/avx10_2-512mediaintrin.h
deleted file mode 100644
index 43271e7..0000000
--- a/gcc/config/i386/avx10_2-512mediaintrin.h
+++ /dev/null
@@ -1,514 +0,0 @@
-/* Copyright (C) 2024-2025 Free Software Foundation, Inc.
-
- This file is part of GCC.
-
- GCC is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 3, or (at your option)
- any later version.
-
- GCC is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- Under Section 7 of GPL version 3, you are granted additional
- permissions described in the GCC Runtime Library Exception, version
- 3.1, as published by the Free Software Foundation.
-
- You should have received a copy of the GNU General Public License and
- a copy of the GCC Runtime Library Exception along with this program;
- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
- <http://www.gnu.org/licenses/>. */
-
-#if !defined _IMMINTRIN_H_INCLUDED
-#error "Never use <avx10_2-512mediaintrin.h> directly; include <immintrin.h> instead."
-#endif
-
-#ifndef _AVX10_2_512MEDIAINTRIN_H_INCLUDED
-#define _AVX10_2_512MEDIAINTRIN_H_INCLUDED
-
-#if !defined(__AVX10_2__)
-#pragma GCC push_options
-#pragma GCC target("avx10.2")
-#define __DISABLE_AVX10_2__
-#endif /* __AVX10_2__ */
-
-extern __inline __m512i
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_dpbssd_epi32 (__m512i __W, __m512i __A, __m512i __B)
-{
- return (__m512i)
- __builtin_ia32_vpdpbssd512 ((__v16si) __W, (__v16si) __A, (__v16si) __B);
-}
-
-extern __inline __m512i
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_dpbssd_epi32 (__m512i __W, __mmask16 __U,
- __m512i __A, __m512i __B)
-{
- return (__m512i)
- __builtin_ia32_vpdpbssd_v16si_mask ((__v16si) __W,
- (__v16si) __A,
- (__v16si) __B,
- (__mmask16) __U);
-}
-
-extern __inline __m512i
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_dpbssd_epi32 (__mmask16 __U, __m512i __W,
- __m512i __A, __m512i __B)
-{
- return (__m512i)
- __builtin_ia32_vpdpbssd_v16si_maskz ((__v16si) __W,
- (__v16si) __A,
- (__v16si) __B,
- (__mmask16) __U);
-}
-
-extern __inline __m512i
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_dpbssds_epi32 (__m512i __W, __m512i __A, __m512i __B)
-{
- return (__m512i)
- __builtin_ia32_vpdpbssds512 ((__v16si) __W, (__v16si) __A, (__v16si) __B);
-}
-
-extern __inline __m512i
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_dpbssds_epi32 (__m512i __W, __mmask16 __U,
- __m512i __A, __m512i __B)
-{
- return (__m512i)
- __builtin_ia32_vpdpbssds_v16si_mask ((__v16si) __W,
- (__v16si) __A,
- (__v16si) __B,
- (__mmask16) __U);
-}
-
-extern __inline __m512i
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_dpbssds_epi32 (__mmask16 __U, __m512i __W,
- __m512i __A, __m512i __B)
-{
- return (__m512i)
- __builtin_ia32_vpdpbssds_v16si_maskz ((__v16si) __W,
- (__v16si) __A,
- (__v16si) __B,
- (__mmask16) __U);
-}
-
-extern __inline __m512i
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_dpbsud_epi32 (__m512i __W, __m512i __A, __m512i __B)
-{
- return (__m512i)
- __builtin_ia32_vpdpbsud512 ((__v16si) __W, (__v16si) __A, (__v16si) __B);
-}
-
-extern __inline __m512i
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_dpbsud_epi32 (__m512i __W, __mmask16 __U,
- __m512i __A, __m512i __B)
-{
- return (__m512i)
- __builtin_ia32_vpdpbsud_v16si_mask ((__v16si) __W,
- (__v16si) __A,
- (__v16si) __B,
- (__mmask16) __U);
-}
-
-extern __inline __m512i
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_dpbsud_epi32 (__mmask16 __U, __m512i __W,
- __m512i __A, __m512i __B)
-{
- return (__m512i)
- __builtin_ia32_vpdpbsud_v16si_maskz ((__v16si) __W,
- (__v16si) __A,
- (__v16si) __B,
- (__mmask16) __U);
-}
-
-extern __inline __m512i
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_dpbsuds_epi32 (__m512i __W, __m512i __A, __m512i __B)
-{
- return (__m512i)
- __builtin_ia32_vpdpbsuds512 ((__v16si) __W, (__v16si) __A, (__v16si) __B);
-}
-
-extern __inline __m512i
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_dpbsuds_epi32 (__m512i __W, __mmask16 __U,
- __m512i __A, __m512i __B)
-{
- return (__m512i)
- __builtin_ia32_vpdpbsuds_v16si_mask ((__v16si) __W,
- (__v16si) __A,
- (__v16si) __B,
- (__mmask16) __U);
-}
-
-extern __inline __m512i
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_dpbsuds_epi32 (__mmask16 __U, __m512i __W,
- __m512i __A, __m512i __B)
-{
- return (__m512i)
- __builtin_ia32_vpdpbsuds_v16si_maskz ((__v16si) __W,
- (__v16si) __A,
- (__v16si) __B,
- (__mmask16) __U);
-}
-
-extern __inline __m512i
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_dpbuud_epi32 (__m512i __W, __m512i __A, __m512i __B)
-{
- return (__m512i)
- __builtin_ia32_vpdpbuud512 ((__v16si) __W, (__v16si) __A, (__v16si) __B);
-}
-
-extern __inline __m512i
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_dpbuud_epi32 (__m512i __W, __mmask16 __U,
- __m512i __A, __m512i __B)
-{
- return (__m512i)
- __builtin_ia32_vpdpbuud_v16si_mask ((__v16si) __W,
- (__v16si) __A,
- (__v16si) __B,
- (__mmask16) __U);
-}
-
-extern __inline __m512i
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_dpbuud_epi32 (__mmask16 __U, __m512i __W,
- __m512i __A, __m512i __B)
-{
- return (__m512i)
- __builtin_ia32_vpdpbuud_v16si_maskz ((__v16si) __W,
- (__v16si) __A,
- (__v16si) __B,
- (__mmask16) __U);
-}
-
-extern __inline __m512i
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_dpbuuds_epi32 (__m512i __W, __m512i __A, __m512i __B)
-{
- return (__m512i)
- __builtin_ia32_vpdpbuuds512 ((__v16si) __W, (__v16si) __A, (__v16si) __B);
-}
-
-extern __inline __m512i
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_dpbuuds_epi32 (__m512i __W, __mmask16 __U,
- __m512i __A, __m512i __B)
-{
- return (__m512i)
- __builtin_ia32_vpdpbuuds_v16si_mask ((__v16si) __W,
- (__v16si) __A,
- (__v16si) __B,
- (__mmask16) __U);
-}
-
-extern __inline __m512i
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_dpbuuds_epi32 (__mmask16 __U, __m512i __W,
- __m512i __A, __m512i __B)
-{
- return (__m512i)
- __builtin_ia32_vpdpbuuds_v16si_maskz ((__v16si) __W,
- (__v16si) __A,
- (__v16si) __B,
- (__mmask16) __U);
-}
-
-extern __inline __m512i
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_dpwsud_epi32 (__m512i __W, __m512i __A, __m512i __B)
-{
- return (__m512i)
- __builtin_ia32_vpdpwsud512 ((__v16si) __W, (__v16si) __A, (__v16si) __B);
-}
-
-extern __inline __m512i
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_dpwsud_epi32 (__m512i __W, __mmask16 __U,
- __m512i __A, __m512i __B)
-{
- return (__m512i)
- __builtin_ia32_vpdpwsud_v16si_mask ((__v16si) __W,
- (__v16si) __A,
- (__v16si) __B,
- (__mmask16) __U);
-}
-
-extern __inline __m512i
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_dpwsud_epi32 (__mmask16 __U, __m512i __W,
- __m512i __A, __m512i __B)
-{
- return (__m512i)
- __builtin_ia32_vpdpwsud_v16si_maskz ((__v16si) __W,
- (__v16si) __A,
- (__v16si) __B,
- (__mmask16) __U);
-}
-
-extern __inline __m512i
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_dpwsuds_epi32 (__m512i __W, __m512i __A, __m512i __B)
-{
- return (__m512i)
- __builtin_ia32_vpdpwsuds512 ((__v16si) __W, (__v16si) __A, (__v16si) __B);
-}
-
-extern __inline __m512i
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_dpwsuds_epi32 (__m512i __W, __mmask16 __U,
- __m512i __A, __m512i __B)
-{
- return (__m512i)
- __builtin_ia32_vpdpwsuds_v16si_mask ((__v16si) __W,
- (__v16si) __A,
- (__v16si) __B,
- (__mmask16) __U);
-}
-
-extern __inline __m512i
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_dpwsuds_epi32 (__mmask16 __U, __m512i __W,
- __m512i __A, __m512i __B)
-{
- return (__m512i)
- __builtin_ia32_vpdpwsuds_v16si_maskz ((__v16si) __W,
- (__v16si) __A,
- (__v16si) __B,
- (__mmask16) __U);
-}
-
-extern __inline __m512i
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_dpwusd_epi32 (__m512i __W, __m512i __A, __m512i __B)
-{
- return (__m512i)
- __builtin_ia32_vpdpwusd512 ((__v16si) __W, (__v16si) __A, (__v16si) __B);
-}
-
-extern __inline __m512i
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_dpwusd_epi32 (__m512i __W, __mmask16 __U,
- __m512i __A, __m512i __B)
-{
- return (__m512i)
- __builtin_ia32_vpdpwusd_v16si_mask ((__v16si) __W,
- (__v16si) __A,
- (__v16si) __B,
- (__mmask16) __U);
-}
-
-extern __inline __m512i
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_dpwusd_epi32 (__mmask16 __U, __m512i __W,
- __m512i __A, __m512i __B)
-{
- return (__m512i)
- __builtin_ia32_vpdpwusd_v16si_maskz ((__v16si) __W,
- (__v16si) __A,
- (__v16si) __B,
- (__mmask16) __U);
-}
-
-extern __inline __m512i
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_dpwusds_epi32 (__m512i __W, __m512i __A, __m512i __B)
-{
- return (__m512i)
- __builtin_ia32_vpdpwusds512 ((__v16si) __W, (__v16si) __A, (__v16si) __B);
-}
-
-extern __inline __m512i
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_dpwusds_epi32 (__m512i __W, __mmask16 __U,
- __m512i __A, __m512i __B)
-{
- return (__m512i)
- __builtin_ia32_vpdpwusds_v16si_mask ((__v16si) __W,
- (__v16si) __A,
- (__v16si) __B,
- (__mmask16) __U);
-}
-
-extern __inline __m512i
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_dpwusds_epi32 (__mmask16 __U, __m512i __W,
- __m512i __A, __m512i __B)
-{
- return (__m512i)
- __builtin_ia32_vpdpwusds_v16si_maskz ((__v16si) __W,
- (__v16si) __A,
- (__v16si) __B,
- (__mmask16) __U);
-}
-
-extern __inline __m512i
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_dpwuud_epi32 (__m512i __W, __m512i __A, __m512i __B)
-{
- return (__m512i)
- __builtin_ia32_vpdpwuud512 ((__v16si) __W, (__v16si) __A, (__v16si) __B);
-}
-
-extern __inline __m512i
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_dpwuud_epi32 (__m512i __W, __mmask16 __U,
- __m512i __A, __m512i __B)
-{
- return (__m512i)
- __builtin_ia32_vpdpwuud_v16si_mask ((__v16si) __W,
- (__v16si) __A,
- (__v16si) __B,
- (__mmask16) __U);
-}
-
-extern __inline __m512i
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_dpwuud_epi32 (__mmask16 __U, __m512i __W,
- __m512i __A, __m512i __B)
-{
- return (__m512i)
- __builtin_ia32_vpdpwuud_v16si_maskz ((__v16si) __W,
- (__v16si) __A,
- (__v16si) __B,
- (__mmask16) __U);
-}
-
-extern __inline __m512i
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_dpwuuds_epi32 (__m512i __W, __m512i __A, __m512i __B)
-{
- return (__m512i)
- __builtin_ia32_vpdpwuuds512 ((__v16si) __W, (__v16si) __A, (__v16si) __B);
-}
-
-extern __inline __m512i
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_dpwuuds_epi32 (__m512i __W, __mmask16 __U,
- __m512i __A, __m512i __B)
-{
- return (__m512i)
- __builtin_ia32_vpdpwuuds_v16si_mask ((__v16si) __W,
- (__v16si) __A,
- (__v16si) __B,
- (__mmask16) __U);
-}
-
-extern __inline __m512i
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_dpwuuds_epi32 (__mmask16 __U, __m512i __W,
- __m512i __A, __m512i __B)
-{
- return (__m512i)
- __builtin_ia32_vpdpwuuds_v16si_maskz ((__v16si) __W,
- (__v16si) __A,
- (__v16si) __B,
- (__mmask16) __U);
-}
-
-extern __inline __m512
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_dpph_ps (__m512 __W, __m512h __A, __m512h __B)
-{
- return (__m512)
- __builtin_ia32_vdpphps512_mask ((__v16sf) __W,
- (__v16sf) __A,
- (__v16sf) __B,
- (__mmask16) -1);
-}
-
-extern __inline __m512
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_dpph_ps (__m512 __W, __mmask16 __U, __m512h __A,
- __m512h __B)
-{
- return (__m512)
- __builtin_ia32_vdpphps512_mask ((__v16sf) __W,
- (__v16sf) __A,
- (__v16sf) __B,
- (__mmask16) __U);
-}
-
-extern __inline __m512
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_dpph_ps (__mmask16 __U, __m512 __W, __m512h __A,
- __m512h __B)
-{
- return (__m512)
- __builtin_ia32_vdpphps512_maskz ((__v16sf) __W,
- (__v16sf) __A,
- (__v16sf) __B,
- (__mmask16) __U);
-}
-
-#ifdef __OPTIMIZE__
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mpsadbw_epu8 (__m512i __X, __m512i __Y, const int __M)
-{
- return (__m512i) __builtin_ia32_mpsadbw512 ((__v64qi) __X,
- (__v64qi) __Y,
- __M);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_mpsadbw_epu8 (__m512i __W, __mmask32 __U, __m512i __X,
- __m512i __Y, const int __M)
-{
- return (__m512i) __builtin_ia32_mpsadbw512_mask ((__v64qi) __X,
- (__v64qi) __Y,
- __M,
- (__v32hi) __W,
- __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_mpsadbw_epu8 (__mmask32 __U, __m512i __X,
- __m512i __Y, const int __M)
-{
- return (__m512i) __builtin_ia32_mpsadbw512_mask ((__v64qi) __X,
- (__v64qi) __Y,
- __M,
- (__v32hi) _mm512_setzero_epi32 (),
- __U);
-}
-#else
-#define _mm512_mpsadbw_epu8(X, Y, M) \
- (__m512i) __builtin_ia32_mpsadbw512 ((__v64qi)(__m512i)(X), \
- (__v64qi)(__m512i)(Y), (int)(M))
-
-#define _mm512_mask_mpsadbw_epu8(W, U, X, Y, M) \
- (__m512i) __builtin_ia32_mpsadbw512_mask ((__v64qi)(__m512i)(X), \
- (__v64qi)(__m512i)(Y), \
- (int)(M), \
- (__v32hi)(__m512i)(W), \
- (__mmask32)(U))
-
-#define _mm512_maskz_mpsadbw_epu8(U, X, Y, M) \
- (__m512i) __builtin_ia32_mpsadbw512_mask ((__v64qi)(__m512i)(X), \
- (__v64qi)(__m512i)(Y), \
- (int)(M), \
- (__v32hi) _mm512_setzero_epi32 (), \
- (__mmask32)(U))
-#endif
-
-#ifdef __DISABLE_AVX10_2__
-#undef __DISABLE_AVX10_2__
-#pragma GCC pop_options
-#endif /* __DISABLE_AVX10_2__ */
-
-#endif /* __AVX10_2_512MEDIAINTRIN_H_INCLUDED */
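An illustrative sketch (not part of the removed header) of the merge-masked dot-product intrinsics deleted above: each vpdpbssd lane accumulates four signed-byte products into a 32-bit lane, and lanes cleared in the mask keep the accumulator value. Assumes -mavx10.2; the helper name is invented.

#include <immintrin.h>

__m512i
dot_accumulate (__m512i __acc, __mmask16 __keep, __m512i __a, __m512i __b)
{
  /* acc[i] += sum of the four signed-byte products in dword lane i,
     only in lanes selected by __keep; other lanes keep __acc.  */
  return _mm512_mask_dpbssd_epi32 (__acc, __keep, __a, __b);
}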
diff --git a/gcc/config/i386/avx10_2-512minmaxintrin.h b/gcc/config/i386/avx10_2-512minmaxintrin.h
deleted file mode 100644
index a743346..0000000
--- a/gcc/config/i386/avx10_2-512minmaxintrin.h
+++ /dev/null
@@ -1,489 +0,0 @@
-/* Copyright (C) 2024-2025 Free Software Foundation, Inc.
-
- This file is part of GCC.
-
- GCC is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 3, or (at your option)
- any later version.
-
- GCC is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- Under Section 7 of GPL version 3, you are granted additional
- permissions described in the GCC Runtime Library Exception, version
- 3.1, as published by the Free Software Foundation.
-
- You should have received a copy of the GNU General Public License and
- a copy of the GCC Runtime Library Exception along with this program;
- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
- <http://www.gnu.org/licenses/>. */
-
-#if !defined _IMMINTRIN_H_INCLUDED
-#error "Never use <avx10_2-512minmaxintrin.h> directly; include <immintrin.h> instead."
-#endif
-
-#ifndef _AVX10_2_512MINMAXINTRIN_H_INCLUDED
-#define _AVX10_2_512MINMAXINTRIN_H_INCLUDED
-
-#if !defined (__AVX10_2__)
-#pragma GCC push_options
-#pragma GCC target("avx10.2")
-#define __DISABLE_AVX10_2__
-#endif /* __AVX10_2__ */
-
-#ifdef __OPTIMIZE__
-extern __inline __m512bh
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_minmax_pbh (__m512bh __A, __m512bh __B, const int __C)
-{
- return (__m512bh) __builtin_ia32_minmaxbf16512_mask ((__v32bf) __A,
- (__v32bf) __B,
- __C,
- (__v32bf)(__m512bh)
- _mm512_setzero_si512 (),
- (__mmask32) -1);
-}
-
-extern __inline __m512bh
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_minmax_pbh (__m512bh __W, __mmask32 __U,
- __m512bh __A, __m512bh __B, const int __C)
-{
- return (__m512bh) __builtin_ia32_minmaxbf16512_mask ((__v32bf) __A,
- (__v32bf) __B,
- __C,
- (__v32bf) __W,
- (__mmask32) __U);
-}
-
-extern __inline __m512bh
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_minmax_pbh (__mmask32 __U, __m512bh __A,
- __m512bh __B, const int __C)
-{
- return (__m512bh) __builtin_ia32_minmaxbf16512_mask ((__v32bf) __A,
- (__v32bf) __B,
- __C,
- (__v32bf)(__m512bh)
- _mm512_setzero_si512 (),
- (__mmask32) __U);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_minmax_pd (__m512d __A, __m512d __B, const int __C)
-{
- return (__m512d) __builtin_ia32_minmaxpd512_mask_round ((__v8df) __A,
- (__v8df) __B,
- __C,
- (__v8df)
- _mm512_undefined_pd (),
- (__mmask8) -1,
- _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_minmax_pd (__m512d __W, __mmask8 __U, __m512d __A,
- __m512d __B, const int __C)
-{
- return (__m512d) __builtin_ia32_minmaxpd512_mask_round ((__v8df) __A,
- (__v8df) __B,
- __C,
- (__v8df) __W,
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_minmax_pd (__mmask8 __U, __m512d __A, __m512d __B,
- const int __C)
-{
- return (__m512d) __builtin_ia32_minmaxpd512_mask_round ((__v8df) __A,
- (__v8df) __B,
- __C,
- (__v8df)
- _mm512_setzero_pd (),
- (__mmask8) __U,
- _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_minmax_round_pd (__m512d __A, __m512d __B, const int __C,
- const int __R)
-{
- return (__m512d) __builtin_ia32_minmaxpd512_mask_round ((__v8df) __A,
- (__v8df) __B,
- __C,
- (__v8df)
- _mm512_undefined_pd (),
- (__mmask8) -1, __R);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_minmax_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
- __m512d __B, const int __C, const int __R)
-{
- return (__m512d) __builtin_ia32_minmaxpd512_mask_round ((__v8df) __A,
- (__v8df) __B,
- __C,
- (__v8df) __W,
- (__mmask8) __U, __R);
-}
-
-extern __inline __m512d
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_minmax_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
- const int __C, const int __R)
-{
- return (__m512d) __builtin_ia32_minmaxpd512_mask_round ((__v8df) __A,
- (__v8df) __B,
- __C,
- (__v8df)
- _mm512_setzero_pd (),
- (__mmask8) __U, __R);
-}
-
-extern __inline __m512h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_minmax_ph (__m512h __A, __m512h __B, const int __C)
-{
- return (__m512h) __builtin_ia32_minmaxph512_mask_round ((__v32hf) __A,
- (__v32hf) __B,
- __C,
- (__v32hf)
- _mm512_undefined_ph (),
- (__mmask32) -1,
- _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_minmax_ph (__m512h __W, __mmask32 __U, __m512h __A,
- __m512h __B, const int __C)
-{
- return (__m512h) __builtin_ia32_minmaxph512_mask_round ((__v32hf) __A,
- (__v32hf) __B,
- __C,
- (__v32hf) __W,
- (__mmask32) __U,
- _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_minmax_ph (__mmask32 __U, __m512h __A, __m512h __B,
- const int __C)
-{
- return (__m512h) __builtin_ia32_minmaxph512_mask_round ((__v32hf) __A,
- (__v32hf) __B,
- __C,
- (__v32hf)
- _mm512_setzero_ph (),
- (__mmask32) __U,
- _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_minmax_round_ph (__m512h __A, __m512h __B, const int __C, const int __R)
-{
- return (__m512h) __builtin_ia32_minmaxph512_mask_round ((__v32hf) __A,
- (__v32hf) __B,
- __C,
- (__v32hf)
- _mm512_undefined_ph (),
- (__mmask32) -1, __R);
-}
-
-extern __inline __m512h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_minmax_round_ph (__m512h __W, __mmask32 __U, __m512h __A,
- __m512h __B, const int __C, const int __R)
-{
- return (__m512h) __builtin_ia32_minmaxph512_mask_round ((__v32hf) __A,
- (__v32hf) __B,
- __C,
- (__v32hf) __W,
- (__mmask32) __U, __R);
-}
-
-extern __inline __m512h
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_minmax_round_ph (__mmask32 __U, __m512h __A, __m512h __B,
- const int __C, const int __R)
-{
- return (__m512h) __builtin_ia32_minmaxph512_mask_round ((__v32hf) __A,
- (__v32hf) __B,
- __C,
- (__v32hf)
- _mm512_setzero_ph (),
- (__mmask32) __U, __R);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_minmax_ps (__m512 __A, __m512 __B, const int __C)
-{
- return (__m512) __builtin_ia32_minmaxps512_mask_round ((__v16sf) __A,
- (__v16sf) __B,
- __C,
- (__v16sf)
- _mm512_undefined_ps (),
- (__mmask16) -1,
- _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_minmax_ps (__m512 __W, __mmask16 __U, __m512 __A,
- __m512 __B, const int __C)
-{
- return (__m512) __builtin_ia32_minmaxps512_mask_round ((__v16sf) __A,
- (__v16sf) __B,
- __C,
- (__v16sf) __W,
- (__mmask16) __U,
- _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_minmax_ps (__mmask16 __U, __m512 __A, __m512 __B,
- const int __C)
-{
- return (__m512) __builtin_ia32_minmaxps512_mask_round ((__v16sf) __A,
- (__v16sf) __B,
- __C,
- (__v16sf)
- _mm512_setzero_ps (),
- (__mmask16) __U,
- _MM_FROUND_CUR_DIRECTION);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_minmax_round_ps (__m512 __A, __m512 __B, const int __C, const int __R)
-{
- return (__m512) __builtin_ia32_minmaxps512_mask_round ((__v16sf) __A,
- (__v16sf) __B,
- __C,
- (__v16sf)
- _mm512_undefined_ps (),
- (__mmask16) -1, __R);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_minmax_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
- __m512 __B, const int __C, const int __R)
-{
- return (__m512) __builtin_ia32_minmaxps512_mask_round ((__v16sf) __A,
- (__v16sf) __B,
- __C,
- (__v16sf) __W,
- (__mmask16) __U, __R);
-}
-
-extern __inline __m512
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_minmax_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
- const int __C, const int __R)
-{
- return (__m512) __builtin_ia32_minmaxps512_mask_round ((__v16sf) __A,
- (__v16sf) __B,
- __C,
- (__v16sf)
- _mm512_setzero_ps (),
- (__mmask16) __U, __R);
-}
-
-#else
-#define _mm512_minmax_pbh(A, B, C) \
- ((__m512bh) __builtin_ia32_minmaxbf16512_mask ((__v32bf) (A), \
- (__v32bf) (B), \
- (int) (C), \
- (__v32bf) (__m512bh) \
- _mm512_setzero_si512 (), \
- (__mmask32) (-1)))
-
-#define _mm512_mask_minmax_pbh(W, U, A, B, C) \
- ((__m512bh) __builtin_ia32_minmaxbf16512_mask ((__v32bf) (A), \
- (__v32bf) (B), \
- (int) (C), \
- (__v32bf) (__m512bh) (W), \
- (__mmask32) (U)))
-
-#define _mm512_maskz_minmax_pbh(U, A, B, C) \
- ((__m512bh) __builtin_ia32_minmaxbf16512_mask ((__v32bf) (A), \
- (__v32bf) (B), \
- (int) (C), \
- (__v32bf) (__m512bh) \
- _mm512_setzero_si512 (), \
- (__mmask32) (U)))
-
-#define _mm512_minmax_round_pd(A, B, C, R) \
- ((__m512d) __builtin_ia32_minmaxpd512_mask_round ((__v8df) (A), \
- (__v8df) (B), \
- (int) (C), \
- (__v8df) (__m512d) \
- _mm512_undefined_pd (), \
- (__mmask8) (-1), \
- (int) (R)))
-
-#define _mm512_mask_minmax_round_pd(W, U, A, B, C, R) \
- ((__m512d) __builtin_ia32_minmaxpd512_mask_round ((__v8df) (A), \
- (__v8df) (B), \
- (int) (C), \
- (__v8df) (__m512d) (W), \
- (__mmask8) (U), \
- (int) (R)))
-
-#define _mm512_maskz_minmax_round_pd(U, A, B, C, R) \
- ((__m512d) __builtin_ia32_minmaxpd512_mask_round ((__v8df) (A), \
- (__v8df) (B), \
- (int) (C), \
- (__v8df) (__m512d) \
- _mm512_setzero_pd (), \
- (__mmask8) (U), \
- (int) (R)))
-
-#define _mm512_minmax_round_ph(A, B, C, R) \
- ((__m512h) __builtin_ia32_minmaxph512_mask_round ((__v32hf) (A), \
- (__v32hf) (B), \
- (int) (C), \
- (__v32hf) (__m512h) \
- _mm512_undefined_ph (), \
- (__mmask32) (-1), \
- (int) (R)))
-
-#define _mm512_mask_minmax_round_ph(W, U, A, B, C, R) \
- ((__m512h) __builtin_ia32_minmaxph512_mask_round ((__v32hf) (A), \
- (__v32hf) (B), \
- (int) (C), \
- (__v32hf) (__m512h) (W), \
- (__mmask32) (U), \
- (int) (R)))
-
-#define _mm512_maskz_minmax_round_ph(U, A, B, C, R) \
- ((__m512h) __builtin_ia32_minmaxph512_mask_round ((__v32hf) (A), \
- (__v32hf) (B), \
- (int) (C), \
- (__v32hf) (__m512h) \
- _mm512_setzero_ph (), \
- (__mmask32) (U), \
- (int) (R)))
-
-#define _mm512_minmax_round_ps(A, B, C, R) \
- ((__m512) __builtin_ia32_minmaxps512_mask_round ((__v16sf) (A), \
- (__v16sf) (B), \
- (int) (C), \
- (__v16sf) (__m512) \
- _mm512_undefined_ps (), \
- (__mmask16) (-1), \
- (int) (R)))
-
-#define _mm512_mask_minmax_round_ps(W, U, A, B, C, R) \
- ((__m512) __builtin_ia32_minmaxps512_mask_round ((__v16sf) (A), \
- (__v16sf) (B), \
- (int) (C), \
- (__v16sf) (__m512) (W), \
- (__mmask16) (U), \
- (int) (R)))
-
-#define _mm512_maskz_minmax_round_ps(U, A, B, C, R) \
- ((__m512) __builtin_ia32_minmaxps512_mask_round ((__v16sf) (A), \
- (__v16sf) (B), \
- (int) (C), \
- (__v16sf) (__m512) \
- _mm512_setzero_ps (), \
- (__mmask16) (U), \
- (int) (R)))
-
-#define _mm512_minmax_pd(A, B, C) \
- ((__m512d) __builtin_ia32_minmaxpd512_mask_round ((__v8df) (A), \
- (__v8df) (B), \
- (int) (C), \
- (__v8df) (__m512d) \
- _mm512_undefined_pd (), \
- (__mmask8) (-1), \
- _MM_FROUND_CUR_DIRECTION))
-
-#define _mm512_mask_minmax_pd(W, U, A, B, C) \
- ((__m512d) __builtin_ia32_minmaxpd512_mask_round ((__v8df) (A), \
- (__v8df) (B), \
- (int) (C), \
- (__v8df) (__m512d) (W), \
- (__mmask8) (U), \
- _MM_FROUND_CUR_DIRECTION))
-
-#define _mm512_maskz_minmax_pd(U, A, B, C) \
- ((__m512d) __builtin_ia32_minmaxpd512_mask_round ((__v8df) (A), \
- (__v8df) (B), \
- (int) (C), \
- (__v8df) (__m512d) \
- _mm512_setzero_pd (), \
- (__mmask8) (U), \
- _MM_FROUND_CUR_DIRECTION))
-
-#define _mm512_minmax_ph(A, B, C) \
- ((__m512h) __builtin_ia32_minmaxph512_mask_round ((__v32hf) (A), \
- (__v32hf) (B), \
- (int) (C), \
- (__v32hf) (__m512h) \
- _mm512_undefined_ph (), \
- (__mmask32) (-1), \
- _MM_FROUND_CUR_DIRECTION))
-
-#define _mm512_mask_minmax_ph(W, U, A, B, C) \
- ((__m512h) __builtin_ia32_minmaxph512_mask_round ((__v32hf) (A), \
- (__v32hf) (B), \
- (int) (C), \
- (__v32hf) (__m512h) (W), \
- (__mmask32) (U), \
- _MM_FROUND_CUR_DIRECTION))
-
-#define _mm512_maskz_minmax_ph(U, A, B, C) \
- ((__m512h) __builtin_ia32_minmaxph512_mask_round ((__v32hf) (A), \
- (__v32hf) (B), \
- (int) (C), \
- (__v32hf) (__m512h) \
- _mm512_setzero_ph (), \
- (__mmask32) (U), \
- _MM_FROUND_CUR_DIRECTION))
-
-#define _mm512_minmax_ps(A, B, C) \
- ((__m512) __builtin_ia32_minmaxps512_mask_round ((__v16sf) (A), \
- (__v16sf) (B), \
- (int) (C), \
- (__v16sf) (__m512) \
- _mm512_undefined_ps (), \
- (__mmask16) (-1), \
- _MM_FROUND_CUR_DIRECTION))
-
-#define _mm512_mask_minmax_ps(W, U, A, B, C) \
- ((__m512) __builtin_ia32_minmaxps512_mask_round ((__v16sf) (A), \
- (__v16sf) (B), \
- (int) (C), \
- (__v16sf) (__m512) (W), \
- (__mmask16) (U), \
- _MM_FROUND_CUR_DIRECTION))
-
-#define _mm512_maskz_minmax_ps(U, A, B, C) \
- ((__m512) __builtin_ia32_minmaxps512_mask_round ((__v16sf) (A), \
- (__v16sf) (B), \
- (int) (C), \
- (__v16sf) (__m512) \
- _mm512_setzero_ps (), \
- (__mmask16) (U), \
- _MM_FROUND_CUR_DIRECTION))
-
-#endif /* __OPTIMIZE__ */
-
-#ifdef __DISABLE_AVX10_2__
-#undef __DISABLE_AVX10_2__
-#pragma GCC pop_options
-#endif /* __DISABLE_AVX10_2__ */
-
-#endif /* _AVX10_2_512MINMAXINTRIN_H_INCLUDED */
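A short sketch (illustrative only) of the minmax intrinsics removed above. The immediate encodes which MINMAX operation to perform (min/max variant and NaN handling) per the AVX10.2 specification; the value below is an arbitrary placeholder and the helper name is invented. Assumes -mavx10.2.

#include <immintrin.h>

__m512
minmax_sel (__m512 __a, __m512 __b)
{
  /* The immediate must be a compile-time constant; its encoding is
     defined by the AVX10.2 MINMAX spec, not by this sketch.  */
  return _mm512_minmax_ps (__a, __b, 0);
}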
diff --git a/gcc/config/i386/avx10_2-512satcvtintrin.h b/gcc/config/i386/avx10_2-512satcvtintrin.h
deleted file mode 100644
index 215b7fd..0000000
--- a/gcc/config/i386/avx10_2-512satcvtintrin.h
+++ /dev/null
@@ -1,1575 +0,0 @@
-/* Copyright (C) 2024-2025 Free Software Foundation, Inc.
-
- This file is part of GCC.
-
- GCC is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 3, or (at your option)
- any later version.
-
- GCC is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- Under Section 7 of GPL version 3, you are granted additional
- permissions described in the GCC Runtime Library Exception, version
- 3.1, as published by the Free Software Foundation.
-
- You should have received a copy of the GNU General Public License and
- a copy of the GCC Runtime Library Exception along with this program;
- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
- <http://www.gnu.org/licenses/>. */
-
-#if !defined _IMMINTRIN_H_INCLUDED
-#error "Never use <avx10_2-512satcvtintrin.h> directly; include <immintrin.h> instead."
-#endif
-
-#ifndef _AVX10_2_512SATCVTINTRIN_H_INCLUDED
-#define _AVX10_2_512SATCVTINTRIN_H_INCLUDED
-
-#if !defined (__AVX10_2__)
-#pragma GCC push_options
-#pragma GCC target("avx10.2")
-#define __DISABLE_AVX10_2__
-#endif /* __AVX10_2__ */
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_ipcvts_bf16_epi8 (__m512bh __A)
-{
- return
- (__m512i) __builtin_ia32_cvtbf162ibs512_mask ((__v32bf) __A,
- (__v32hi)
- _mm512_undefined_si512 (),
- (__mmask32) -1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_ipcvts_bf16_epi8 (__m512i __W, __mmask32 __U, __m512bh __A)
-{
- return (__m512i) __builtin_ia32_cvtbf162ibs512_mask ((__v32bf) __A,
- (__v32hi) __W,
- (__mmask32) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_ipcvts_bf16_epi8 (__mmask32 __U, __m512bh __A)
-{
- return
- (__m512i) __builtin_ia32_cvtbf162ibs512_mask ((__v32bf) __A,
- (__v32hi)
- _mm512_setzero_si512 (),
- (__mmask32) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_ipcvts_bf16_epu8 (__m512bh __A)
-{
- return
- (__m512i) __builtin_ia32_cvtbf162iubs512_mask ((__v32bf) __A,
- (__v32hi)
- _mm512_undefined_si512 (),
- (__mmask32) -1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_ipcvts_bf16_epu8 (__m512i __W, __mmask32 __U, __m512bh __A)
-{
- return (__m512i) __builtin_ia32_cvtbf162iubs512_mask ((__v32bf) __A,
- (__v32hi) __W,
- (__mmask32) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_ipcvts_bf16_epu8 (__mmask32 __U, __m512bh __A)
-{
- return
- (__m512i) __builtin_ia32_cvtbf162iubs512_mask ((__v32bf) __A,
- (__v32hi)
- _mm512_setzero_si512 (),
- (__mmask32) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_ipcvtts_bf16_epi8 (__m512bh __A)
-{
- return
- (__m512i) __builtin_ia32_cvttbf162ibs512_mask ((__v32bf) __A,
- (__v32hi)
- _mm512_undefined_si512 (),
- (__mmask32) -1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_ipcvtts_bf16_epi8 (__m512i __W, __mmask32 __U, __m512bh __A)
-{
- return (__m512i) __builtin_ia32_cvttbf162ibs512_mask ((__v32bf) __A,
- (__v32hi) __W,
- (__mmask32) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_ipcvtts_bf16_epi8 (__mmask32 __U, __m512bh __A)
-{
- return
- (__m512i) __builtin_ia32_cvttbf162ibs512_mask ((__v32bf) __A,
- (__v32hi)
- _mm512_setzero_si512 (),
- (__mmask32) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_ipcvtts_bf16_epu8 (__m512bh __A)
-{
- return (__m512i)
- __builtin_ia32_cvttbf162iubs512_mask ((__v32bf) __A,
- (__v32hi) _mm512_undefined_si512 (),
- (__mmask32) -1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_ipcvtts_bf16_epu8 (__m512i __W, __mmask32 __U, __m512bh __A)
-{
- return (__m512i) __builtin_ia32_cvttbf162iubs512_mask ((__v32bf) __A,
- (__v32hi) __W,
- (__mmask32) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_ipcvtts_bf16_epu8 (__mmask32 __U, __m512bh __A)
-{
- return (__m512i)
- __builtin_ia32_cvttbf162iubs512_mask ((__v32bf) __A,
- (__v32hi)
- _mm512_setzero_si512 (),
- (__mmask32) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_ipcvts_ph_epi8 (__m512h __A)
-{
- return
- (__m512i) __builtin_ia32_cvtph2ibs512_mask ((__v32hf) __A,
- (__v32hi)
- _mm512_undefined_si512 (),
- (__mmask32) -1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_ipcvts_ph_epi8 (__m512i __W, __mmask32 __U, __m512h __A)
-{
- return (__m512i) __builtin_ia32_cvtph2ibs512_mask ((__v32hf) __A,
- (__v32hi) __W,
- (__mmask32) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_ipcvts_ph_epi8 (__mmask32 __U, __m512h __A)
-{
- return
- (__m512i) __builtin_ia32_cvtph2ibs512_mask ((__v32hf) __A,
- (__v32hi)
- _mm512_setzero_si512 (),
- (__mmask32) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_ipcvts_ph_epu8 (__m512h __A)
-{
- return
- (__m512i) __builtin_ia32_cvtph2iubs512_mask ((__v32hf) __A,
- (__v32hi)
- _mm512_undefined_si512 (),
- (__mmask32) -1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_ipcvts_ph_epu8 (__m512i __W, __mmask32 __U, __m512h __A)
-{
- return (__m512i) __builtin_ia32_cvtph2iubs512_mask ((__v32hf) __A,
- (__v32hi) __W,
- (__mmask32) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_ipcvts_ph_epu8 (__mmask32 __U, __m512h __A)
-{
- return
- (__m512i) __builtin_ia32_cvtph2iubs512_mask ((__v32hf) __A,
- (__v32hi)
- _mm512_setzero_si512 (),
- (__mmask32) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_ipcvts_ps_epi8 (__m512 __A)
-{
- return
- (__m512i) __builtin_ia32_cvtps2ibs512_mask ((__v16sf) __A,
- (__v16si)
- _mm512_undefined_si512 (),
- (__mmask16) -1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_ipcvts_ps_epi8 (__m512i __W, __mmask16 __U, __m512 __A)
-{
- return (__m512i) __builtin_ia32_cvtps2ibs512_mask ((__v16sf) __A,
- (__v16si) __W,
- (__mmask16) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_ipcvts_ps_epi8 (__mmask16 __U, __m512 __A)
-{
- return
- (__m512i) __builtin_ia32_cvtps2ibs512_mask ((__v16sf) __A,
- (__v16si)
- _mm512_setzero_si512 (),
- (__mmask16) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_ipcvts_ps_epu8 (__m512 __A)
-{
- return
- (__m512i) __builtin_ia32_cvtps2iubs512_mask ((__v16sf) __A,
- (__v16si)
- _mm512_undefined_si512 (),
- (__mmask16) -1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_ipcvts_ps_epu8 (__m512i __W, __mmask16 __U, __m512 __A)
-{
- return (__m512i) __builtin_ia32_cvtps2iubs512_mask ((__v16sf) __A,
- (__v16si) __W,
- (__mmask16) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_ipcvts_ps_epu8 (__mmask16 __U, __m512 __A)
-{
- return
- (__m512i) __builtin_ia32_cvtps2iubs512_mask ((__v16sf) __A,
- (__v16si)
- _mm512_setzero_si512 (),
- (__mmask16) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_ipcvtts_ph_epi8 (__m512h __A)
-{
- return (__m512i)
- __builtin_ia32_cvttph2ibs512_mask ((__v32hf) __A,
- (__v32hi)
- _mm512_undefined_si512 (),
- (__mmask32) -1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_ipcvtts_ph_epi8 (__m512i __W, __mmask32 __U, __m512h __A)
-{
- return (__m512i) __builtin_ia32_cvttph2ibs512_mask ((__v32hf) __A,
- (__v32hi) __W,
- (__mmask32) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_ipcvtts_ph_epi8 (__mmask32 __U, __m512h __A)
-{
- return
- (__m512i) __builtin_ia32_cvttph2ibs512_mask ((__v32hf) __A,
- (__v32hi)
- _mm512_setzero_si512 (),
- (__mmask32) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_ipcvtts_ph_epu8 (__m512h __A)
-{
- return (__m512i)
- __builtin_ia32_cvttph2iubs512_mask ((__v32hf) __A,
- (__v32hi)
- _mm512_undefined_si512 (),
- (__mmask32) -1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_ipcvtts_ph_epu8 (__m512i __W, __mmask32 __U, __m512h __A)
-{
- return (__m512i) __builtin_ia32_cvttph2iubs512_mask ((__v32hf) __A,
- (__v32hi) __W,
- (__mmask32) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_ipcvtts_ph_epu8 (__mmask32 __U, __m512h __A)
-{
- return (__m512i)
- __builtin_ia32_cvttph2iubs512_mask ((__v32hf) __A,
- (__v32hi)
- _mm512_setzero_si512 (),
- (__mmask32) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_ipcvtts_ps_epi8 (__m512 __A)
-{
- return (__m512i)
- __builtin_ia32_cvttps2ibs512_mask ((__v16sf) __A,
- (__v16si)
- _mm512_undefined_si512 (),
- (__mmask16) -1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_ipcvtts_ps_epi8 (__m512i __W, __mmask16 __U, __m512 __A)
-{
- return (__m512i) __builtin_ia32_cvttps2ibs512_mask ((__v16sf) __A,
- (__v16si) __W,
- (__mmask16) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_ipcvtts_ps_epi8 (__mmask16 __U, __m512 __A)
-{
- return (__m512i)
- __builtin_ia32_cvttps2ibs512_mask ((__v16sf) __A,
- (__v16si)
- _mm512_setzero_si512 (),
- (__mmask16) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_ipcvtts_ps_epu8 (__m512 __A)
-{
- return (__m512i)
- __builtin_ia32_cvttps2iubs512_mask ((__v16sf) __A,
- (__v16si)
- _mm512_undefined_si512 (),
- (__mmask16) -1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_ipcvtts_ps_epu8 (__m512i __W, __mmask16 __U, __m512 __A)
-{
- return (__m512i) __builtin_ia32_cvttps2iubs512_mask ((__v16sf) __A,
- (__v16si) __W,
- (__mmask16) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_ipcvtts_ps_epu8 (__mmask16 __U, __m512 __A)
-{
- return (__m512i)
- __builtin_ia32_cvttps2iubs512_mask ((__v16sf) __A,
- (__v16si)
- _mm512_setzero_si512 (),
- (__mmask16) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cvtts_pd_epi32 (__m512d __A)
-{
- return (__m256i)
- __builtin_ia32_cvttpd2dqs512_mask ((__v8df) __A,
- (__v8si)
- _mm256_undefined_si256 (),
- (__mmask8) -1);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvtts_pd_epi32 (__m256i __W, __mmask8 __U, __m512d __A)
-{
- return (__m256i) __builtin_ia32_cvttpd2dqs512_mask ((__v8df) __A,
- (__v8si) __W,
- (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_cvtts_pd_epi32 (__mmask8 __U, __m512d __A)
-{
- return
- (__m256i) __builtin_ia32_cvttpd2dqs512_mask ((__v8df) __A,
- (__v8si)
- _mm256_setzero_si256 (),
- (__mmask8) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cvtts_pd_epi64 (__m512d __A)
-{
- return (__m512i)
- __builtin_ia32_cvttpd2qqs512_mask ((__v8df) __A,
- (__v8di)
- _mm512_undefined_si512 (),
- (__mmask8) -1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvtts_pd_epi64 (__m512i __W, __mmask8 __U, __m512d __A)
-{
- return (__m512i) __builtin_ia32_cvttpd2qqs512_mask ((__v8df) __A,
- (__v8di) __W,
- (__mmask8) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_cvtts_pd_epi64 (__mmask8 __U, __m512d __A)
-{
- return
- (__m512i) __builtin_ia32_cvttpd2qqs512_mask ((__v8df) __A,
- (__v8di)
- _mm512_setzero_si512 (),
- (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cvtts_pd_epu32 (__m512d __A)
-{
- return (__m256i)
- __builtin_ia32_cvttpd2udqs512_mask ((__v8df) __A,
- (__v8si)
- _mm256_undefined_si256 (),
- (__mmask8) -1);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvtts_pd_epu32 (__m256i __W, __mmask8 __U, __m512d __A)
-{
- return (__m256i) __builtin_ia32_cvttpd2udqs512_mask ((__v8df) __A,
- (__v8si) __W,
- (__mmask8) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_cvtts_pd_epu32 (__mmask8 __U, __m512d __A)
-{
- return
- (__m256i) __builtin_ia32_cvttpd2udqs512_mask ((__v8df) __A,
- (__v8si)
- _mm256_setzero_si256 (),
- (__mmask8) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cvtts_pd_epu64 (__m512d __A)
-{
- return (__m512i)
- __builtin_ia32_cvttpd2uqqs512_mask ((__v8df) __A,
- (__v8di)
- _mm512_undefined_si512 (),
- (__mmask8) -1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvtts_pd_epu64 (__m512i __W, __mmask8 __U, __m512d __A)
-{
- return (__m512i) __builtin_ia32_cvttpd2uqqs512_mask ((__v8df) __A,
- (__v8di) __W,
- (__mmask8) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_cvtts_pd_epu64 (__mmask8 __U, __m512d __A)
-{
- return (__m512i)
- __builtin_ia32_cvttpd2uqqs512_mask ((__v8df) __A,
- (__v8di)
- _mm512_setzero_si512 (),
- (__mmask8) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cvtts_ps_epi32 (__m512 __A)
-{
- return (__m512i)
- __builtin_ia32_cvttps2dqs512_mask ((__v16sf) __A,
- (__v16si)
- _mm512_undefined_si512 (),
- (__mmask16) -1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvtts_ps_epi32 (__m512i __W, __mmask16 __U, __m512 __A)
-{
- return (__m512i) __builtin_ia32_cvttps2dqs512_mask ((__v16sf) __A,
- (__v16si) __W,
- (__mmask16) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_cvtts_ps_epi32 (__mmask16 __U, __m512 __A)
-{
- return
- (__m512i) __builtin_ia32_cvttps2dqs512_mask ((__v16sf) __A,
- (__v16si)
- _mm512_setzero_si512 (),
- (__mmask16) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cvtts_ps_epi64 (__m256 __A)
-{
- return (__m512i)
- __builtin_ia32_cvttps2qqs512_mask ((__v8sf) __A,
- (__v8di)
- _mm512_undefined_si512 (),
- (__mmask8) -1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvtts_ps_epi64 (__m512i __W, __mmask8 __U, __m256 __A)
-{
- return (__m512i) __builtin_ia32_cvttps2qqs512_mask ((__v8sf) __A,
- (__v8di) __W,
- (__mmask8) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_cvtts_ps_epi64 (__mmask8 __U, __m256 __A)
-{
- return
- (__m512i) __builtin_ia32_cvttps2qqs512_mask ((__v8sf) __A,
- (__v8di)
- _mm512_setzero_si512 (),
- (__mmask8) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cvtts_ps_epu32 (__m512 __A)
-{
- return (__m512i)
- __builtin_ia32_cvttps2udqs512_mask ((__v16sf) __A,
- (__v16si)
- _mm512_undefined_si512 (),
- (__mmask16) -1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvtts_ps_epu32 (__m512i __W, __mmask16 __U, __m512 __A)
-{
- return (__m512i) __builtin_ia32_cvttps2udqs512_mask ((__v16sf) __A,
- (__v16si) __W,
- (__mmask16) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_cvtts_ps_epu32 (__mmask16 __U, __m512 __A)
-{
- return (__m512i)
- __builtin_ia32_cvttps2udqs512_mask ((__v16sf) __A,
- (__v16si)
- _mm512_setzero_si512 (),
- (__mmask16) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cvtts_ps_epu64 (__m256 __A)
-{
- return (__m512i)
- __builtin_ia32_cvttps2uqqs512_mask ((__v8sf) __A,
- (__v8di)
- _mm512_undefined_si512 (),
- (__mmask8) -1);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvtts_ps_epu64 (__m512i __W, __mmask8 __U, __m256 __A)
-{
- return (__m512i) __builtin_ia32_cvttps2uqqs512_mask ((__v8sf) __A,
- (__v8di) __W,
- (__mmask8) __U);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_cvtts_ps_epu64 (__mmask8 __U, __m256 __A)
-{
- return
- (__m512i) __builtin_ia32_cvttps2uqqs512_mask ((__v8sf) __A,
- (__v8di)
- _mm512_setzero_si512 (),
- (__mmask8) __U);
-}
-
-#ifdef __OPTIMIZE__
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_ipcvts_roundph_epi8 (__m512h __A, const int __R)
-{
- return
- (__m512i) __builtin_ia32_cvtph2ibs512_mask_round ((__v32hf) __A,
- (__v32hi)
- _mm512_undefined_si512 (),
- (__mmask32) -1,
- __R);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_ipcvts_roundph_epi8 (__m512i __W, __mmask32 __U, __m512h __A,
- const int __R)
-{
- return (__m512i) __builtin_ia32_cvtph2ibs512_mask_round ((__v32hf) __A,
- (__v32hi) __W,
- (__mmask32) __U,
- __R);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_ipcvts_roundph_epi8 (__mmask32 __U, __m512h __A, const int __R)
-{
- return
- (__m512i) __builtin_ia32_cvtph2ibs512_mask_round ((__v32hf) __A,
- (__v32hi)
- _mm512_setzero_si512 (),
- (__mmask32) __U,
- __R);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_ipcvts_roundph_epu8 (__m512h __A, const int __R)
-{
- return
- (__m512i) __builtin_ia32_cvtph2iubs512_mask_round ((__v32hf) __A,
- (__v32hi)
- _mm512_undefined_si512 (),
- (__mmask32) -1,
- __R);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_ipcvts_roundph_epu8 (__m512i __W, __mmask32 __U, __m512h __A,
- const int __R)
-{
- return (__m512i) __builtin_ia32_cvtph2iubs512_mask_round ((__v32hf) __A,
- (__v32hi) __W,
- (__mmask32) __U,
- __R);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_ipcvts_roundph_epu8 (__mmask32 __U, __m512h __A, const int __R)
-{
- return
- (__m512i) __builtin_ia32_cvtph2iubs512_mask_round ((__v32hf) __A,
- (__v32hi)
- _mm512_setzero_si512 (),
- (__mmask32) __U,
- __R);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_ipcvts_roundps_epi8 (__m512 __A, const int __R)
-{
- return
- (__m512i) __builtin_ia32_cvtps2ibs512_mask_round ((__v16sf) __A,
- (__v16si)
- _mm512_undefined_si512 (),
- (__mmask16) -1,
- __R);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_ipcvts_roundps_epi8 (__m512i __W, __mmask16 __U, __m512 __A,
- const int __R)
-{
- return (__m512i) __builtin_ia32_cvtps2ibs512_mask_round ((__v16sf) __A,
- (__v16si) __W,
- (__mmask16) __U,
- __R);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_ipcvts_roundps_epi8 (__mmask16 __U, __m512 __A, const int __R)
-{
- return
- (__m512i) __builtin_ia32_cvtps2ibs512_mask_round ((__v16sf) __A,
- (__v16si)
- _mm512_setzero_si512 (),
- (__mmask16) __U,
- __R);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_ipcvts_roundps_epu8 (__m512 __A, const int __R)
-{
- return
- (__m512i) __builtin_ia32_cvtps2iubs512_mask_round ((__v16sf) __A,
- (__v16si)
- _mm512_undefined_si512 (),
- (__mmask16) -1,
- __R);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_ipcvts_roundps_epu8 (__m512i __W, __mmask16 __U, __m512 __A,
- const int __R)
-{
- return (__m512i) __builtin_ia32_cvtps2iubs512_mask_round ((__v16sf) __A,
- (__v16si) __W,
- (__mmask16) __U,
- __R);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_ipcvts_roundps_epu8 (__mmask16 __U, __m512 __A, const int __R)
-{
- return
- (__m512i) __builtin_ia32_cvtps2iubs512_mask_round ((__v16sf) __A,
- (__v16si)
- _mm512_setzero_si512 (),
- (__mmask16) __U,
- __R);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_ipcvtts_roundph_epi8 (__m512h __A, const int __R)
-{
- return (__m512i)
- __builtin_ia32_cvttph2ibs512_mask_round ((__v32hf) __A,
- (__v32hi)
- _mm512_undefined_si512 (),
- (__mmask32) -1,
- __R);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_ipcvtts_roundph_epi8 (__m512i __W, __mmask32 __U, __m512h __A,
- const int __R)
-{
- return (__m512i) __builtin_ia32_cvttph2ibs512_mask_round ((__v32hf) __A,
- (__v32hi) __W,
- (__mmask32) __U,
- __R);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_ipcvtts_roundph_epi8 (__mmask32 __U, __m512h __A, const int __R)
-{
- return
- (__m512i) __builtin_ia32_cvttph2ibs512_mask_round ((__v32hf) __A,
- (__v32hi)
- _mm512_setzero_si512 (),
- (__mmask32) __U,
- __R);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_ipcvtts_roundph_epu8 (__m512h __A, const int __R)
-{
- return (__m512i)
- __builtin_ia32_cvttph2iubs512_mask_round ((__v32hf) __A,
- (__v32hi)
- _mm512_undefined_si512 (),
- (__mmask32) -1,
- __R);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_ipcvtts_roundph_epu8 (__m512i __W, __mmask32 __U, __m512h __A,
- const int __R)
-{
- return (__m512i) __builtin_ia32_cvttph2iubs512_mask_round ((__v32hf) __A,
- (__v32hi) __W,
- (__mmask32) __U,
- __R);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_ipcvtts_roundph_epu8 (__mmask32 __U, __m512h __A, const int __R)
-{
- return (__m512i)
- __builtin_ia32_cvttph2iubs512_mask_round ((__v32hf) __A,
- (__v32hi)
- _mm512_setzero_si512 (),
- (__mmask32) __U,
- __R);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_ipcvtts_roundps_epi8 (__m512 __A, const int __R)
-{
- return (__m512i)
- __builtin_ia32_cvttps2ibs512_mask_round ((__v16sf) __A,
- (__v16si)
- _mm512_undefined_si512 (),
- (__mmask16) -1,
- __R);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_ipcvtts_roundps_epi8 (__m512i __W, __mmask16 __U, __m512 __A,
- const int __R)
-{
- return (__m512i) __builtin_ia32_cvttps2ibs512_mask_round ((__v16sf) __A,
- (__v16si) __W,
- (__mmask16) __U,
- __R);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_ipcvtts_roundps_epi8 (__mmask16 __U, __m512 __A, const int __R)
-{
- return (__m512i)
- __builtin_ia32_cvttps2ibs512_mask_round ((__v16sf) __A,
- (__v16si)
- _mm512_setzero_si512 (),
- (__mmask16) __U,
- __R);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_ipcvtts_roundps_epu8 (__m512 __A, const int __R)
-{
- return (__m512i)
- __builtin_ia32_cvttps2iubs512_mask_round ((__v16sf) __A,
- (__v16si)
- _mm512_undefined_si512 (),
- (__mmask16) -1,
- __R);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_ipcvtts_roundps_epu8 (__m512i __W, __mmask16 __U, __m512 __A,
- const int __R)
-{
- return (__m512i) __builtin_ia32_cvttps2iubs512_mask_round ((__v16sf) __A,
- (__v16si) __W,
- (__mmask16) __U,
- __R);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_ipcvtts_roundps_epu8 (__mmask16 __U, __m512 __A, const int __R)
-{
- return (__m512i)
- __builtin_ia32_cvttps2iubs512_mask_round ((__v16sf) __A,
- (__v16si)
- _mm512_setzero_si512 (),
- (__mmask16) __U,
- __R);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cvtts_roundpd_epi32 (__m512d __A, const int __R)
-{
- return (__m256i)
- __builtin_ia32_cvttpd2dqs512_mask_round ((__v8df) __A,
- (__v8si)
- _mm256_undefined_si256 (),
- (__mmask8) -1,
- __R);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvtts_roundpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A,
- const int __R)
-{
- return (__m256i) __builtin_ia32_cvttpd2dqs512_mask_round ((__v8df) __A,
- (__v8si) __W,
- (__mmask8) __U,
- __R);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_cvtts_roundpd_epi32 (__mmask8 __U, __m512d __A, const int __R)
-{
- return
- (__m256i) __builtin_ia32_cvttpd2dqs512_mask_round ((__v8df) __A,
- (__v8si)
- _mm256_setzero_si256 (),
- (__mmask8) __U,
- __R);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cvtts_roundpd_epi64 (__m512d __A, const int __R)
-{
- return (__m512i)
- __builtin_ia32_cvttpd2qqs512_mask_round ((__v8df) __A,
- (__v8di)
- _mm512_undefined_si512 (),
- (__mmask8) -1,
- __R);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvtts_roundpd_epi64 (__m512i __W, __mmask8 __U, __m512d __A,
- const int __R)
-{
- return (__m512i) __builtin_ia32_cvttpd2qqs512_mask_round ((__v8df) __A,
- (__v8di) __W,
- (__mmask8) __U,
- __R);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_cvtts_roundpd_epi64 (__mmask8 __U, __m512d __A, const int __R)
-{
- return
- (__m512i) __builtin_ia32_cvttpd2qqs512_mask_round ((__v8df) __A,
- (__v8di)
- _mm512_setzero_si512 (),
- (__mmask8) __U,
- __R);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cvtts_roundpd_epu32 (__m512d __A, const int __R)
-{
- return (__m256i)
- __builtin_ia32_cvttpd2udqs512_mask_round ((__v8df) __A,
- (__v8si)
- _mm256_undefined_si256 (),
- (__mmask8) -1,
- __R);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvtts_roundpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A,
- const int __R)
-{
- return (__m256i) __builtin_ia32_cvttpd2udqs512_mask_round ((__v8df) __A,
- (__v8si) __W,
- (__mmask8) __U,
- __R);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_cvtts_roundpd_epu32 (__mmask8 __U, __m512d __A, const int __R)
-{
- return
- (__m256i) __builtin_ia32_cvttpd2udqs512_mask_round ((__v8df) __A,
- (__v8si)
- _mm256_setzero_si256 (),
- (__mmask8) __U,
- __R);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cvtts_roundpd_epu64 (__m512d __A, const int __R)
-{
- return (__m512i)
- __builtin_ia32_cvttpd2uqqs512_mask_round ((__v8df) __A,
- (__v8di)
- _mm512_undefined_si512 (),
- (__mmask8) -1,
- __R);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvtts_roundpd_epu64 (__m512i __W, __mmask8 __U, __m512d __A,
- const int __R)
-{
- return (__m512i) __builtin_ia32_cvttpd2uqqs512_mask_round ((__v8df) __A,
- (__v8di) __W,
- (__mmask8) __U,
- __R);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_cvtts_roundpd_epu64 (__mmask8 __U, __m512d __A, const int __R)
-{
- return (__m512i)
- __builtin_ia32_cvttpd2uqqs512_mask_round ((__v8df) __A,
- (__v8di)
- _mm512_setzero_si512 (),
- (__mmask8) __U,
- __R);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cvtts_roundps_epi32 (__m512 __A, const int __R)
-{
- return (__m512i)
- __builtin_ia32_cvttps2dqs512_mask_round ((__v16sf) __A,
- (__v16si)
- _mm512_undefined_si512 (),
- (__mmask16) -1,
- __R);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvtts_roundps_epi32 (__m512i __W, __mmask16 __U, __m512 __A,
- const int __R)
-{
- return (__m512i) __builtin_ia32_cvttps2dqs512_mask_round ((__v16sf) __A,
- (__v16si) __W,
- (__mmask16) __U,
- __R);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_cvtts_roundps_epi32 (__mmask16 __U, __m512 __A, const int __R)
-{
- return
- (__m512i) __builtin_ia32_cvttps2dqs512_mask_round ((__v16sf) __A,
- (__v16si)
- _mm512_setzero_si512 (),
- (__mmask16) __U,
- __R);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cvtts_roundps_epi64 (__m256 __A, const int __R)
-{
- return (__m512i)
- __builtin_ia32_cvttps2qqs512_mask_round ((__v8sf) __A,
- (__v8di)
- _mm512_undefined_si512 (),
- (__mmask8) -1,
- __R);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvtts_roundps_epi64 (__m512i __W, __mmask8 __U, __m256 __A,
- const int __R)
-{
- return (__m512i) __builtin_ia32_cvttps2qqs512_mask_round ((__v8sf) __A,
- (__v8di) __W,
- (__mmask8) __U,
- __R);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_cvtts_roundps_epi64 (__mmask8 __U, __m256 __A, const int __R)
-{
- return
- (__m512i) __builtin_ia32_cvttps2qqs512_mask_round ((__v8sf) __A,
- (__v8di)
- _mm512_setzero_si512 (),
- (__mmask8) __U,
- __R);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cvtts_roundps_epu32 (__m512 __A, const int __R)
-{
- return (__m512i)
- __builtin_ia32_cvttps2udqs512_mask_round ((__v16sf) __A,
- (__v16si)
- _mm512_undefined_si512 (),
- (__mmask16) -1,
- __R);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvtts_roundps_epu32 (__m512i __W, __mmask16 __U, __m512 __A,
- const int __R)
-{
- return (__m512i) __builtin_ia32_cvttps2udqs512_mask_round ((__v16sf) __A,
- (__v16si) __W,
- (__mmask16) __U,
- __R);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_cvtts_roundps_epu32 (__mmask16 __U, __m512 __A, const int __R)
-{
- return (__m512i)
- __builtin_ia32_cvttps2udqs512_mask_round ((__v16sf) __A,
- (__v16si)
- _mm512_setzero_si512 (),
- (__mmask16) __U,
- __R);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_cvtts_roundps_epu64 (__m256 __A, const int __R)
-{
- return (__m512i)
- __builtin_ia32_cvttps2uqqs512_mask_round ((__v8sf) __A,
- (__v8di)
- _mm512_undefined_si512 (),
- (__mmask8) -1,
- __R);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvtts_roundps_epu64 (__m512i __W, __mmask8 __U, __m256 __A,
- const int __R)
-{
- return (__m512i) __builtin_ia32_cvttps2uqqs512_mask_round ((__v8sf) __A,
- (__v8di) __W,
- (__mmask8) __U,
- __R);
-}
-
-extern __inline __m512i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_cvtts_roundps_epu64 (__mmask8 __U, __m256 __A, const int __R)
-{
- return
- (__m512i) __builtin_ia32_cvttps2uqqs512_mask_round ((__v8sf) __A,
- (__v8di)
- _mm512_setzero_si512 (),
- (__mmask8) __U,
- __R);
-}
-#else
-#define _mm512_ipcvts_roundph_epi8(A, R) \
- ((__m512i) \
- __builtin_ia32_cvtph2ibs512_mask_round ((__v32hf) (A), \
- (__v32hi) \
- (_mm512_undefined_si512 ()), \
- (__mmask32) (-1), \
- (R)))
-
-#define _mm512_mask_ipcvts_roundph_epi8(W, U, A, R) \
- ((__m512i) __builtin_ia32_cvtph2ibs512_mask_round ((__v32hf) (A), \
- (__v32hi) (W), \
- (__mmask32) (U), \
- (R)))
-
-#define _mm512_maskz_ipcvts_roundph_epi8(U, A, R) \
- ((__m512i) \
- __builtin_ia32_cvtph2ibs512_mask_round ((__v32hf) (A), \
- (__v32hi) \
- (_mm512_setzero_si512 ()), \
- (__mmask32) (U), \
- (R)))
-
-#define _mm512_ipcvts_roundph_epu8(A, R) \
- ((__m512i) \
- __builtin_ia32_cvtph2iubs512_mask_round ((__v32hf) (A), \
- (__v32hi) \
- (_mm512_undefined_si512 ()), \
- (__mmask32) (-1), \
- (R)))
-
-#define _mm512_mask_ipcvts_roundph_epu8(W, U, A, R) \
- ((__m512i) __builtin_ia32_cvtph2iubs512_mask_round ((__v32hf) (A), \
- (__v32hi) (W), \
- (__mmask32) (U), \
- (R)))
-
-#define _mm512_maskz_ipcvts_roundph_epu8(U, A, R) \
- ((__m512i) \
- __builtin_ia32_cvtph2iubs512_mask_round ((__v32hf) (A), \
- (__v32hi) \
- (_mm512_setzero_si512 ()), \
- (__mmask32) (U), \
- (R)))
-
-#define _mm512_ipcvts_roundps_epi8(A, R) \
- ((__m512i) \
- __builtin_ia32_cvtps2ibs512_mask_round ((__v16sf) (A), \
- (__v16si) \
- (_mm512_undefined_si512 ()), \
- (__mmask16) (-1), \
- (R)))
-
-#define _mm512_mask_ipcvts_roundps_epi8(W, U, A, R) \
- ((__m512i) __builtin_ia32_cvtps2ibs512_mask_round ((__v16sf) (A), \
- (__v16si) (W), \
- (__mmask16) (U), \
- (R)))
-
-#define _mm512_maskz_ipcvts_roundps_epi8(U, A, R) \
- ((__m512i) \
- __builtin_ia32_cvtps2ibs512_mask_round ((__v16sf) (A), \
- (__v16si) \
- (_mm512_setzero_si512 ()), \
- (__mmask16) (U), \
- (R)))
-
-#define _mm512_ipcvts_roundps_epu8(A, R) \
- ((__m512i) \
- __builtin_ia32_cvtps2iubs512_mask_round ((__v16sf) (A), \
- (__v16si) \
- (_mm512_undefined_si512 ()), \
- (__mmask16) (-1), \
- (R)))
-
-#define _mm512_mask_ipcvts_roundps_epu8(W, U, A, R) \
- ((__m512i) __builtin_ia32_cvtps2iubs512_mask_round ((__v16sf) (A), \
- (__v16si) (W), \
- (__mmask16) (U), \
- (R)))
-
-#define _mm512_maskz_ipcvts_roundps_epu8(U, A, R) \
- ((__m512i) \
- __builtin_ia32_cvtps2iubs512_mask_round ((__v16sf) (A), \
- (__v16si) \
- (_mm512_setzero_si512 ()), \
- (__mmask16) (U), \
- (R)))
-
-#define _mm512_ipcvtts_roundph_epi8(A, R) \
- ((__m512i) \
- __builtin_ia32_cvttph2ibs512_mask_round ((__v32hf) (A), \
- (__v32hi) \
- (_mm512_undefined_si512 ()), \
- (__mmask32) (-1), \
- (R)))
-
-#define _mm512_mask_ipcvtts_roundph_epi8(W, U, A, R) \
- ((__m512i) __builtin_ia32_cvttph2ibs512_mask_round ((__v32hf) (A), \
- (__v32hi) (W), \
- (__mmask32) (U), \
- (R)))
-
-#define _mm512_maskz_ipcvtts_roundph_epi8(U, A, R) \
- ((__m512i) \
- __builtin_ia32_cvttph2ibs512_mask_round ((__v32hf) (A), \
- (__v32hi) \
- (_mm512_setzero_si512 ()), \
- (__mmask32) (U), \
- (R)))
-
-#define _mm512_ipcvtts_roundph_epu8(A, R) \
- ((__m512i) \
- __builtin_ia32_cvttph2iubs512_mask_round ((__v32hf) (A), \
- (__v32hi) \
- (_mm512_undefined_si512 ()), \
- (__mmask32) (-1), \
- (R)))
-
-#define _mm512_mask_ipcvtts_roundph_epu8(W, U, A, R) \
- ((__m512i) __builtin_ia32_cvttph2iubs512_mask_round ((__v32hf) (A), \
- (__v32hi) (W), \
- (__mmask32) (U), \
- (R)))
-
-#define _mm512_maskz_ipcvtts_roundph_epu8(U, A, R) \
- ((__m512i) \
- __builtin_ia32_cvttph2iubs512_mask_round ((__v32hf) (A), \
- (__v32hi) \
- (_mm512_setzero_si512 ()), \
- (__mmask32) (U), \
- (R)))
-
-#define _mm512_ipcvtts_roundps_epi8(A, R) \
- ((__m512i) \
- __builtin_ia32_cvttps2ibs512_mask_round ((__v16sf) (A), \
- (__v16si) \
- (_mm512_undefined_si512 ()), \
- (__mmask16) (-1), \
- (R)))
-
-#define _mm512_mask_ipcvtts_roundps_epi8(W, U, A, R) \
- ((__m512i) __builtin_ia32_cvttps2ibs512_mask_round ((__v16sf) (A), \
- (__v16si) (W), \
- (__mmask16) (U), \
- (R)))
-
-#define _mm512_maskz_ipcvtts_roundps_epi8(U, A, R) \
- ((__m512i) \
- __builtin_ia32_cvttps2ibs512_mask_round ((__v16sf) (A), \
- (__v16si) \
- (_mm512_setzero_si512 ()), \
- (__mmask16) (U), \
- (R)))
-
-#define _mm512_ipcvtts_roundps_epu8(A, R) \
- ((__m512i) \
- __builtin_ia32_cvttps2iubs512_mask_round ((__v16sf) (A), \
- (__v16si) \
- (_mm512_undefined_si512 ()), \
- (__mmask16) (-1), \
- (R)))
-
-#define _mm512_mask_ipcvtts_roundps_epu8(W, U, A, R) \
- ((__m512i) __builtin_ia32_cvttps2iubs512_mask_round ((__v16sf) (A), \
- (__v16si) (W), \
- (__mmask16) (U), \
- (R)))
-
-#define _mm512_maskz_ipcvtts_roundps_epu8(U, A, R) \
- ((__m512i) \
- __builtin_ia32_cvttps2iubs512_mask_round ((__v16sf) (A), \
- (__v16si) \
- (_mm512_setzero_si512 ()), \
- (__mmask16) (U), \
- (R)))
-
-#define _mm512_cvtts_roundpd_epi32(A, R) \
- ((__m256i) \
- __builtin_ia32_cvttpd2dqs512_mask_round ((__v8df) (A), \
- (__v8si) \
- (_mm256_undefined_si256 ()), \
- (__mmask8) (-1), \
- (R)))
-
-#define _mm512_mask_cvtts_roundpd_epi32(W, U, A, R) \
- ((__m256i) __builtin_ia32_cvttpd2dqs512_mask_round ((__v8df) (A), \
- (__v8si) (W), \
- (__mmask8) (U), \
- (R)))
-
-#define _mm512_maskz_cvtts_roundpd_epi32(U, A, R) \
- ((__m256i) \
- __builtin_ia32_cvttpd2dqs512_mask_round ((__v8df) (A), \
- (__v8si) \
- (_mm256_setzero_si256 ()), \
- (__mmask8) (U), \
- (R)))
-
-#define _mm512_cvtts_roundpd_epi64(A, R) \
- ((__m512i) \
- __builtin_ia32_cvttpd2qqs512_mask_round ((__v8df) (A), \
- (__v8di) \
- (_mm512_undefined_si512 ()), \
- (__mmask8) (-1), \
- (R)))
-
-#define _mm512_mask_cvtts_roundpd_epi64(W, U, A, R) \
- ((__m512i) __builtin_ia32_cvttpd2qqs512_mask_round ((__v8df) (A), \
- (__v8di) (W), \
- (__mmask8) (U), \
- (R)))
-
-#define _mm512_maskz_cvtts_roundpd_epi64(U, A, R) \
- ((__m512i) \
- __builtin_ia32_cvttpd2qqs512_mask_round ((__v8df) (A), \
- (__v8di) \
- (_mm512_setzero_si512 ()), \
- (__mmask8) (U), \
- (R)))
-
-#define _mm512_cvtts_roundpd_epu32(A, R) \
- ((__m256i) \
- __builtin_ia32_cvttpd2udqs512_mask_round ((__v8df) (A), \
- (__v8si) \
- (_mm256_undefined_si256 ()), \
- (__mmask8) (-1), \
- (R)))
-
-#define _mm512_mask_cvtts_roundpd_epu32(W, U, A, R) \
- ((__m256i) __builtin_ia32_cvttpd2udqs512_mask_round ((__v8df) (A), \
- (__v8si) (W), \
- (__mmask8) (U), \
- (R)))
-
-#define _mm512_maskz_cvtts_roundpd_epu32(U, A, R) \
- ((__m256i) \
- __builtin_ia32_cvttpd2udqs512_mask_round ((__v8df) (A), \
- (__v8si) \
- (_mm256_setzero_si256 ()), \
- (__mmask8) (U), \
- (R)))
-
-#define _mm512_cvtts_roundpd_epu64(A, R) \
- ((__m512i) \
- __builtin_ia32_cvttpd2uqqs512_mask_round ((__v8df) (A), \
- (__v8di) \
- (_mm512_undefined_si512 ()), \
- (__mmask8) (-1), \
- (R)))
-
-#define _mm512_mask_cvtts_roundpd_epu64(W, U, A, R) \
- ((__m512i) __builtin_ia32_cvttpd2uqqs512_mask_round ((__v8df) (A), \
- (__v8di) (W), \
- (__mmask8) (U), \
- (R)))
-
-#define _mm512_maskz_cvtts_roundpd_epu64(U, A, R) \
- ((__m512i) \
- __builtin_ia32_cvttpd2uqqs512_mask_round ((__v8df) (A), \
- (__v8di) \
- (_mm512_setzero_si512 ()), \
- (__mmask8) (U), \
- (R)))
-
-#define _mm512_cvtts_roundps_epi32(A, R) \
- ((__m512i) \
- __builtin_ia32_cvttps2dqs512_mask_round ((__v16sf) (A), \
- (__v16si) \
- (_mm512_undefined_si512 ()), \
- (__mmask16) (-1), \
- (R)))
-
-#define _mm512_mask_cvtts_roundps_epi32(W, U, A, R) \
- ((__m512i) __builtin_ia32_cvttps2dqs512_mask_round ((__v16sf) (A), \
- (__v16si) (W), \
- (__mmask16) (U), \
- (R)))
-
-#define _mm512_maskz_cvtts_roundps_epi32(U, A, R) \
- ((__m512i) \
- __builtin_ia32_cvttps2dqs512_mask_round ((__v16sf) (A), \
- (__v16si) \
- (_mm512_setzero_si512 ()), \
- (__mmask16) (U), \
- (R)))
-
-#define _mm512_cvtts_roundps_epi64(A, R) \
- ((__m512i) \
- __builtin_ia32_cvttps2qqs512_mask_round ((__v8sf) (A), \
- (__v8di) \
- (_mm512_undefined_si512 ()), \
- (__mmask8) (-1), \
- (R)))
-
-#define _mm512_mask_cvtts_roundps_epi64(W, U, A, R) \
- ((__m512i) __builtin_ia32_cvttps2qqs512_mask_round ((__v8sf) (A), \
- (__v8di) (W), \
- (__mmask8) (U), \
- (R)))
-
-#define _mm512_maskz_cvtts_roundps_epi64(U, A, R) \
- ((__m512i) \
- __builtin_ia32_cvttps2qqs512_mask_round ((__v8sf) (A), \
- (__v8di) \
- (_mm512_setzero_si512 ()), \
- (__mmask8) (U), \
- (R)))
-
-#define _mm512_cvtts_roundps_epu32(A, R) \
- ((__m512i) \
- __builtin_ia32_cvttps2udqs512_mask_round ((__v16sf) (A), \
- (__v16si) \
- (_mm512_undefined_si512 ()), \
- (__mmask16) (-1), \
- (R)))
-
-#define _mm512_mask_cvtts_roundps_epu32(W, U, A, R) \
- ((__m512i) __builtin_ia32_cvttps2udqs512_mask_round ((__v16sf) (A), \
- (__v16si) (W), \
- (__mmask16) (U), \
- (R)))
-
-#define _mm512_maskz_cvtts_roundps_epu32(U, A, R) \
- ((__m512i) \
- __builtin_ia32_cvttps2udqs512_mask_round ((__v16sf) (A), \
- (__v16si) \
- (_mm512_setzero_si512 ()), \
- (__mmask16) (U), \
- (R)))
-
-#define _mm512_cvtts_roundps_epu64(A, R) \
- ((__m512i) \
- __builtin_ia32_cvttps2uqqs512_mask_round ((__v8sf) (A), \
- (__v8di) \
- (_mm512_undefined_si512 ()), \
- (__mmask8) (-1), \
- (R)))
-
-#define _mm512_mask_cvtts_roundps_epu64(W, U, A, R) \
- ((__m512i) __builtin_ia32_cvttps2uqqs512_mask_round ((__v8sf) (A), \
- (__v8di) (W), \
- (__mmask8) (U), \
- (R)))
-
-#define _mm512_maskz_cvtts_roundps_epu64(U, A, R) \
- ((__m512i) \
- __builtin_ia32_cvttps2uqqs512_mask_round ((__v8sf) (A), \
- (__v8di) \
- (_mm512_setzero_si512 ()), \
- (__mmask8) (U), \
- (R)))
-#endif
-
-#ifdef __DISABLE_AVX10_2__
-#undef __DISABLE_AVX10_2__
-#pragma GCC pop_options
-#endif /* __DISABLE_AVX10_2__ */
-
-#endif /* _AVX10_2_512SATCVTINTRIN_H_INCLUDED */
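Every intrinsic in the header deleted above follows the uniform AVX-512/AVX10 three-variant masking pattern: the unmasked form passes an all-ones mask and an undefined destination, the _mask_ form merges results into __W under __U, and the _maskz_ form zeroes the inactive lanes. A minimal sketch of that convention, written against the long-standing AVX512F truncating convert so it compiles without this header (e.g. gcc -O2 -mavx512f); the function name is illustrative only:

#include <immintrin.h>

__m512i
demo_masking (__m512i src, __mmask16 k, __m512 a)
{
  /* _mask_ form: lanes whose bit in K is clear keep SRC.  */
  __m512i merged = _mm512_mask_cvttps_epi32 (src, k, a);
  /* _maskz_ form: lanes whose bit in K is clear become zero.  */
  __m512i zeroed = _mm512_maskz_cvttps_epi32 (k, a);
  /* The unmasked form passes (__mmask16) -1 and an undefined
     destination, exactly as the deleted bodies above do.  */
  return _mm512_add_epi32 (merged, zeroed);
}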
diff --git a/gcc/config/i386/avx10_2bf16intrin.h b/gcc/config/i386/avx10_2bf16intrin.h
index e6890fc..9560480 100644
--- a/gcc/config/i386/avx10_2bf16intrin.h
+++ b/gcc/config/i386/avx10_2bf16intrin.h
@@ -34,6 +34,32 @@
#define __DISABLE_AVX10_2__
#endif /* __AVX10_2__ */
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_add_pbh (__m512bh __A, __m512bh __B)
+{
+ return (__m512bh) __builtin_ia32_addbf16512 (__A, __B);
+}
+
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_add_pbh (__m512bh __W, __mmask32 __U,
+ __m512bh __A, __m512bh __B)
+{
+ return (__m512bh)
+ __builtin_ia32_addbf16512_mask (__A, __B, __W, __U);
+}
+
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_add_pbh (__mmask32 __U, __m512bh __A, __m512bh __B)
+{
+ return (__m512bh)
+ __builtin_ia32_addbf16512_mask (__A, __B,
+ (__v32bf) _mm512_setzero_si512 (),
+ __U);
+}
+
extern __inline__ __m256bh
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_add_pbh (__m256bh __A, __m256bh __B)
@@ -86,6 +112,32 @@ _mm_maskz_add_pbh (__mmask8 __U, __m128bh __A, __m128bh __B)
__U);
}
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_sub_pbh (__m512bh __A, __m512bh __B)
+{
+ return (__m512bh) __builtin_ia32_subbf16512 (__A, __B);
+}
+
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_sub_pbh (__m512bh __W, __mmask32 __U,
+ __m512bh __A, __m512bh __B)
+{
+ return (__m512bh)
+ __builtin_ia32_subbf16512_mask (__A, __B, __W, __U);
+}
+
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_sub_pbh (__mmask32 __U, __m512bh __A, __m512bh __B)
+{
+ return (__m512bh)
+ __builtin_ia32_subbf16512_mask (__A, __B,
+ (__v32bf) _mm512_setzero_si512 (),
+ __U);
+}
+
extern __inline__ __m256bh
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_sub_pbh (__m256bh __A, __m256bh __B)
@@ -138,6 +190,32 @@ _mm_maskz_sub_pbh (__mmask8 __U, __m128bh __A, __m128bh __B)
__U);
}
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mul_pbh (__m512bh __A, __m512bh __B)
+{
+ return (__m512bh) __builtin_ia32_mulbf16512 (__A, __B);
+}
+
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_mul_pbh (__m512bh __W, __mmask32 __U,
+ __m512bh __A, __m512bh __B)
+{
+ return (__m512bh)
+ __builtin_ia32_mulbf16512_mask (__A, __B, __W, __U);
+}
+
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_mul_pbh (__mmask32 __U, __m512bh __A, __m512bh __B)
+{
+ return (__m512bh)
+ __builtin_ia32_mulbf16512_mask (__A, __B,
+ (__v32bf) _mm512_setzero_si512 (),
+ __U);
+}
+
extern __inline__ __m256bh
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mul_pbh (__m256bh __A, __m256bh __B)
@@ -190,6 +268,32 @@ _mm_maskz_mul_pbh (__mmask8 __U, __m128bh __A, __m128bh __B)
__U);
}
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_div_pbh (__m512bh __A, __m512bh __B)
+{
+ return (__m512bh) __builtin_ia32_divbf16512 (__A, __B);
+}
+
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_div_pbh (__m512bh __W, __mmask32 __U,
+ __m512bh __A, __m512bh __B)
+{
+ return (__m512bh)
+ __builtin_ia32_divbf16512_mask (__A, __B, __W, __U);
+}
+
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_div_pbh (__mmask32 __U, __m512bh __A, __m512bh __B)
+{
+ return (__m512bh)
+ __builtin_ia32_divbf16512_mask (__A, __B,
+ (__v32bf) _mm512_setzero_si512 (),
+ __U);
+}
+
extern __inline__ __m256bh
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_div_pbh (__m256bh __A, __m256bh __B)
@@ -242,6 +346,32 @@ _mm_maskz_div_pbh (__mmask8 __U, __m128bh __A, __m128bh __B)
__U);
}
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_max_pbh (__m512bh __A, __m512bh __B)
+{
+ return (__m512bh) __builtin_ia32_maxbf16512 (__A, __B);
+}
+
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_max_pbh (__m512bh __W, __mmask32 __U,
+ __m512bh __A, __m512bh __B)
+{
+ return (__m512bh)
+ __builtin_ia32_maxbf16512_mask (__A, __B, __W, __U);
+}
+
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_max_pbh (__mmask32 __U, __m512bh __A, __m512bh __B)
+{
+ return (__m512bh)
+ __builtin_ia32_maxbf16512_mask (__A, __B,
+ (__v32bf) _mm512_setzero_si512 (),
+ __U);
+}
+
extern __inline__ __m256bh
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_max_pbh (__m256bh __A, __m256bh __B)
@@ -294,6 +424,32 @@ _mm_maskz_max_pbh (__mmask8 __U, __m128bh __A, __m128bh __B)
__U);
}
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_min_pbh (__m512bh __A, __m512bh __B)
+{
+ return (__m512bh) __builtin_ia32_minbf16512 (__A, __B);
+}
+
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_min_pbh (__m512bh __W, __mmask32 __U,
+ __m512bh __A, __m512bh __B)
+{
+ return (__m512bh)
+ __builtin_ia32_minbf16512_mask (__A, __B, __W, __U);
+}
+
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_min_pbh (__mmask32 __U, __m512bh __A, __m512bh __B)
+{
+ return (__m512bh)
+ __builtin_ia32_minbf16512_mask (__A, __B,
+ (__v32bf) _mm512_setzero_si512 (),
+ __U);
+}
+
extern __inline__ __m256bh
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_min_pbh (__m256bh __A, __m256bh __B)
@@ -346,6 +502,32 @@ _mm_maskz_min_pbh (__mmask8 __U, __m128bh __A, __m128bh __B)
__U);
}
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_scalef_pbh (__m512bh __A, __m512bh __B)
+{
+ return (__m512bh) __builtin_ia32_scalefbf16512 (__A, __B);
+}
+
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_scalef_pbh (__m512bh __W, __mmask32 __U,
+ __m512bh __A, __m512bh __B)
+{
+ return (__m512bh)
+ __builtin_ia32_scalefbf16512_mask (__A, __B, __W, __U);
+}
+
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_scalef_pbh (__mmask32 __U, __m512bh __A, __m512bh __B)
+{
+ return (__m512bh)
+ __builtin_ia32_scalefbf16512_mask (__A, __B,
+ (__v32bf) _mm512_setzero_si512 (),
+ __U);
+}
+
extern __inline__ __m256bh
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_scalef_pbh (__m256bh __A, __m256bh __B)
@@ -398,6 +580,41 @@ _mm_maskz_scalef_pbh (__mmask8 __U, __m128bh __A, __m128bh __B)
__U);
}
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_fmadd_pbh (__m512bh __A, __m512bh __B, __m512bh __C)
+{
+ return (__m512bh)
+ __builtin_ia32_fmaddbf16512_mask (__A, __B, __C, (__mmask32) -1);
+}
+
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_fmadd_pbh (__m512bh __A, __mmask32 __U,
+ __m512bh __B, __m512bh __C)
+{
+ return (__m512bh)
+ __builtin_ia32_fmaddbf16512_mask (__A, __B, __C, __U);
+}
+
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask3_fmadd_pbh (__m512bh __A, __m512bh __B,
+ __m512bh __C, __mmask32 __U)
+{
+ return (__m512bh)
+ __builtin_ia32_fmaddbf16512_mask3 (__A, __B, __C, __U);
+}
+
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_fmadd_pbh (__mmask32 __U, __m512bh __A,
+ __m512bh __B, __m512bh __C)
+{
+ return (__m512bh)
+ __builtin_ia32_fmaddbf16512_maskz (__A, __B, __C, __U);
+}
+
extern __inline__ __m256bh
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_fmadd_pbh (__m256bh __A, __m256bh __B, __m256bh __C)
@@ -468,6 +685,41 @@ _mm_maskz_fmadd_pbh (__mmask8 __U, __m128bh __A,
__builtin_ia32_fmaddbf16128_maskz (__A, __B, __C, __U);
}
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_fmsub_pbh (__m512bh __A, __m512bh __B, __m512bh __C)
+{
+ return (__m512bh)
+ __builtin_ia32_fmsubbf16512_mask (__A, __B, __C, (__mmask32) -1);
+}
+
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_fmsub_pbh (__m512bh __A, __mmask32 __U,
+ __m512bh __B, __m512bh __C)
+{
+ return (__m512bh)
+ __builtin_ia32_fmsubbf16512_mask (__A, __B, __C, __U);
+}
+
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask3_fmsub_pbh (__m512bh __A, __m512bh __B,
+ __m512bh __C, __mmask32 __U)
+{
+ return (__m512bh)
+ __builtin_ia32_fmsubbf16512_mask3 (__A, __B, __C, __U);
+}
+
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_fmsub_pbh (__mmask32 __U, __m512bh __A,
+ __m512bh __B, __m512bh __C)
+{
+ return (__m512bh)
+ __builtin_ia32_fmsubbf16512_maskz (__A, __B, __C, __U);
+}
+
extern __inline__ __m256bh
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_fmsub_pbh (__m256bh __A, __m256bh __B, __m256bh __C)
@@ -537,6 +789,41 @@ _mm_maskz_fmsub_pbh (__mmask8 __U, __m128bh __A,
__builtin_ia32_fmsubbf16128_maskz (__A, __B, __C, __U);
}
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_fnmadd_pbh (__m512bh __A, __m512bh __B, __m512bh __C)
+{
+ return (__m512bh)
+ __builtin_ia32_fnmaddbf16512_mask (__A, __B, __C, (__mmask32) -1);
+}
+
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_fnmadd_pbh (__m512bh __A, __mmask32 __U,
+ __m512bh __B, __m512bh __C)
+{
+ return (__m512bh)
+ __builtin_ia32_fnmaddbf16512_mask (__A, __B, __C, __U);
+}
+
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask3_fnmadd_pbh (__m512bh __A, __m512bh __B,
+ __m512bh __C, __mmask32 __U)
+{
+ return (__m512bh)
+ __builtin_ia32_fnmaddbf16512_mask3 (__A, __B, __C, __U);
+}
+
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_fnmadd_pbh (__mmask32 __U, __m512bh __A,
+ __m512bh __B, __m512bh __C)
+{
+ return (__m512bh)
+ __builtin_ia32_fnmaddbf16512_maskz (__A, __B, __C, __U);
+}
+
extern __inline__ __m256bh
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_fnmadd_pbh (__m256bh __A, __m256bh __B, __m256bh __C)
@@ -607,6 +894,41 @@ _mm_maskz_fnmadd_pbh (__mmask8 __U, __m128bh __A,
__builtin_ia32_fnmaddbf16128_maskz (__A, __B, __C, __U);
}
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_fnmsub_pbh (__m512bh __A, __m512bh __B, __m512bh __C)
+{
+ return (__m512bh)
+ __builtin_ia32_fnmsubbf16512_mask (__A, __B, __C, (__mmask32) -1);
+}
+
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_fnmsub_pbh (__m512bh __A, __mmask32 __U,
+ __m512bh __B, __m512bh __C)
+{
+ return (__m512bh)
+ __builtin_ia32_fnmsubbf16512_mask (__A, __B, __C, __U);
+}
+
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask3_fnmsub_pbh (__m512bh __A, __m512bh __B,
+ __m512bh __C, __mmask32 __U)
+{
+ return (__m512bh)
+ __builtin_ia32_fnmsubbf16512_mask3 (__A, __B, __C, __U);
+}
+
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_fnmsub_pbh (__mmask32 __U, __m512bh __A,
+ __m512bh __B, __m512bh __C)
+{
+ return (__m512bh)
+ __builtin_ia32_fnmsubbf16512_maskz (__A, __B, __C, __U);
+}
+
extern __inline__ __m256bh
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_fnmsub_pbh (__m256bh __A, __m256bh __B, __m256bh __C)
@@ -677,6 +999,35 @@ _mm_maskz_fnmsub_pbh (__mmask8 __U, __m128bh __A,
__builtin_ia32_fnmsubbf16128_maskz (__A, __B, __C, __U);
}
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_rsqrt_pbh (__m512bh __A)
+{
+ return (__m512bh)
+ __builtin_ia32_rsqrtbf16512_mask (__A,
+ (__v32bf) _mm512_setzero_si512 (),
+ (__mmask32) -1);
+
+}
+
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_rsqrt_pbh (__m512bh __W, __mmask32 __U, __m512bh __A)
+{
+ return (__m512bh)
+ __builtin_ia32_rsqrtbf16512_mask (__A, __W, __U);
+}
+
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_rsqrt_pbh (__mmask32 __U, __m512bh __A)
+{
+ return (__m512bh)
+ __builtin_ia32_rsqrtbf16512_mask (__A,
+ (__v32bf) _mm512_setzero_si512 (),
+ __U);
+}
+
extern __inline__ __m256bh
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_rsqrt_pbh (__m256bh __A)
@@ -733,6 +1084,34 @@ _mm_maskz_rsqrt_pbh (__mmask8 __U, __m128bh __A)
__U);
}
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_sqrt_pbh (__m512bh __A)
+{
+ return (__m512bh)
+ __builtin_ia32_sqrtbf16512_mask (__A,
+ (__v32bf) _mm512_setzero_si512 (),
+ (__mmask32) -1);
+}
+
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_sqrt_pbh (__m512bh __W, __mmask32 __U, __m512bh __A)
+{
+ return (__m512bh)
+ __builtin_ia32_sqrtbf16512_mask (__A, __W, __U);
+}
+
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_sqrt_pbh (__mmask32 __U, __m512bh __A)
+{
+ return (__m512bh)
+ __builtin_ia32_sqrtbf16512_mask (__A,
+ (__v32bf) _mm512_setzero_si512 (),
+ __U);
+}
+
extern __inline__ __m256bh
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_sqrt_pbh (__m256bh __A)
@@ -789,6 +1168,34 @@ _mm_maskz_sqrt_pbh (__mmask8 __U, __m128bh __A)
__U);
}
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_rcp_pbh (__m512bh __A)
+{
+ return (__m512bh)
+ __builtin_ia32_rcpbf16512_mask (__A,
+ (__v32bf) _mm512_setzero_si512 (),
+ (__mmask32) -1);
+}
+
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_rcp_pbh (__m512bh __W, __mmask32 __U, __m512bh __A)
+{
+ return (__m512bh)
+ __builtin_ia32_rcpbf16512_mask (__A, __W, __U);
+}
+
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_rcp_pbh (__mmask32 __U, __m512bh __A)
+{
+ return (__m512bh)
+ __builtin_ia32_rcpbf16512_mask (__A,
+ (__v32bf) _mm512_setzero_si512 (),
+ __U);
+}
+
extern __inline__ __m256bh
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_rcp_pbh (__m256bh __A)
@@ -845,6 +1252,33 @@ _mm_maskz_rcp_pbh (__mmask8 __U, __m128bh __A)
__U);
}
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_getexp_pbh (__m512bh __A)
+{
+ return (__m512bh)
+ __builtin_ia32_getexpbf16512_mask (__A,
+ (__v32bf) _mm512_setzero_si512 (),
+ (__mmask32) -1);
+}
+
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_getexp_pbh (__m512bh __W, __mmask32 __U, __m512bh __A)
+{
+ return (__m512bh) __builtin_ia32_getexpbf16512_mask (__A, __W, __U);
+}
+
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_getexp_pbh (__mmask32 __U, __m512bh __A)
+{
+ return (__m512bh)
+ __builtin_ia32_getexpbf16512_mask (__A,
+ (__v32bf) _mm512_setzero_si512 (),
+ __U);
+}
+
extern __inline__ __m256bh
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_getexp_pbh (__m256bh __A)
@@ -903,6 +1337,34 @@ _mm_maskz_getexp_pbh (__mmask8 __U, __m128bh __A)
/* Intrinsics vrndscalebf16. */
#ifdef __OPTIMIZE__
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_roundscale_pbh (__m512bh __A, int B)
+{
+ return (__m512bh)
+ __builtin_ia32_rndscalebf16512_mask (__A, B,
+ (__v32bf) _mm512_setzero_si512 (),
+ (__mmask32) -1);
+}
+
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_roundscale_pbh (__m512bh __W, __mmask32 __U, __m512bh __A, int B)
+{
+ return (__m512bh)
+ __builtin_ia32_rndscalebf16512_mask (__A, B, __W, __U);
+}
+
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_roundscale_pbh (__mmask32 __U, __m512bh __A, int B)
+{
+ return (__m512bh)
+ __builtin_ia32_rndscalebf16512_mask (__A, B,
+ (__v32bf) _mm512_setzero_si512 (),
+ __U);
+}
+
extern __inline__ __m256bh
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_roundscale_pbh (__m256bh __A, int B)
@@ -962,6 +1424,19 @@ _mm_maskz_roundscale_pbh (__mmask8 __U, __m128bh __A, int B)
}
#else
+#define _mm512_roundscale_pbh(A, B) \
+ (__builtin_ia32_rndscalebf16512_mask ((A), (B), \
+ (__v32bf) _mm512_setzero_si512 (), \
+ (__mmask32) -1))
+
+#define _mm512_mask_roundscale_pbh(A, B, C, D) \
+ (__builtin_ia32_rndscalebf16512_mask ((C), (D), (A), (B)))
+
+#define _mm512_maskz_roundscale_pbh(A, B, C) \
+ (__builtin_ia32_rndscalebf16512_mask ((B), (C), \
+ (__v32bf) _mm512_setzero_si512 (), \
+ (A)))
+
#define _mm256_roundscale_pbh(A, B) \
(__builtin_ia32_rndscalebf16256_mask ((A), (B), \
(__v16bf) _mm256_setzero_si256 (), \
@@ -992,6 +1467,35 @@ _mm_maskz_roundscale_pbh (__mmask8 __U, __m128bh __A, int B)
/* Intrinsics vreducebf16. */
#ifdef __OPTIMIZE__
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_reduce_pbh (__m512bh __A, int B)
+{
+ return (__m512bh)
+ __builtin_ia32_reducebf16512_mask (__A, B,
+ (__v32bf) _mm512_setzero_si512 (),
+ (__mmask32) -1);
+}
+
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_reduce_pbh (__m512bh __W, __mmask32 __U,
+ __m512bh __A, int B)
+{
+ return (__m512bh)
+ __builtin_ia32_reducebf16512_mask (__A, B, __W, __U);
+}
+
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_reduce_pbh (__mmask32 __U, __m512bh __A, int B)
+{
+ return (__m512bh)
+ __builtin_ia32_reducebf16512_mask (__A, B,
+ (__v32bf) _mm512_setzero_si512 (),
+ __U);
+}
+
extern __inline__ __m256bh
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_reduce_pbh (__m256bh __A, int B)
@@ -1051,6 +1555,19 @@ _mm_maskz_reduce_pbh (__mmask8 __U, __m128bh __A, int B)
}
#else
+#define _mm512_reduce_pbh(A, B) \
+ (__builtin_ia32_reducebf16512_mask ((A), (B), \
+ (__v32bf) _mm512_setzero_si512 (), \
+ (__mmask32) -1))
+
+#define _mm512_mask_reduce_pbh(A, B, C, D) \
+ (__builtin_ia32_reducebf16512_mask ((C), (D), (A), (B)))
+
+#define _mm512_maskz_reduce_pbh(A, B, C) \
+ (__builtin_ia32_reducebf16512_mask ((B), (C), \
+ (__v32bf) _mm512_setzero_si512 (), \
+ (A)))
+
#define _mm256_reduce_pbh(A, B) \
(__builtin_ia32_reducebf16256_mask ((A), (B), \
(__v16bf) _mm256_setzero_si256 (), \
@@ -1082,6 +1599,40 @@ _mm_maskz_reduce_pbh (__mmask8 __U, __m128bh __A, int B)
/* Intrinsics vgetmantbf16. */
#ifdef __OPTIMIZE__
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_getmant_pbh (__m512bh __A, _MM_MANTISSA_NORM_ENUM __B,
+ _MM_MANTISSA_SIGN_ENUM __C)
+{
+ return (__m512bh)
+ __builtin_ia32_getmantbf16512_mask (__A, (int) (__C << 2) | __B,
+ (__v32bf) _mm512_setzero_si512 (),
+ (__mmask32) -1);
+}
+
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_getmant_pbh (__m512bh __W, __mmask32 __U, __m512bh __A,
+ _MM_MANTISSA_NORM_ENUM __B,
+ _MM_MANTISSA_SIGN_ENUM __C)
+{
+ return (__m512bh)
+ __builtin_ia32_getmantbf16512_mask (__A, (int) (__C << 2) | __B,
+ __W, __U);
+}
+
+extern __inline__ __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_getmant_pbh (__mmask32 __U, __m512bh __A,
+ _MM_MANTISSA_NORM_ENUM __B,
+ _MM_MANTISSA_SIGN_ENUM __C)
+{
+ return (__m512bh)
+ __builtin_ia32_getmantbf16512_mask (__A, (int) (__C << 2) | __B,
+ (__v32bf) _mm512_setzero_si512 (),
+ __U);
+}
+
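As the inline bodies above show, the getmant immediate packs the two enum operands as (sign << 2) | interval, matching the vgetmant encoding: interval select in bits 1:0, sign control in bits 3:2. A small sketch using the standard _MM_MANT_* enum values from the AVX-512 headers, assuming an AVX10.2-enabled compiler:

#include <immintrin.h>

/* Sketch: normalize mantissas to [1, 2) and force the sign to zero.  */
__m512bh
mant_1_2 (__m512bh v)
{
  return _mm512_getmant_pbh (v, _MM_MANT_NORM_1_2, _MM_MANT_SIGN_zero);
}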
extern __inline__ __m256bh
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_getmant_pbh (__m256bh __A, _MM_MANTISSA_NORM_ENUM __B,
@@ -1151,6 +1702,19 @@ _mm_maskz_getmant_pbh (__mmask8 __U, __m128bh __A,
}
#else
+#define _mm512_getmant_pbh(A, B, C) \
+ (__builtin_ia32_getmantbf16512_mask ((A), (int)(((C)<<2) | (B)), \
+ (__v32bf) _mm512_setzero_si512 (), \
+ (__mmask32) -1))
+
+#define _mm512_mask_getmant_pbh(A, B, C, D, E) \
+ (__builtin_ia32_getmantbf16512_mask ((C), (int)(((D)<<2) | (E)), (A), (B)))
+
+#define _mm512_maskz_getmant_pbh(A, B, C, D) \
+ (__builtin_ia32_getmantbf16512_mask ((B), (int)(((C)<<2) | (D)), \
+ (__v32bf) _mm512_setzero_si512 (), \
+ (A)))
+
#define _mm256_getmant_pbh(A, B, C) \
(__builtin_ia32_getmantbf16256_mask ((A), (int)(((C)<<2) | (B)), \
(__v16bf) _mm256_setzero_si256 (), \
@@ -1180,6 +1744,24 @@ _mm_maskz_getmant_pbh (__mmask8 __U, __m128bh __A,
/* Intrinsics vfpclassbf16. */
#ifdef __OPTIMIZE__
+extern __inline __mmask32
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_fpclass_pbh_mask (__mmask32 __U, __m512bh __A,
+ const int __imm)
+{
+ return (__mmask32)
+ __builtin_ia32_fpclassbf16512_mask (__A, __imm, __U);
+}
+
+extern __inline __mmask32
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_fpclass_pbh_mask (__m512bh __A, const int __imm)
+{
+ return (__mmask32)
+ __builtin_ia32_fpclassbf16512_mask (__A, __imm,
+ (__mmask32) -1);
+}
+
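The fpclass immediate is the usual vfpclass category bitmask (bit 0 QNaN, bit 1 +0, bit 2 -0, bit 3 +Inf, bit 4 -Inf, bit 5 denormal, bit 6 finite negative, bit 7 SNaN), so one test can combine several classes. A sketch flagging the non-finite bf16 lanes, again assuming an AVX10.2-enabled compiler:

#include <immintrin.h>

/* Sketch: 0x99 = QNaN | +Inf | -Inf | SNaN, i.e. bits 0, 3, 4, 7.  */
__mmask32
nonfinite_lanes (__m512bh v)
{
  return _mm512_fpclass_pbh_mask (v, 0x99);
}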
extern __inline __mmask16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_fpclass_pbh_mask (__mmask16 __U, __m256bh __A,
@@ -1214,6 +1796,14 @@ _mm_fpclass_pbh_mask (__m128bh __A, const int __imm)
}
#else
+#define _mm512_mask_fpclass_pbh_mask(U, X, C) \
+ ((__mmask32) __builtin_ia32_fpclassbf16512_mask ( \
+ (__v32bf) (__m512bh) (X), (int) (C), (__mmask32) (U)))
+
+#define _mm512_fpclass_pbh_mask(X, C) \
+ ((__mmask32) __builtin_ia32_fpclassbf16512_mask ( \
+ (__v32bf) (__m512bh) (X), (int) (C), (__mmask32) (-1)))
+
#define _mm256_mask_fpclass_pbh_mask(U, A, B) \
((__mmask16) __builtin_ia32_fpclassbf16256_mask ((A), (B), (U)))
@@ -1233,6 +1823,24 @@ _mm_fpclass_pbh_mask (__m128bh __A, const int __imm)
/* Intrinsics vcmpbf16. */
#ifdef __OPTIMIZE__
+extern __inline __mmask32
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cmp_pbh_mask (__mmask32 __U, __m512bh __A, __m512bh __B,
+ const int __imm)
+{
+ return (__mmask32)
+ __builtin_ia32_cmpbf16512_mask (__A, __B, __imm, __U);
+}
+
+extern __inline __mmask32
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cmp_pbh_mask (__m512bh __A, __m512bh __B, const int __imm)
+{
+ return (__mmask32)
+ __builtin_ia32_cmpbf16512_mask (__A, __B, __imm,
+ (__mmask32) -1);
+}
+
extern __inline __mmask16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cmp_pbh_mask (__mmask16 __U, __m256bh __A,
@@ -1268,6 +1876,12 @@ _mm_cmp_pbh_mask (__m128bh __A, __m128bh __B, const int __imm)
}
#else
+#define _mm512_mask_cmp_pbh_mask(A, B, C, D) \
+ ((__mmask32) __builtin_ia32_cmpbf16512_mask ((B), (C), (D), (A)))
+
+#define _mm512_cmp_pbh_mask(A, B, C) \
+ ((__mmask32) __builtin_ia32_cmpbf16512_mask ((A), (B), (C), (-1)))
+
#define _mm256_mask_cmp_pbh_mask(A, B, C, D) \
((__mmask16) __builtin_ia32_cmpbf16256_mask ((B), (C), (D), (A)))
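A note on the getmant immediate built above: the two enum operands are
packed into one byte, with bits [1:0] taking the normalization interval
and bits [3:2] the sign control, hence (sign << 2) | norm. A minimal
usage sketch, not part of the patch; it assumes an AVX10.2-enabled GCC:

#include <immintrin.h>

/* Normalize the mantissas of 32 bf16 lanes into [1, 2), keeping
   each lane's original sign.  */
__m512bh
getmant_example (__m512bh x)
{
  return _mm512_getmant_pbh (x, _MM_MANT_NORM_1_2, _MM_MANT_SIGN_src);
}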
diff --git a/gcc/config/i386/avx10_2convertintrin.h b/gcc/config/i386/avx10_2convertintrin.h
index 8cbdc66..f2fb98f 100644
--- a/gcc/config/i386/avx10_2convertintrin.h
+++ b/gcc/config/i386/avx10_2convertintrin.h
@@ -98,6 +98,103 @@ _mm256_maskz_cvtx2ps_ph ( __mmask16 __U, __m256 __A, __m256 __B)
(__mmask16) __U);
}
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtx2ps_ph (__m512 __A, __m512 __B)
+{
+ return (__m512h) __builtin_ia32_vcvt2ps2phx512_mask_round ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v32hf)
+ _mm512_setzero_ph (),
+ (__mmask32) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtx2ps_ph (__m512h __W, __mmask32 __U, __m512 __A,
+ __m512 __B)
+{
+ return (__m512h) __builtin_ia32_vcvt2ps2phx512_mask_round ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v32hf) __W,
+ (__mmask32) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtx2ps_ph (__mmask32 __U, __m512 __A, __m512 __B)
+{
+ return (__m512h) __builtin_ia32_vcvt2ps2phx512_mask_round ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v32hf)
+ _mm512_setzero_ph (),
+ (__mmask32) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+#ifdef __OPTIMIZE__
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtx_round2ps_ph (__m512 __A, __m512 __B, const int __R)
+{
+ return (__m512h) __builtin_ia32_vcvt2ps2phx512_mask_round ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v32hf)
+ _mm512_setzero_ph (),
+ (__mmask32) -1,
+ __R);
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtx_round2ps_ph (__m512h __W, __mmask32 __U, __m512 __A,
+ __m512 __B, const int __R)
+{
+ return (__m512h) __builtin_ia32_vcvt2ps2phx512_mask_round ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v32hf) __W,
+ (__mmask32) __U,
+ __R);
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtx_round2ps_ph (__mmask32 __U, __m512 __A,
+ __m512 __B, const int __R)
+{
+ return (__m512h) __builtin_ia32_vcvt2ps2phx512_mask_round ((__v16sf) __A,
+ (__v16sf) __B,
+ (__v32hf)
+ _mm512_setzero_ph (),
+ (__mmask32) __U,
+ __R);
+}
+
+#else
+#define _mm512_cvtx_round2ps_ph(A, B, R) \
+ ((__m512h) __builtin_ia32_vcvt2ps2phx512_mask_round ((__v16sf) (A), \
+ (__v16sf) (B), \
+ (__v32hf) \
+ (_mm512_setzero_ph ()), \
+ (__mmask32) (-1), \
+ (R)))
+#define _mm512_mask_cvtx_round2ps_ph(W, U, A, B, R) \
+ ((__m512h) __builtin_ia32_vcvt2ps2phx512_mask_round ((__v16sf) (A), \
+ (__v16sf) (B), \
+ (__v32hf) (W), \
+ (__mmask32) (U), \
+ (R)))
+#define _mm512_maskz_cvtx_round2ps_ph(U, A, B, R) \
+ ((__m512h) __builtin_ia32_vcvt2ps2phx512_mask_round ((__v16sf) (A), \
+ (__v16sf) (B), \
+ (__v32hf) \
+ (_mm512_setzero_ph ()), \
+ (__mmask32) (U), \
+ (R)))
+#endif /* __OPTIMIZE__ */
+
extern __inline__ __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtbiasph_bf8 (__m128i __A, __m128h __B)
@@ -161,6 +258,39 @@ _mm256_maskz_cvtbiasph_bf8 (__mmask16 __U, __m256i __A, __m256h __B)
(__mmask16) __U);
}
+extern __inline__ __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtbiasph_bf8 (__m512i __A, __m512h __B)
+{
+ return (__m256i) __builtin_ia32_vcvtbiasph2bf8512_mask ((__v64qi) __A,
+ (__v32hf) __B,
+ (__v32qi)(__m256i)
+ _mm256_undefined_si256 (),
+ (__mmask32) -1);
+}
+
+extern __inline__ __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtbiasph_bf8 (__m256i __W, __mmask32 __U,
+ __m512i __A, __m512h __B)
+{
+ return (__m256i) __builtin_ia32_vcvtbiasph2bf8512_mask ((__v64qi) __A,
+ (__v32hf) __B,
+ (__v32qi)(__m256i) __W,
+ (__mmask32) __U);
+}
+
+extern __inline__ __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtbiasph_bf8 (__mmask32 __U, __m512i __A, __m512h __B)
+{
+ return (__m256i) __builtin_ia32_vcvtbiasph2bf8512_mask ((__v64qi) __A,
+ (__v32hf) __B,
+ (__v32qi)(__m256i)
+ _mm256_setzero_si256 (),
+ (__mmask32) __U);
+}
+
extern __inline__ __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvts_biasph_bf8 (__m128i __A, __m128h __B)
@@ -224,6 +354,39 @@ _mm256_maskz_cvts_biasph_bf8 (__mmask16 __U, __m256i __A, __m256h __B)
(__mmask16) __U);
}
+extern __inline__ __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvts_biasph_bf8 (__m512i __A, __m512h __B)
+{
+ return (__m256i) __builtin_ia32_vcvtbiasph2bf8s512_mask ((__v64qi) __A,
+ (__v32hf) __B,
+ (__v32qi)(__m256i)
+ _mm256_undefined_si256 (),
+ (__mmask32) -1);
+}
+
+extern __inline__ __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvts_biasph_bf8 (__m256i __W, __mmask32 __U,
+ __m512i __A, __m512h __B)
+{
+ return (__m256i) __builtin_ia32_vcvtbiasph2bf8s512_mask ((__v64qi) __A,
+ (__v32hf) __B,
+ (__v32qi)(__m256i) __W,
+ (__mmask32) __U);
+}
+
+extern __inline__ __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvts_biasph_bf8 (__mmask32 __U, __m512i __A, __m512h __B)
+{
+ return (__m256i) __builtin_ia32_vcvtbiasph2bf8s512_mask ((__v64qi) __A,
+ (__v32hf) __B,
+ (__v32qi)(__m256i)
+ _mm256_setzero_si256 (),
+ (__mmask32) __U);
+}
+
extern __inline__ __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtbiasph_hf8 (__m128i __A, __m128h __B)
@@ -287,6 +450,39 @@ _mm256_maskz_cvtbiasph_hf8 (__mmask16 __U, __m256i __A, __m256h __B)
(__mmask16) __U);
}
+extern __inline__ __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtbiasph_hf8 (__m512i __A, __m512h __B)
+{
+ return (__m256i) __builtin_ia32_vcvtbiasph2hf8512_mask ((__v64qi) __A,
+ (__v32hf) __B,
+ (__v32qi)(__m256i)
+ _mm256_undefined_si256 (),
+ (__mmask32) -1);
+}
+
+extern __inline__ __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtbiasph_hf8 (__m256i __W, __mmask32 __U, __m512i __A,
+ __m512h __B)
+{
+ return (__m256i) __builtin_ia32_vcvtbiasph2hf8512_mask ((__v64qi) __A,
+ (__v32hf) __B,
+ (__v32qi)(__m256i) __W,
+ (__mmask32) __U);
+}
+
+extern __inline__ __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtbiasph_hf8 (__mmask32 __U, __m512i __A, __m512h __B)
+{
+ return (__m256i) __builtin_ia32_vcvtbiasph2hf8512_mask ((__v64qi) __A,
+ (__v32hf) __B,
+ (__v32qi)(__m256i)
+ _mm256_setzero_si256 (),
+ (__mmask32) __U);
+}
+
extern __inline__ __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvts_biasph_hf8 (__m128i __A, __m128h __B)
@@ -350,6 +546,39 @@ _mm256_maskz_cvts_biasph_hf8 (__mmask16 __U, __m256i __A, __m256h __B)
(__mmask16) __U);
}
+extern __inline__ __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvts_biasph_hf8 (__m512i __A, __m512h __B)
+{
+ return (__m256i) __builtin_ia32_vcvtbiasph2hf8s512_mask ((__v64qi) __A,
+ (__v32hf) __B,
+ (__v32qi)(__m256i)
+ _mm256_undefined_si256 (),
+ (__mmask32) -1);
+}
+
+extern __inline__ __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvts_biasph_hf8 (__m256i __W, __mmask32 __U,
+ __m512i __A, __m512h __B)
+{
+ return (__m256i) __builtin_ia32_vcvtbiasph2hf8s512_mask ((__v64qi) __A,
+ (__v32hf) __B,
+ (__v32qi)(__m256i) __W,
+ (__mmask32) __U);
+}
+
+extern __inline__ __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvts_biasph_hf8 (__mmask32 __U, __m512i __A, __m512h __B)
+{
+ return (__m256i) __builtin_ia32_vcvtbiasph2hf8s512_mask ((__v64qi) __A,
+ (__v32hf) __B,
+ (__v32qi)(__m256i)
+ _mm256_setzero_si256 (),
+ (__mmask32) __U);
+}
+
extern __inline__ __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt2ph_bf8 (__m128h __A, __m128h __B)
@@ -416,6 +645,39 @@ _mm256_maskz_cvt2ph_bf8 (__mmask32 __U, __m256h __A, __m256h __B)
(__mmask32) __U);
}
+extern __inline__ __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvt2ph_bf8 (__m512h __A, __m512h __B)
+{
+ return (__m512i) __builtin_ia32_vcvt2ph2bf8512_mask ((__v32hf) __A,
+ (__v32hf) __B,
+ (__v64qi)
+ _mm512_setzero_si512 (),
+ (__mmask64) -1);
+}
+
+extern __inline__ __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvt2ph_bf8 (__m512i __W, __mmask64 __U,
+ __m512h __A, __m512h __B)
+{
+ return (__m512i) __builtin_ia32_vcvt2ph2bf8512_mask ((__v32hf) __A,
+ (__v32hf) __B,
+ (__v64qi) __W,
+ (__mmask64) __U);
+}
+
+extern __inline__ __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvt2ph_bf8 (__mmask64 __U, __m512h __A, __m512h __B)
+{
+ return (__m512i) __builtin_ia32_vcvt2ph2bf8512_mask ((__v32hf) __A,
+ (__v32hf) __B,
+ (__v64qi)
+ _mm512_setzero_si512 (),
+ (__mmask64) __U);
+}
+
extern __inline__ __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvts_2ph_bf8 (__m128h __A, __m128h __B)
@@ -482,6 +744,39 @@ _mm256_maskz_cvts_2ph_bf8 (__mmask32 __U, __m256h __A, __m256h __B)
(__mmask32) __U);
}
+extern __inline__ __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvts_2ph_bf8 (__m512h __A, __m512h __B)
+{
+ return (__m512i) __builtin_ia32_vcvt2ph2bf8s512_mask ((__v32hf) __A,
+ (__v32hf) __B,
+ (__v64qi)
+ _mm512_setzero_si512 (),
+ (__mmask64) -1);
+}
+
+extern __inline__ __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvts_2ph_bf8 (__m512i __W, __mmask64 __U,
+ __m512h __A, __m512h __B)
+{
+ return (__m512i) __builtin_ia32_vcvt2ph2bf8s512_mask ((__v32hf) __A,
+ (__v32hf) __B,
+ (__v64qi) __W,
+ (__mmask64) __U);
+}
+
+extern __inline__ __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvts_2ph_bf8 (__mmask64 __U, __m512h __A, __m512h __B)
+{
+ return (__m512i) __builtin_ia32_vcvt2ph2bf8s512_mask ((__v32hf) __A,
+ (__v32hf) __B,
+ (__v64qi)
+ _mm512_setzero_si512 (),
+ (__mmask64) __U);
+}
+
extern __inline__ __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt2ph_hf8 (__m128h __A, __m128h __B)
@@ -548,6 +843,39 @@ _mm256_maskz_cvt2ph_hf8 (__mmask32 __U, __m256h __A, __m256h __B)
(__mmask32) __U);
}
+extern __inline__ __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvt2ph_hf8 (__m512h __A, __m512h __B)
+{
+ return (__m512i) __builtin_ia32_vcvt2ph2hf8512_mask ((__v32hf) __A,
+ (__v32hf) __B,
+ (__v64qi)
+ _mm512_setzero_si512 (),
+ (__mmask64) -1);
+}
+
+extern __inline__ __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvt2ph_hf8 (__m512i __W, __mmask64 __U,
+ __m512h __A, __m512h __B)
+{
+ return (__m512i) __builtin_ia32_vcvt2ph2hf8512_mask ((__v32hf) __A,
+ (__v32hf) __B,
+ (__v64qi) __W,
+ (__mmask64) __U);
+}
+
+extern __inline__ __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvt2ph_hf8 (__mmask64 __U, __m512h __A, __m512h __B)
+{
+ return (__m512i) __builtin_ia32_vcvt2ph2hf8512_mask ((__v32hf) __A,
+ (__v32hf) __B,
+ (__v64qi)
+ _mm512_setzero_si512 (),
+ (__mmask64) __U);
+}
+
extern __inline__ __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvts_2ph_hf8 (__m128h __A, __m128h __B)
@@ -614,6 +942,39 @@ _mm256_maskz_cvts_2ph_hf8 (__mmask32 __U, __m256h __A, __m256h __B)
(__mmask32) __U);
}
+extern __inline__ __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvts_2ph_hf8 (__m512h __A, __m512h __B)
+{
+ return (__m512i) __builtin_ia32_vcvt2ph2hf8s512_mask ((__v32hf) __A,
+ (__v32hf) __B,
+ (__v64qi)
+ _mm512_setzero_si512 (),
+ (__mmask64) -1);
+}
+
+extern __inline__ __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvts_2ph_hf8 (__m512i __W, __mmask64 __U,
+ __m512h __A, __m512h __B)
+{
+ return (__m512i) __builtin_ia32_vcvt2ph2hf8s512_mask ((__v32hf) __A,
+ (__v32hf) __B,
+ (__v64qi) __W,
+ (__mmask64) __U);
+}
+
+extern __inline__ __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvts_2ph_hf8 (__mmask64 __U, __m512h __A, __m512h __B)
+{
+ return (__m512i) __builtin_ia32_vcvt2ph2hf8s512_mask ((__v32hf) __A,
+ (__v32hf) __B,
+ (__v64qi)
+ _mm512_setzero_si512 (),
+ (__mmask64) __U);
+}
+
extern __inline__ __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvthf8_ph (__m128i __A)
@@ -672,6 +1033,35 @@ _mm256_maskz_cvthf8_ph (__mmask16 __U, __m128i __A)
(__mmask16) __U);
}
+extern __inline__ __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvthf8_ph (__m256i __A)
+{
+ return (__m512h) __builtin_ia32_vcvthf82ph512_mask ((__v32qi) __A,
+ (__v32hf) (__m512h)
+ _mm512_undefined_ph (),
+ (__mmask32) -1);
+}
+
+extern __inline__ __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvthf8_ph (__m512h __W, __mmask32 __U, __m256i __A)
+{
+ return (__m512h) __builtin_ia32_vcvthf82ph512_mask ((__v32qi) __A,
+ (__v32hf) (__m512h) __W,
+ (__mmask32) __U);
+}
+
+extern __inline__ __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvthf8_ph (__mmask32 __U, __m256i __A)
+{
+ return (__m512h) __builtin_ia32_vcvthf82ph512_mask ((__v32qi) __A,
+ (__v32hf) (__m512h)
+ _mm512_setzero_ph (),
+ (__mmask32) __U);
+}
+
extern __inline__ __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtph_bf8 (__m128h __A)
@@ -730,6 +1120,35 @@ _mm256_maskz_cvtph_bf8 (__mmask16 __U, __m256h __A)
(__mmask16) __U);
}
+extern __inline__ __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtph_bf8 (__m512h __A)
+{
+ return (__m256i) __builtin_ia32_vcvtph2bf8512_mask ((__v32hf) __A,
+ (__v32qi) (__m256i)
+ _mm256_undefined_si256 (),
+ (__mmask32) -1);
+}
+
+extern __inline__ __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtph_bf8 (__m256i __W, __mmask32 __U, __m512h __A)
+{
+ return (__m256i) __builtin_ia32_vcvtph2bf8512_mask ((__v32hf) __A,
+ (__v32qi) (__m256i) __W,
+ (__mmask32) __U);
+}
+
+extern __inline__ __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtph_bf8 (__mmask32 __U, __m512h __A)
+{
+ return (__m256i) __builtin_ia32_vcvtph2bf8512_mask ((__v32hf) __A,
+ (__v32qi) (__m256i)
+ _mm256_setzero_si256 (),
+ (__mmask32) __U);
+}
+
extern __inline__ __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvts_ph_bf8 (__m128h __A)
@@ -788,6 +1207,35 @@ _mm256_maskz_cvts_ph_bf8 (__mmask16 __U, __m256h __A)
(__mmask16) __U);
}
+extern __inline__ __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvts_ph_bf8 (__m512h __A)
+{
+ return (__m256i) __builtin_ia32_vcvtph2bf8s512_mask ((__v32hf) __A,
+ (__v32qi) (__m256i)
+ _mm256_undefined_si256 (),
+ (__mmask32) -1);
+}
+
+extern __inline__ __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvts_ph_bf8 (__m256i __W, __mmask32 __U, __m512h __A)
+{
+ return (__m256i) __builtin_ia32_vcvtph2bf8s512_mask ((__v32hf) __A,
+ (__v32qi) (__m256i) __W,
+ (__mmask32) __U);
+}
+
+extern __inline__ __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvts_ph_bf8 (__mmask32 __U, __m512h __A)
+{
+ return (__m256i) __builtin_ia32_vcvtph2bf8s512_mask ((__v32hf) __A,
+ (__v32qi) (__m256i)
+ _mm256_setzero_si256 (),
+ (__mmask32) __U);
+}
+
extern __inline__ __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtph_hf8 (__m128h __A)
@@ -846,6 +1294,35 @@ _mm256_maskz_cvtph_hf8 (__mmask16 __U, __m256h __A)
(__mmask16) __U);
}
+extern __inline__ __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtph_hf8 (__m512h __A)
+{
+ return (__m256i) __builtin_ia32_vcvtph2hf8512_mask ((__v32hf) __A,
+ (__v32qi) (__m256i)
+ _mm256_undefined_si256 (),
+ (__mmask32) -1);
+}
+
+extern __inline__ __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtph_hf8 (__m256i __W, __mmask32 __U, __m512h __A)
+{
+ return (__m256i) __builtin_ia32_vcvtph2hf8512_mask ((__v32hf) __A,
+ (__v32qi)(__m256i) __W,
+ (__mmask32) __U);
+}
+
+extern __inline__ __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtph_hf8 (__mmask32 __U, __m512h __A)
+{
+ return (__m256i) __builtin_ia32_vcvtph2hf8512_mask ((__v32hf) __A,
+ (__v32qi) (__m256i)
+ _mm256_setzero_si256 (),
+ (__mmask32) __U);
+}
+
extern __inline__ __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvts_ph_hf8 (__m128h __A)
@@ -904,6 +1381,35 @@ _mm256_maskz_cvts_ph_hf8 (__mmask16 __U, __m256h __A)
(__mmask16) __U);
}
+extern __inline__ __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvts_ph_hf8 (__m512h __A)
+{
+ return (__m256i) __builtin_ia32_vcvtph2hf8s512_mask ((__v32hf) __A,
+ (__v32qi) (__m256i)
+ _mm256_undefined_si256 (),
+ (__mmask32) -1);
+}
+
+extern __inline__ __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvts_ph_hf8 (__m256i __W, __mmask32 __U, __m512h __A)
+{
+ return (__m256i) __builtin_ia32_vcvtph2hf8s512_mask ((__v32hf) __A,
+ (__v32qi) (__m256i) __W,
+ (__mmask32) __U);
+}
+
+extern __inline__ __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvts_ph_hf8 (__mmask32 __U, __m512h __A)
+{
+ return (__m256i) __builtin_ia32_vcvtph2hf8s512_mask ((__v32hf) __A,
+ (__v32qi) (__m256i)
+ _mm256_setzero_si256 (),
+ (__mmask32) __U);
+}
+
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtbf8_ph (__m128i __A)
@@ -952,6 +1458,30 @@ _mm256_maskz_cvtbf8_ph (__mmask16 __U, __m128i __A)
(__m256i) _mm256_maskz_cvtepi8_epi16 (__U, __A), 8));
}
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtbf8_ph (__m256i __A)
+{
+ return (__m512h) _mm512_castsi512_ph ((__m512i) _mm512_slli_epi16 (
+ (__m512i) _mm512_cvtepi8_epi16 (__A), 8));
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtbf8_ph (__m512h __S, __mmask32 __U, __m256i __A)
+{
+ return (__m512h) _mm512_castsi512_ph ((__m512i) _mm512_mask_slli_epi16 (
+ (__m512i) __S, __U, (__m512i) _mm512_cvtepi8_epi16 (__A), 8));
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtbf8_ph (__mmask32 __U, __m256i __A)
+{
+ return (__m512h) _mm512_castsi512_ph ((__m512i) _mm512_slli_epi16 (
+ (__m512i) _mm512_maskz_cvtepi8_epi16 (__U, __A), 8));
+}
+
#ifdef __DISABLE_AVX10_2__
#undef __DISABLE_AVX10_2__
#pragma GCC pop_options
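One detail worth noting in the conversions above: _mm512_cvtbf8_ph needs
no dedicated instruction because bf8 (E5M2) is binary16 with the low
eight mantissa bits truncated, so widening each byte and shifting it into
the high byte reconstructs the value exactly (the sign extension done by
cvtepi8_epi16 is immaterial, since the shift discards those bits). A
scalar sketch of that equivalence, illustrative only:

#include <stdint.h>

/* An E5M2 byte placed in the high byte of a 16-bit word is the bit
   pattern of the corresponding binary16 value; this mirrors the
   cvtepi8_epi16 + slli-by-8 sequence in the header.  */
static inline uint16_t
bf8_to_fp16_bits (uint8_t b)
{
  return (uint16_t) ((unsigned) b << 8);
}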
diff --git a/gcc/config/i386/avx10_2mediaintrin.h b/gcc/config/i386/avx10_2mediaintrin.h
index 0993e8e..7d30502 100644
--- a/gcc/config/i386/avx10_2mediaintrin.h
+++ b/gcc/config/i386/avx10_2mediaintrin.h
@@ -394,6 +394,198 @@ _mm256_maskz_dpbuuds_epi32 (__mmask8 __U, __m256i __W,
(__mmask8) __U);
}
+extern __inline __m512i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_dpbssd_epi32 (__m512i __W, __m512i __A, __m512i __B)
+{
+ return (__m512i)
+ __builtin_ia32_vpdpbssd512 ((__v16si) __W, (__v16si) __A, (__v16si) __B);
+}
+
+extern __inline __m512i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_dpbssd_epi32 (__m512i __W, __mmask16 __U,
+ __m512i __A, __m512i __B)
+{
+ return (__m512i)
+ __builtin_ia32_vpdpbssd_v16si_mask ((__v16si) __W,
+ (__v16si) __A,
+ (__v16si) __B,
+ (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_dpbssd_epi32 (__mmask16 __U, __m512i __W,
+ __m512i __A, __m512i __B)
+{
+ return (__m512i)
+ __builtin_ia32_vpdpbssd_v16si_maskz ((__v16si) __W,
+ (__v16si) __A,
+ (__v16si) __B,
+ (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_dpbssds_epi32 (__m512i __W, __m512i __A, __m512i __B)
+{
+ return (__m512i)
+ __builtin_ia32_vpdpbssds512 ((__v16si) __W, (__v16si) __A, (__v16si) __B);
+}
+
+extern __inline __m512i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_dpbssds_epi32 (__m512i __W, __mmask16 __U,
+ __m512i __A, __m512i __B)
+{
+ return (__m512i)
+ __builtin_ia32_vpdpbssds_v16si_mask ((__v16si) __W,
+ (__v16si) __A,
+ (__v16si) __B,
+ (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_dpbssds_epi32 (__mmask16 __U, __m512i __W,
+ __m512i __A, __m512i __B)
+{
+ return (__m512i)
+ __builtin_ia32_vpdpbssds_v16si_maskz ((__v16si) __W,
+ (__v16si) __A,
+ (__v16si) __B,
+ (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_dpbsud_epi32 (__m512i __W, __m512i __A, __m512i __B)
+{
+ return (__m512i)
+ __builtin_ia32_vpdpbsud512 ((__v16si) __W, (__v16si) __A, (__v16si) __B);
+}
+
+extern __inline __m512i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_dpbsud_epi32 (__m512i __W, __mmask16 __U,
+ __m512i __A, __m512i __B)
+{
+ return (__m512i)
+ __builtin_ia32_vpdpbsud_v16si_mask ((__v16si) __W,
+ (__v16si) __A,
+ (__v16si) __B,
+ (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_dpbsud_epi32 (__mmask16 __U, __m512i __W,
+ __m512i __A, __m512i __B)
+{
+ return (__m512i)
+ __builtin_ia32_vpdpbsud_v16si_maskz ((__v16si) __W,
+ (__v16si) __A,
+ (__v16si) __B,
+ (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_dpbsuds_epi32 (__m512i __W, __m512i __A, __m512i __B)
+{
+ return (__m512i)
+ __builtin_ia32_vpdpbsuds512 ((__v16si) __W, (__v16si) __A, (__v16si) __B);
+}
+
+extern __inline __m512i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_dpbsuds_epi32 (__m512i __W, __mmask16 __U,
+ __m512i __A, __m512i __B)
+{
+ return (__m512i)
+ __builtin_ia32_vpdpbsuds_v16si_mask ((__v16si) __W,
+ (__v16si) __A,
+ (__v16si) __B,
+ (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_dpbsuds_epi32 (__mmask16 __U, __m512i __W,
+ __m512i __A, __m512i __B)
+{
+ return (__m512i)
+ __builtin_ia32_vpdpbsuds_v16si_maskz ((__v16si) __W,
+ (__v16si) __A,
+ (__v16si) __B,
+ (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_dpbuud_epi32 (__m512i __W, __m512i __A, __m512i __B)
+{
+ return (__m512i)
+ __builtin_ia32_vpdpbuud512 ((__v16si) __W, (__v16si) __A, (__v16si) __B);
+}
+
+extern __inline __m512i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_dpbuud_epi32 (__m512i __W, __mmask16 __U,
+ __m512i __A, __m512i __B)
+{
+ return (__m512i)
+ __builtin_ia32_vpdpbuud_v16si_mask ((__v16si) __W,
+ (__v16si) __A,
+ (__v16si) __B,
+ (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_dpbuud_epi32 (__mmask16 __U, __m512i __W,
+ __m512i __A, __m512i __B)
+{
+ return (__m512i)
+ __builtin_ia32_vpdpbuud_v16si_maskz ((__v16si) __W,
+ (__v16si) __A,
+ (__v16si) __B,
+ (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_dpbuuds_epi32 (__m512i __W, __m512i __A, __m512i __B)
+{
+ return (__m512i)
+ __builtin_ia32_vpdpbuuds512 ((__v16si) __W, (__v16si) __A, (__v16si) __B);
+}
+
+extern __inline __m512i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_dpbuuds_epi32 (__m512i __W, __mmask16 __U,
+ __m512i __A, __m512i __B)
+{
+ return (__m512i)
+ __builtin_ia32_vpdpbuuds_v16si_mask ((__v16si) __W,
+ (__v16si) __A,
+ (__v16si) __B,
+ (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_dpbuuds_epi32 (__mmask16 __U, __m512i __W,
+ __m512i __A, __m512i __B)
+{
+ return (__m512i)
+ __builtin_ia32_vpdpbuuds_v16si_maskz ((__v16si) __W,
+ (__v16si) __A,
+ (__v16si) __B,
+ (__mmask16) __U);
+}
+
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_dpwsud_epi32 (__m128i __W, __mmask8 __U,
@@ -682,6 +874,233 @@ _mm256_maskz_dpwuuds_epi32 (__mmask8 __U, __m256i __W,
(__mmask8) __U);
}
+extern __inline __m512i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_dpwsud_epi32 (__m512i __W, __m512i __A, __m512i __B)
+{
+ return (__m512i)
+ __builtin_ia32_vpdpwsud512 ((__v16si) __W, (__v16si) __A, (__v16si) __B);
+}
+
+extern __inline __m512i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_dpwsud_epi32 (__m512i __W, __mmask16 __U,
+ __m512i __A, __m512i __B)
+{
+ return (__m512i)
+ __builtin_ia32_vpdpwsud_v16si_mask ((__v16si) __W,
+ (__v16si) __A,
+ (__v16si) __B,
+ (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_dpwsud_epi32 (__mmask16 __U, __m512i __W,
+ __m512i __A, __m512i __B)
+{
+ return (__m512i)
+ __builtin_ia32_vpdpwsud_v16si_maskz ((__v16si) __W,
+ (__v16si) __A,
+ (__v16si) __B,
+ (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_dpwsuds_epi32 (__m512i __W, __m512i __A, __m512i __B)
+{
+ return (__m512i)
+ __builtin_ia32_vpdpwsuds512 ((__v16si) __W, (__v16si) __A, (__v16si) __B);
+}
+
+extern __inline __m512i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_dpwsuds_epi32 (__m512i __W, __mmask16 __U,
+ __m512i __A, __m512i __B)
+{
+ return (__m512i)
+ __builtin_ia32_vpdpwsuds_v16si_mask ((__v16si) __W,
+ (__v16si) __A,
+ (__v16si) __B,
+ (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_dpwsuds_epi32 (__mmask16 __U, __m512i __W,
+ __m512i __A, __m512i __B)
+{
+ return (__m512i)
+ __builtin_ia32_vpdpwsuds_v16si_maskz ((__v16si) __W,
+ (__v16si) __A,
+ (__v16si) __B,
+ (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_dpwusd_epi32 (__m512i __W, __m512i __A, __m512i __B)
+{
+ return (__m512i)
+ __builtin_ia32_vpdpwusd512 ((__v16si) __W, (__v16si) __A, (__v16si) __B);
+}
+
+extern __inline __m512i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_dpwusd_epi32 (__m512i __W, __mmask16 __U,
+ __m512i __A, __m512i __B)
+{
+ return (__m512i)
+ __builtin_ia32_vpdpwusd_v16si_mask ((__v16si) __W,
+ (__v16si) __A,
+ (__v16si) __B,
+ (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_dpwusd_epi32 (__mmask16 __U, __m512i __W,
+ __m512i __A, __m512i __B)
+{
+ return (__m512i)
+ __builtin_ia32_vpdpwusd_v16si_maskz ((__v16si) __W,
+ (__v16si) __A,
+ (__v16si) __B,
+ (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_dpwusds_epi32 (__m512i __W, __m512i __A, __m512i __B)
+{
+ return (__m512i)
+ __builtin_ia32_vpdpwusds512 ((__v16si) __W, (__v16si) __A, (__v16si) __B);
+}
+
+extern __inline __m512i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_dpwusds_epi32 (__m512i __W, __mmask16 __U,
+ __m512i __A, __m512i __B)
+{
+ return (__m512i)
+ __builtin_ia32_vpdpwusds_v16si_mask ((__v16si) __W,
+ (__v16si) __A,
+ (__v16si) __B,
+ (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_dpwusds_epi32 (__mmask16 __U, __m512i __W,
+ __m512i __A, __m512i __B)
+{
+ return (__m512i)
+ __builtin_ia32_vpdpwusds_v16si_maskz ((__v16si) __W,
+ (__v16si) __A,
+ (__v16si) __B,
+ (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_dpwuud_epi32 (__m512i __W, __m512i __A, __m512i __B)
+{
+ return (__m512i)
+ __builtin_ia32_vpdpwuud512 ((__v16si) __W, (__v16si) __A, (__v16si) __B);
+}
+
+extern __inline __m512i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_dpwuud_epi32 (__m512i __W, __mmask16 __U,
+ __m512i __A, __m512i __B)
+{
+ return (__m512i)
+ __builtin_ia32_vpdpwuud_v16si_mask ((__v16si) __W,
+ (__v16si) __A,
+ (__v16si) __B,
+ (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_dpwuud_epi32 (__mmask16 __U, __m512i __W,
+ __m512i __A, __m512i __B)
+{
+ return (__m512i)
+ __builtin_ia32_vpdpwuud_v16si_maskz ((__v16si) __W,
+ (__v16si) __A,
+ (__v16si) __B,
+ (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_dpwuuds_epi32 (__m512i __W, __m512i __A, __m512i __B)
+{
+ return (__m512i)
+ __builtin_ia32_vpdpwuuds512 ((__v16si) __W, (__v16si) __A, (__v16si) __B);
+}
+
+extern __inline __m512i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_dpwuuds_epi32 (__m512i __W, __mmask16 __U,
+ __m512i __A, __m512i __B)
+{
+ return (__m512i)
+ __builtin_ia32_vpdpwuuds_v16si_mask ((__v16si) __W,
+ (__v16si) __A,
+ (__v16si) __B,
+ (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_dpwuuds_epi32 (__mmask16 __U, __m512i __W,
+ __m512i __A, __m512i __B)
+{
+ return (__m512i)
+ __builtin_ia32_vpdpwuuds_v16si_maskz ((__v16si) __W,
+ (__v16si) __A,
+ (__v16si) __B,
+ (__mmask16) __U);
+}
+
+extern __inline __m512
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_dpph_ps (__m512 __W, __m512h __A, __m512h __B)
+{
+ return (__m512)
+ __builtin_ia32_vdpphps512_mask ((__v16sf) __W,
+ (__v16sf) __A,
+ (__v16sf) __B,
+ (__mmask16) -1);
+}
+
+extern __inline __m512
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_dpph_ps (__m512 __W, __mmask16 __U, __m512h __A,
+ __m512h __B)
+{
+ return (__m512)
+ __builtin_ia32_vdpphps512_mask ((__v16sf) __W,
+ (__v16sf) __A,
+ (__v16sf) __B,
+ (__mmask16) __U);
+}
+
+extern __inline __m512
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_dpph_ps (__mmask16 __U, __m512 __W, __m512h __A,
+ __m512h __B)
+{
+ return (__m512)
+ __builtin_ia32_vdpphps512_maskz ((__v16sf) __W,
+ (__v16sf) __A,
+ (__v16sf) __B,
+ (__mmask16) __U);
+}
+
extern __inline __m256
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_dpph_ps (__m256 __W, __m256h __A, __m256h __B)
@@ -800,6 +1219,39 @@ _mm256_maskz_mpsadbw_epu8 (__mmask16 __U, __m256i __X,
(__v16hi) _mm256_setzero_si256 (),
__U);
}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mpsadbw_epu8 (__m512i __X, __m512i __Y, const int __M)
+{
+ return (__m512i) __builtin_ia32_mpsadbw512 ((__v64qi) __X,
+ (__v64qi) __Y,
+ __M);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_mpsadbw_epu8 (__m512i __W, __mmask32 __U, __m512i __X,
+ __m512i __Y, const int __M)
+{
+ return (__m512i) __builtin_ia32_mpsadbw512_mask ((__v64qi) __X,
+ (__v64qi) __Y,
+ __M,
+ (__v32hi) __W,
+ __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_mpsadbw_epu8 (__mmask32 __U, __m512i __X,
+ __m512i __Y, const int __M)
+{
+ return (__m512i) __builtin_ia32_mpsadbw512_mask ((__v64qi) __X,
+ (__v64qi) __Y,
+ __M,
+ (__v32hi) _mm512_setzero_epi32 (),
+ __U);
+}
#else
#define _mm_mask_mpsadbw_epu8(W, U, X, Y, M) \
(__m128i) __builtin_ia32_mpsadbw128_mask ((__v16qi)(__m128i)(X), \
@@ -829,6 +1281,23 @@ _mm256_maskz_mpsadbw_epu8 (__mmask16 __U, __m256i __X,
(__v16hi) _mm256_setzero_si256 (), \
(__mmask16)(U))
+#define _mm512_mpsadbw_epu8(X, Y, M) \
+ (__m512i) __builtin_ia32_mpsadbw512 ((__v64qi)(__m512i)(X), \
+ (__v64qi)(__m512i)(Y), (int)(M))
+
+#define _mm512_mask_mpsadbw_epu8(W, U, X, Y, M) \
+ (__m512i) __builtin_ia32_mpsadbw512_mask ((__v64qi)(__m512i)(X), \
+ (__v64qi)(__m512i)(Y), \
+ (int)(M), \
+ (__v32hi)(__m512i)(W), \
+ (__mmask32)(U))
+
+#define _mm512_maskz_mpsadbw_epu8(U, X, Y, M) \
+ (__m512i) __builtin_ia32_mpsadbw512_mask ((__v64qi)(__m512i)(X), \
+ (__v64qi)(__m512i)(Y), \
+ (int)(M), \
+ (__v32hi) _mm512_setzero_epi32 (), \
+ (__mmask32)(U))
#endif
#ifdef __DISABLE_AVX10_2__
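For the dot-product additions above, each 32-bit result lane is the
accumulator lane plus four byte (or two word) products; ss/su/us/uu names
the signedness of the two sources and a trailing s saturates the
accumulation. A scalar model of one _mm512_dpbssd_epi32 lane,
illustrative only:

#include <stdint.h>

/* One lane of VPDPBSSD: signed-byte by signed-byte products summed
   into the 32-bit accumulator, without saturation.  */
static inline int32_t
dpbssd_lane (int32_t acc, const int8_t a[4], const int8_t b[4])
{
  for (int i = 0; i < 4; i++)
    acc += (int32_t) a[i] * (int32_t) b[i];
  return acc;
}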
diff --git a/gcc/config/i386/avx10_2minmaxintrin.h b/gcc/config/i386/avx10_2minmaxintrin.h
index 0a4a253..f9fe14e 100644
--- a/gcc/config/i386/avx10_2minmaxintrin.h
+++ b/gcc/config/i386/avx10_2minmaxintrin.h
@@ -103,6 +103,43 @@ _mm256_maskz_minmax_pbh (__mmask16 __U, __m256bh __A,
(__mmask16) __U);
}
+extern __inline __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_minmax_pbh (__m512bh __A, __m512bh __B, const int __C)
+{
+ return (__m512bh) __builtin_ia32_minmaxbf16512_mask ((__v32bf) __A,
+ (__v32bf) __B,
+ __C,
+ (__v32bf)(__m512bh)
+ _mm512_setzero_si512 (),
+ (__mmask32) -1);
+}
+
+extern __inline __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_minmax_pbh (__m512bh __W, __mmask32 __U,
+ __m512bh __A, __m512bh __B, const int __C)
+{
+ return (__m512bh) __builtin_ia32_minmaxbf16512_mask ((__v32bf) __A,
+ (__v32bf) __B,
+ __C,
+ (__v32bf) __W,
+ (__mmask32) __U);
+}
+
+extern __inline __m512bh
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_minmax_pbh (__mmask32 __U, __m512bh __A,
+ __m512bh __B, const int __C)
+{
+ return (__m512bh) __builtin_ia32_minmaxbf16512_mask ((__v32bf) __A,
+ (__v32bf) __B,
+ __C,
+ (__v32bf)(__m512bh)
+ _mm512_setzero_si512 (),
+ (__mmask32) __U);
+}
+
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_minmax_pd (__m128d __A, __m128d __B, const int __C)
@@ -169,6 +206,84 @@ _mm256_maskz_minmax_pd (__mmask8 __U, __m256d __A, __m256d __B, const int __C)
(__mmask8) __U);
}
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_minmax_pd (__m512d __A, __m512d __B, const int __C)
+{
+ return (__m512d) __builtin_ia32_minmaxpd512_mask_round ((__v8df) __A,
+ (__v8df) __B,
+ __C,
+ (__v8df)
+ _mm512_undefined_pd (),
+ (__mmask8) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_minmax_pd (__m512d __W, __mmask8 __U, __m512d __A,
+ __m512d __B, const int __C)
+{
+ return (__m512d) __builtin_ia32_minmaxpd512_mask_round ((__v8df) __A,
+ (__v8df) __B,
+ __C,
+ (__v8df) __W,
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_minmax_pd (__mmask8 __U, __m512d __A, __m512d __B,
+ const int __C)
+{
+ return (__m512d) __builtin_ia32_minmaxpd512_mask_round ((__v8df) __A,
+ (__v8df) __B,
+ __C,
+ (__v8df)
+ _mm512_setzero_pd (),
+ (__mmask8) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_minmax_round_pd (__m512d __A, __m512d __B, const int __C,
+ const int __R)
+{
+ return (__m512d) __builtin_ia32_minmaxpd512_mask_round ((__v8df) __A,
+ (__v8df) __B,
+ __C,
+ (__v8df)
+ _mm512_undefined_pd (),
+ (__mmask8) -1, __R);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_minmax_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
+ __m512d __B, const int __C, const int __R)
+{
+ return (__m512d) __builtin_ia32_minmaxpd512_mask_round ((__v8df) __A,
+ (__v8df) __B,
+ __C,
+ (__v8df) __W,
+ (__mmask8) __U, __R);
+}
+
+extern __inline __m512d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_minmax_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
+ const int __C, const int __R)
+{
+ return (__m512d) __builtin_ia32_minmaxpd512_mask_round ((__v8df) __A,
+ (__v8df) __B,
+ __C,
+ (__v8df)
+ _mm512_setzero_pd (),
+ (__mmask8) __U, __R);
+}
+
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_minmax_ph (__m128h __A, __m128h __B, const int __C)
@@ -235,6 +350,83 @@ _mm256_maskz_minmax_ph (__mmask16 __U, __m256h __A, __m256h __B, const int __C)
(__mmask16) __U);
}
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_minmax_ph (__m512h __A, __m512h __B, const int __C)
+{
+ return (__m512h) __builtin_ia32_minmaxph512_mask_round ((__v32hf) __A,
+ (__v32hf) __B,
+ __C,
+ (__v32hf)
+ _mm512_undefined_ph (),
+ (__mmask32) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_minmax_ph (__m512h __W, __mmask32 __U, __m512h __A,
+ __m512h __B, const int __C)
+{
+ return (__m512h) __builtin_ia32_minmaxph512_mask_round ((__v32hf) __A,
+ (__v32hf) __B,
+ __C,
+ (__v32hf) __W,
+ (__mmask32) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_minmax_ph (__mmask32 __U, __m512h __A, __m512h __B,
+ const int __C)
+{
+ return (__m512h) __builtin_ia32_minmaxph512_mask_round ((__v32hf) __A,
+ (__v32hf) __B,
+ __C,
+ (__v32hf)
+ _mm512_setzero_ph (),
+ (__mmask32) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_minmax_round_ph (__m512h __A, __m512h __B, const int __C, const int __R)
+{
+ return (__m512h) __builtin_ia32_minmaxph512_mask_round ((__v32hf) __A,
+ (__v32hf) __B,
+ __C,
+ (__v32hf)
+ _mm512_undefined_ph (),
+ (__mmask32) -1, __R);
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_minmax_round_ph (__m512h __W, __mmask32 __U, __m512h __A,
+ __m512h __B, const int __C, const int __R)
+{
+ return (__m512h) __builtin_ia32_minmaxph512_mask_round ((__v32hf) __A,
+ (__v32hf) __B,
+ __C,
+ (__v32hf) __W,
+ (__mmask32) __U, __R);
+}
+
+extern __inline __m512h
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_minmax_round_ph (__mmask32 __U, __m512h __A, __m512h __B,
+ const int __C, const int __R)
+{
+ return (__m512h) __builtin_ia32_minmaxph512_mask_round ((__v32hf) __A,
+ (__v32hf) __B,
+ __C,
+ (__v32hf)
+ _mm512_setzero_ph (),
+ (__mmask32) __U, __R);
+}
+
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_minmax_ps (__m128 __A, __m128 __B, const int __C)
@@ -301,6 +493,83 @@ _mm256_maskz_minmax_ps (__mmask8 __U, __m256 __A, __m256 __B, const int __C)
(__mmask8) __U);
}
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_minmax_ps (__m512 __A, __m512 __B, const int __C)
+{
+ return (__m512) __builtin_ia32_minmaxps512_mask_round ((__v16sf) __A,
+ (__v16sf) __B,
+ __C,
+ (__v16sf)
+ _mm512_undefined_ps (),
+ (__mmask16) -1,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_minmax_ps (__m512 __W, __mmask16 __U, __m512 __A,
+ __m512 __B, const int __C)
+{
+ return (__m512) __builtin_ia32_minmaxps512_mask_round ((__v16sf) __A,
+ (__v16sf) __B,
+ __C,
+ (__v16sf) __W,
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_minmax_ps (__mmask16 __U, __m512 __A, __m512 __B,
+ const int __C)
+{
+ return (__m512) __builtin_ia32_minmaxps512_mask_round ((__v16sf) __A,
+ (__v16sf) __B,
+ __C,
+ (__v16sf)
+ _mm512_setzero_ps (),
+ (__mmask16) __U,
+ _MM_FROUND_CUR_DIRECTION);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_minmax_round_ps (__m512 __A, __m512 __B, const int __C, const int __R)
+{
+ return (__m512) __builtin_ia32_minmaxps512_mask_round ((__v16sf) __A,
+ (__v16sf) __B,
+ __C,
+ (__v16sf)
+ _mm512_undefined_ps (),
+ (__mmask16) -1, __R);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_minmax_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
+ __m512 __B, const int __C, const int __R)
+{
+ return (__m512) __builtin_ia32_minmaxps512_mask_round ((__v16sf) __A,
+ (__v16sf) __B,
+ __C,
+ (__v16sf) __W,
+ (__mmask16) __U, __R);
+}
+
+extern __inline __m512
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_minmax_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
+ const int __C, const int __R)
+{
+ return (__m512) __builtin_ia32_minmaxps512_mask_round ((__v16sf) __A,
+ (__v16sf) __B,
+ __C,
+ (__v16sf)
+ _mm512_setzero_ps (),
+ (__mmask16) __U, __R);
+}
+
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_minmax_sd (__m128d __A, __m128d __B, const int __C)
@@ -580,6 +849,29 @@ _mm_maskz_minmax_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
_mm256_setzero_si256 (), \
(__mmask16) (U)))
+#define _mm512_minmax_pbh(A, B, C) \
+ ((__m512bh) __builtin_ia32_minmaxbf16512_mask ((__v32bf) (A), \
+ (__v32bf) (B), \
+ (int) (C), \
+ (__v32bf) (__m512bh) \
+ _mm512_setzero_si512 (), \
+ (__mmask32) (-1)))
+
+#define _mm512_mask_minmax_pbh(W, U, A, B, C) \
+ ((__m512bh) __builtin_ia32_minmaxbf16512_mask ((__v32bf) (A), \
+ (__v32bf) (B), \
+ (int) (C), \
+ (__v32bf) (__m512bh) (W), \
+ (__mmask32) (U)))
+
+#define _mm512_maskz_minmax_pbh(U, A, B, C) \
+ ((__m512bh) __builtin_ia32_minmaxbf16512_mask ((__v32bf) (A), \
+ (__v32bf) (B), \
+ (int) (C), \
+ (__v32bf) (__m512bh) \
+ _mm512_setzero_si512 (), \
+ (__mmask32) (U)))
+
#define _mm_minmax_pd(A, B, C) \
((__m128d) __builtin_ia32_minmaxpd128_mask ((__v2df) (A), \
(__v2df) (B), \
@@ -626,6 +918,58 @@ _mm_maskz_minmax_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
_mm256_setzero_pd (), \
(__mmask8) (U)))
+#define _mm512_minmax_pd(A, B, C) \
+ ((__m512d) __builtin_ia32_minmaxpd512_mask_round ((__v8df) (A), \
+ (__v8df) (B), \
+ (int) (C), \
+ (__v8df) (__m512d) \
+ _mm512_undefined_pd (), \
+ (__mmask8) (-1), \
+ _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_mask_minmax_pd(W, U, A, B, C) \
+ ((__m512d) __builtin_ia32_minmaxpd512_mask_round ((__v8df) (A), \
+ (__v8df) (B), \
+ (int) (C), \
+ (__v8df) (__m512d) (W), \
+ (__mmask8) (U), \
+ _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_maskz_minmax_pd(U, A, B, C) \
+ ((__m512d) __builtin_ia32_minmaxpd512_mask_round ((__v8df) (A), \
+ (__v8df) (B), \
+ (int) (C), \
+ (__v8df) (__m512d) \
+ _mm512_setzero_pd (), \
+ (__mmask8) (U), \
+ _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_minmax_round_pd(A, B, C, R) \
+ ((__m512d) __builtin_ia32_minmaxpd512_mask_round ((__v8df) (A), \
+ (__v8df) (B), \
+ (int) (C), \
+ (__v8df) (__m512d) \
+ _mm512_undefined_pd (), \
+ (__mmask8) (-1), \
+ (int) (R)))
+
+#define _mm512_mask_minmax_round_pd(W, U, A, B, C, R) \
+ ((__m512d) __builtin_ia32_minmaxpd512_mask_round ((__v8df) (A), \
+ (__v8df) (B), \
+ (int) (C), \
+ (__v8df) (__m512d) (W), \
+ (__mmask8) (U), \
+ (int) (R)))
+
+#define _mm512_maskz_minmax_round_pd(U, A, B, C, R) \
+ ((__m512d) __builtin_ia32_minmaxpd512_mask_round ((__v8df) (A), \
+ (__v8df) (B), \
+ (int) (C), \
+ (__v8df) (__m512d) \
+ _mm512_setzero_pd (), \
+ (__mmask8) (U), \
+ (int) (R)))
+
#define _mm_minmax_ph(A, B, C) \
((__m128h) __builtin_ia32_minmaxph128_mask ((__v8hf) (A), \
(__v8hf) (B), \
@@ -672,6 +1016,58 @@ _mm_maskz_minmax_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
_mm256_setzero_ph (), \
(__mmask16) (U)))
+#define _mm512_minmax_ph(A, B, C) \
+ ((__m512h) __builtin_ia32_minmaxph512_mask_round ((__v32hf) (A), \
+ (__v32hf) (B), \
+ (int) (C), \
+ (__v32hf) (__m512h) \
+ _mm512_undefined_ph (), \
+ (__mmask32) (-1), \
+ _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_mask_minmax_ph(W, U, A, B, C) \
+ ((__m512h) __builtin_ia32_minmaxph512_mask_round ((__v32hf) (A), \
+ (__v32hf) (B), \
+ (int) (C), \
+ (__v32hf) (__m512h) (W), \
+ (__mmask32) (U), \
+ _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_maskz_minmax_ph(U, A, B, C) \
+ ((__m512h) __builtin_ia32_minmaxph512_mask_round ((__v32hf) (A), \
+ (__v32hf) (B), \
+ (int) (C), \
+ (__v32hf) (__m512h) \
+ _mm512_setzero_ph (), \
+ (__mmask32) (U), \
+ _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_minmax_round_ph(A, B, C, R) \
+ ((__m512h) __builtin_ia32_minmaxph512_mask_round ((__v32hf) (A), \
+ (__v32hf) (B), \
+ (int) (C), \
+ (__v32hf) (__m512h) \
+ _mm512_undefined_ph (), \
+ (__mmask32) (-1), \
+ (int) (R)))
+
+#define _mm512_mask_minmax_round_ph(W, U, A, B, C, R) \
+ ((__m512h) __builtin_ia32_minmaxph512_mask_round ((__v32hf) (A), \
+ (__v32hf) (B), \
+ (int) (C), \
+ (__v32hf) (__m512h) (W), \
+ (__mmask32) (U), \
+ (int) (R)))
+
+#define _mm512_maskz_minmax_round_ph(U, A, B, C, R) \
+ ((__m512h) __builtin_ia32_minmaxph512_mask_round ((__v32hf) (A), \
+ (__v32hf) (B), \
+ (int) (C), \
+ (__v32hf) (__m512h) \
+ _mm512_setzero_ph (), \
+ (__mmask32) (U), \
+ (int) (R)))
+
#define _mm_minmax_ps(A, B, C) \
((__m128) __builtin_ia32_minmaxps128_mask ((__v4sf) (A), \
(__v4sf) (B), \
@@ -718,6 +1114,58 @@ _mm_maskz_minmax_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
_mm256_setzero_ps (), \
(__mmask8) (U)))
+#define _mm512_minmax_ps(A, B, C) \
+ ((__m512) __builtin_ia32_minmaxps512_mask_round ((__v16sf) (A), \
+ (__v16sf) (B), \
+ (int) (C), \
+ (__v16sf) (__m512) \
+ _mm512_undefined_ps (), \
+ (__mmask16) (-1), \
+ _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_mask_minmax_ps(W, U, A, B, C) \
+ ((__m512) __builtin_ia32_minmaxps512_mask_round ((__v16sf) (A), \
+ (__v16sf) (B), \
+ (int) (C), \
+ (__v16sf) (__m512) (W), \
+ (__mmask16) (U), \
+ _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_maskz_minmax_ps(U, A, B, C) \
+ ((__m512) __builtin_ia32_minmaxps512_mask_round ((__v16sf) (A), \
+ (__v16sf) (B), \
+ (int) (C), \
+ (__v16sf) (__m512) \
+ _mm512_setzero_ps (), \
+ (__mmask16) (U), \
+ _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_minmax_round_ps(A, B, C, R) \
+ ((__m512) __builtin_ia32_minmaxps512_mask_round ((__v16sf) (A), \
+ (__v16sf) (B), \
+ (int) (C), \
+ (__v16sf) (__m512) \
+ _mm512_undefined_ps (), \
+ (__mmask16) (-1), \
+ (int) (R)))
+
+#define _mm512_mask_minmax_round_ps(W, U, A, B, C, R) \
+ ((__m512) __builtin_ia32_minmaxps512_mask_round ((__v16sf) (A), \
+ (__v16sf) (B), \
+ (int) (C), \
+ (__v16sf) (__m512) (W), \
+ (__mmask16) (U), \
+ (int) (R)))
+
+#define _mm512_maskz_minmax_round_ps(U, A, B, C, R) \
+ ((__m512) __builtin_ia32_minmaxps512_mask_round ((__v16sf) (A), \
+ (__v16sf) (B), \
+ (int) (C), \
+ (__v16sf) (__m512) \
+ _mm512_setzero_ps (), \
+ (__mmask16) (U), \
+ (int) (R)))
+
#define _mm_minmax_round_sd(A, B, C, R) \
((__m128d) __builtin_ia32_minmaxsd_mask_round ((__v2df) (A), \
(__v2df) (B), \
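The _round variants above take an explicit exception-suppression operand
while the plain forms pass _MM_FROUND_CUR_DIRECTION, and the #else macros
exist because without __OPTIMIZE__ the inline wrappers cannot guarantee
the selector folds to an immediate. A usage sketch; the selector value 0
is illustrative (see the VMINMAX immediate encoding):

#include <immintrin.h>

/* minmax over 8 double lanes with floating-point exceptions
   suppressed via SAE.  */
__m512d
minmax_example (__m512d a, __m512d b)
{
  return _mm512_minmax_round_pd (a, b, 0, _MM_FROUND_NO_EXC);
}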
diff --git a/gcc/config/i386/avx10_2satcvtintrin.h b/gcc/config/i386/avx10_2satcvtintrin.h
index 78bcd72..c4fa19b 100644
--- a/gcc/config/i386/avx10_2satcvtintrin.h
+++ b/gcc/config/i386/avx10_2satcvtintrin.h
@@ -63,37 +63,6 @@ _mm_maskz_ipcvts_bf16_epi8 (__mmask8 __U, __m128bh __A)
(__mmask8) __U);
}
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_ipcvts_bf16_epi8 (__m256bh __A)
-{
- return
- (__m256i) __builtin_ia32_cvtbf162ibs256_mask ((__v16bf) __A,
- (__v16hi)
- _mm256_undefined_si256 (),
- (__mmask16) -1);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_ipcvts_bf16_epi8 (__m256i __W, __mmask16 __U, __m256bh __A)
-{
- return (__m256i) __builtin_ia32_cvtbf162ibs256_mask ((__v16bf) __A,
- (__v16hi) __W,
- (__mmask16) __U);
-}
-
-extern __inline __m256i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_ipcvts_bf16_epi8 (__mmask16 __U, __m256bh __A)
-{
- return
- (__m256i) __builtin_ia32_cvtbf162ibs256_mask ((__v16bf) __A,
- (__v16hi)
- _mm256_setzero_si256 (),
- (__mmask16) __U);
-}
-
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_ipcvts_bf16_epu8 (__m128bh __A)
@@ -127,6 +96,37 @@ _mm_maskz_ipcvts_bf16_epu8 (__mmask8 __U, __m128bh __A)
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_ipcvts_bf16_epi8 (__m256bh __A)
+{
+ return
+ (__m256i) __builtin_ia32_cvtbf162ibs256_mask ((__v16bf) __A,
+ (__v16hi)
+ _mm256_undefined_si256 (),
+ (__mmask16) -1);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_ipcvts_bf16_epi8 (__m256i __W, __mmask16 __U, __m256bh __A)
+{
+ return (__m256i) __builtin_ia32_cvtbf162ibs256_mask ((__v16bf) __A,
+ (__v16hi) __W,
+ (__mmask16) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_ipcvts_bf16_epi8 (__mmask16 __U, __m256bh __A)
+{
+ return
+ (__m256i) __builtin_ia32_cvtbf162ibs256_mask ((__v16bf) __A,
+ (__v16hi)
+ _mm256_setzero_si256 (),
+ (__mmask16) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_ipcvts_bf16_epu8 (__m256bh __A)
{
return
@@ -156,120 +156,66 @@ _mm256_maskz_ipcvts_bf16_epu8 (__mmask16 __U, __m256bh __A)
(__mmask16) __U);
}
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_ipcvts_ph_epi8 (__m128h __A)
-{
- return (__m128i) __builtin_ia32_cvtph2ibs128_mask ((__v8hf) __A,
- (__v8hi)
- _mm_undefined_si128 (),
- (__mmask8) -1);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_ipcvts_ph_epi8 (__m128i __W, __mmask8 __U, __m128h __A)
-{
- return (__m128i) __builtin_ia32_cvtph2ibs128_mask ((__v8hf) __A,
- (__v8hi) __W,
- (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_ipcvts_ph_epi8 (__mmask8 __U, __m128h __A)
-{
- return (__m128i) __builtin_ia32_cvtph2ibs128_mask ((__v8hf) __A,
- (__v8hi)
- _mm_setzero_si128 (),
- (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_ipcvts_ph_epu8 (__m128h __A)
-{
- return (__m128i) __builtin_ia32_cvtph2iubs128_mask ((__v8hf) __A,
- (__v8hi)
- _mm_undefined_si128 (),
- (__mmask8) -1);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_ipcvts_ph_epu8 (__m128i __W, __mmask8 __U, __m128h __A)
-{
- return (__m128i) __builtin_ia32_cvtph2iubs128_mask ((__v8hf) __A,
- (__v8hi) __W,
- (__mmask8) __U);
-}
-
-extern __inline __m128i
-__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_ipcvts_ph_epu8 (__mmask8 __U, __m128h __A)
-{
- return (__m128i) __builtin_ia32_cvtph2iubs128_mask ((__v8hf) __A,
- (__v8hi)
- _mm_setzero_si128 (),
- (__mmask8) __U);
-}
-
-extern __inline __m128i
+extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_ipcvts_ps_epi8 (__m128 __A)
+_mm512_ipcvts_bf16_epi8 (__m512bh __A)
{
- return (__m128i) __builtin_ia32_cvtps2ibs128_mask ((__v4sf) __A,
- (__v4si)
- _mm_undefined_si128 (),
- (__mmask8) -1);
+ return
+ (__m512i) __builtin_ia32_cvtbf162ibs512_mask ((__v32bf) __A,
+ (__v32hi)
+ _mm512_undefined_si512 (),
+ (__mmask32) -1);
}
-extern __inline __m128i
+extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_ipcvts_ps_epi8 (__m128i __W, __mmask8 __U, __m128 __A)
+_mm512_mask_ipcvts_bf16_epi8 (__m512i __W, __mmask32 __U, __m512bh __A)
{
- return (__m128i) __builtin_ia32_cvtps2ibs128_mask ((__v4sf) __A,
- (__v4si) __W,
- (__mmask8) __U);
+ return (__m512i) __builtin_ia32_cvtbf162ibs512_mask ((__v32bf) __A,
+ (__v32hi) __W,
+ (__mmask32) __U);
}
-extern __inline __m128i
+extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_ipcvts_ps_epi8 (__mmask8 __U, __m128 __A)
+_mm512_maskz_ipcvts_bf16_epi8 (__mmask32 __U, __m512bh __A)
{
- return (__m128i) __builtin_ia32_cvtps2ibs128_mask ((__v4sf) __A,
- (__v4si)
- _mm_setzero_si128 (),
- (__mmask8) __U);
+ return
+ (__m512i) __builtin_ia32_cvtbf162ibs512_mask ((__v32bf) __A,
+ (__v32hi)
+ _mm512_setzero_si512 (),
+ (__mmask32) __U);
}
-extern __inline __m128i
+extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_ipcvts_ps_epu8 (__m128 __A)
+_mm512_ipcvts_bf16_epu8 (__m512bh __A)
{
- return (__m128i) __builtin_ia32_cvtps2iubs128_mask ((__v4sf) __A,
- (__v4si)
- _mm_undefined_si128 (),
- (__mmask8) -1);
+ return
+ (__m512i) __builtin_ia32_cvtbf162iubs512_mask ((__v32bf) __A,
+ (__v32hi)
+ _mm512_undefined_si512 (),
+ (__mmask32) -1);
}
-extern __inline __m128i
+extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_ipcvts_ps_epu8 (__m128i __W, __mmask8 __U, __m128 __A)
+_mm512_mask_ipcvts_bf16_epu8 (__m512i __W, __mmask32 __U, __m512bh __A)
{
- return (__m128i) __builtin_ia32_cvtps2iubs128_mask ((__v4sf) __A,
- (__v4si) __W,
- (__mmask8) __U);
+ return (__m512i) __builtin_ia32_cvtbf162iubs512_mask ((__v32bf) __A,
+ (__v32hi) __W,
+ (__mmask32) __U);
}
-extern __inline __m128i
+extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_ipcvts_ps_epu8 (__mmask8 __U, __m128 __A)
+_mm512_maskz_ipcvts_bf16_epu8 (__mmask32 __U, __m512bh __A)
{
- return (__m128i) __builtin_ia32_cvtps2iubs128_mask ((__v4sf) __A,
- (__v4si)
- _mm_setzero_si128 (),
- (__mmask8) __U);
+ return
+ (__m512i) __builtin_ia32_cvtbf162iubs512_mask ((__v32bf) __A,
+ (__v32hi)
+ _mm512_setzero_si512 (),
+ (__mmask32) __U);
}
extern __inline __m128i
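All of the saturating-convert wrappers in this file follow the same
three-variant masking convention: the plain form writes every lane, the
mask_ form merges unselected lanes from __W, and the maskz_ form zeroes
them. A scalar model of one lane, illustrative only:

#include <stdint.h>

/* Per-lane merge masking; the maskz_ variants pass old = 0.  */
static inline int16_t
merge_lane (uint32_t mask, unsigned lane, int16_t result, int16_t old)
{
  return ((mask >> lane) & 1) ? result : old;
}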
@@ -390,6 +336,183 @@ _mm256_maskz_ipcvtts_bf16_epu8 (__mmask16 __U, __m256bh __A)
(__mmask16) __U);
}
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_ipcvtts_bf16_epi8 (__m512bh __A)
+{
+ return
+ (__m512i) __builtin_ia32_cvttbf162ibs512_mask ((__v32bf) __A,
+ (__v32hi)
+ _mm512_undefined_si512 (),
+ (__mmask32) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_ipcvtts_bf16_epi8 (__m512i __W, __mmask32 __U, __m512bh __A)
+{
+ return (__m512i) __builtin_ia32_cvttbf162ibs512_mask ((__v32bf) __A,
+ (__v32hi) __W,
+ (__mmask32) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_ipcvtts_bf16_epi8 (__mmask32 __U, __m512bh __A)
+{
+ return
+ (__m512i) __builtin_ia32_cvttbf162ibs512_mask ((__v32bf) __A,
+ (__v32hi)
+ _mm512_setzero_si512 (),
+ (__mmask32) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_ipcvtts_bf16_epu8 (__m512bh __A)
+{
+ return (__m512i)
+ __builtin_ia32_cvttbf162iubs512_mask ((__v32bf) __A,
+ (__v32hi) _mm512_undefined_si512 (),
+ (__mmask32) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_ipcvtts_bf16_epu8 (__m512i __W, __mmask32 __U, __m512bh __A)
+{
+ return (__m512i) __builtin_ia32_cvttbf162iubs512_mask ((__v32bf) __A,
+ (__v32hi) __W,
+ (__mmask32) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_ipcvtts_bf16_epu8 (__mmask32 __U, __m512bh __A)
+{
+ return (__m512i)
+ __builtin_ia32_cvttbf162iubs512_mask ((__v32bf) __A,
+ (__v32hi)
+ _mm512_setzero_si512 (),
+ (__mmask32) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_ipcvts_ph_epi8 (__m128h __A)
+{
+ return (__m128i) __builtin_ia32_cvtph2ibs128_mask ((__v8hf) __A,
+ (__v8hi)
+ _mm_undefined_si128 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_ipcvts_ph_epi8 (__m128i __W, __mmask8 __U, __m128h __A)
+{
+ return (__m128i) __builtin_ia32_cvtph2ibs128_mask ((__v8hf) __A,
+ (__v8hi) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_ipcvts_ph_epi8 (__mmask8 __U, __m128h __A)
+{
+ return (__m128i) __builtin_ia32_cvtph2ibs128_mask ((__v8hf) __A,
+ (__v8hi)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_ipcvts_ph_epu8 (__m128h __A)
+{
+ return (__m128i) __builtin_ia32_cvtph2iubs128_mask ((__v8hf) __A,
+ (__v8hi)
+ _mm_undefined_si128 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_ipcvts_ph_epu8 (__m128i __W, __mmask8 __U, __m128h __A)
+{
+ return (__m128i) __builtin_ia32_cvtph2iubs128_mask ((__v8hf) __A,
+ (__v8hi) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_ipcvts_ph_epu8 (__mmask8 __U, __m128h __A)
+{
+ return (__m128i) __builtin_ia32_cvtph2iubs128_mask ((__v8hf) __A,
+ (__v8hi)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_ipcvts_ps_epi8 (__m128 __A)
+{
+ return (__m128i) __builtin_ia32_cvtps2ibs128_mask ((__v4sf) __A,
+ (__v4si)
+ _mm_undefined_si128 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_ipcvts_ps_epi8 (__m128i __W, __mmask8 __U, __m128 __A)
+{
+ return (__m128i) __builtin_ia32_cvtps2ibs128_mask ((__v4sf) __A,
+ (__v4si) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_ipcvts_ps_epi8 (__mmask8 __U, __m128 __A)
+{
+ return (__m128i) __builtin_ia32_cvtps2ibs128_mask ((__v4sf) __A,
+ (__v4si)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_ipcvts_ps_epu8 (__m128 __A)
+{
+ return (__m128i) __builtin_ia32_cvtps2iubs128_mask ((__v4sf) __A,
+ (__v4si)
+ _mm_undefined_si128 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_ipcvts_ps_epu8 (__m128i __W, __mmask8 __U, __m128 __A)
+{
+ return (__m128i) __builtin_ia32_cvtps2iubs128_mask ((__v4sf) __A,
+ (__v4si) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_ipcvts_ps_epu8 (__mmask8 __U, __m128 __A)
+{
+ return (__m128i) __builtin_ia32_cvtps2iubs128_mask ((__v4sf) __A,
+ (__v4si)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_ipcvtts_ph_epi8 (__m128h __A)
@@ -1234,6 +1357,1416 @@ _mm256_maskz_cvtts_ps_epu64 (__mmask8 __U, __m128 __A)
(__mmask8) __U);
}
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_ipcvts_ph_epi8 (__m512h __A)
+{
+ return
+ (__m512i) __builtin_ia32_cvtph2ibs512_mask ((__v32hf) __A,
+ (__v32hi)
+ _mm512_undefined_si512 (),
+ (__mmask32) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_ipcvts_ph_epi8 (__m512i __W, __mmask32 __U, __m512h __A)
+{
+ return (__m512i) __builtin_ia32_cvtph2ibs512_mask ((__v32hf) __A,
+ (__v32hi) __W,
+ (__mmask32) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_ipcvts_ph_epi8 (__mmask32 __U, __m512h __A)
+{
+ return
+ (__m512i) __builtin_ia32_cvtph2ibs512_mask ((__v32hf) __A,
+ (__v32hi)
+ _mm512_setzero_si512 (),
+ (__mmask32) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_ipcvts_ph_epu8 (__m512h __A)
+{
+ return
+ (__m512i) __builtin_ia32_cvtph2iubs512_mask ((__v32hf) __A,
+ (__v32hi)
+ _mm512_undefined_si512 (),
+ (__mmask32) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_ipcvts_ph_epu8 (__m512i __W, __mmask32 __U, __m512h __A)
+{
+ return (__m512i) __builtin_ia32_cvtph2iubs512_mask ((__v32hf) __A,
+ (__v32hi) __W,
+ (__mmask32) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_ipcvts_ph_epu8 (__mmask32 __U, __m512h __A)
+{
+ return
+ (__m512i) __builtin_ia32_cvtph2iubs512_mask ((__v32hf) __A,
+ (__v32hi)
+ _mm512_setzero_si512 (),
+ (__mmask32) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_ipcvts_ps_epi8 (__m512 __A)
+{
+ return
+ (__m512i) __builtin_ia32_cvtps2ibs512_mask ((__v16sf) __A,
+ (__v16si)
+ _mm512_undefined_si512 (),
+ (__mmask16) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_ipcvts_ps_epi8 (__m512i __W, __mmask16 __U, __m512 __A)
+{
+ return (__m512i) __builtin_ia32_cvtps2ibs512_mask ((__v16sf) __A,
+ (__v16si) __W,
+ (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_ipcvts_ps_epi8 (__mmask16 __U, __m512 __A)
+{
+ return
+ (__m512i) __builtin_ia32_cvtps2ibs512_mask ((__v16sf) __A,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_ipcvts_ps_epu8 (__m512 __A)
+{
+ return
+ (__m512i) __builtin_ia32_cvtps2iubs512_mask ((__v16sf) __A,
+ (__v16si)
+ _mm512_undefined_si512 (),
+ (__mmask16) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_ipcvts_ps_epu8 (__m512i __W, __mmask16 __U, __m512 __A)
+{
+ return (__m512i) __builtin_ia32_cvtps2iubs512_mask ((__v16sf) __A,
+ (__v16si) __W,
+ (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_ipcvts_ps_epu8 (__mmask16 __U, __m512 __A)
+{
+ return
+ (__m512i) __builtin_ia32_cvtps2iubs512_mask ((__v16sf) __A,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_ipcvtts_ph_epi8 (__m512h __A)
+{
+ return (__m512i)
+ __builtin_ia32_cvttph2ibs512_mask ((__v32hf) __A,
+ (__v32hi)
+ _mm512_undefined_si512 (),
+ (__mmask32) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_ipcvtts_ph_epi8 (__m512i __W, __mmask32 __U, __m512h __A)
+{
+ return (__m512i) __builtin_ia32_cvttph2ibs512_mask ((__v32hf) __A,
+ (__v32hi) __W,
+ (__mmask32) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_ipcvtts_ph_epi8 (__mmask32 __U, __m512h __A)
+{
+ return
+ (__m512i) __builtin_ia32_cvttph2ibs512_mask ((__v32hf) __A,
+ (__v32hi)
+ _mm512_setzero_si512 (),
+ (__mmask32) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_ipcvtts_ph_epu8 (__m512h __A)
+{
+ return (__m512i)
+ __builtin_ia32_cvttph2iubs512_mask ((__v32hf) __A,
+ (__v32hi)
+ _mm512_undefined_si512 (),
+ (__mmask32) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_ipcvtts_ph_epu8 (__m512i __W, __mmask32 __U, __m512h __A)
+{
+ return (__m512i) __builtin_ia32_cvttph2iubs512_mask ((__v32hf) __A,
+ (__v32hi) __W,
+ (__mmask32) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_ipcvtts_ph_epu8 (__mmask32 __U, __m512h __A)
+{
+ return (__m512i)
+ __builtin_ia32_cvttph2iubs512_mask ((__v32hf) __A,
+ (__v32hi)
+ _mm512_setzero_si512 (),
+ (__mmask32) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_ipcvtts_ps_epi8 (__m512 __A)
+{
+ return (__m512i)
+ __builtin_ia32_cvttps2ibs512_mask ((__v16sf) __A,
+ (__v16si)
+ _mm512_undefined_si512 (),
+ (__mmask16) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_ipcvtts_ps_epi8 (__m512i __W, __mmask16 __U, __m512 __A)
+{
+ return (__m512i) __builtin_ia32_cvttps2ibs512_mask ((__v16sf) __A,
+ (__v16si) __W,
+ (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_ipcvtts_ps_epi8 (__mmask16 __U, __m512 __A)
+{
+ return (__m512i)
+ __builtin_ia32_cvttps2ibs512_mask ((__v16sf) __A,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_ipcvtts_ps_epu8 (__m512 __A)
+{
+ return (__m512i)
+ __builtin_ia32_cvttps2iubs512_mask ((__v16sf) __A,
+ (__v16si)
+ _mm512_undefined_si512 (),
+ (__mmask16) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_ipcvtts_ps_epu8 (__m512i __W, __mmask16 __U, __m512 __A)
+{
+ return (__m512i) __builtin_ia32_cvttps2iubs512_mask ((__v16sf) __A,
+ (__v16si) __W,
+ (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_ipcvtts_ps_epu8 (__mmask16 __U, __m512 __A)
+{
+ return (__m512i)
+ __builtin_ia32_cvttps2iubs512_mask ((__v16sf) __A,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtts_pd_epi32 (__m512d __A)
+{
+ return (__m256i)
+ __builtin_ia32_cvttpd2dqs512_mask ((__v8df) __A,
+ (__v8si)
+ _mm256_undefined_si256 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtts_pd_epi32 (__m256i __W, __mmask8 __U, __m512d __A)
+{
+ return (__m256i) __builtin_ia32_cvttpd2dqs512_mask ((__v8df) __A,
+ (__v8si) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtts_pd_epi32 (__mmask8 __U, __m512d __A)
+{
+ return
+ (__m256i) __builtin_ia32_cvttpd2dqs512_mask ((__v8df) __A,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtts_pd_epi64 (__m512d __A)
+{
+ return (__m512i)
+ __builtin_ia32_cvttpd2qqs512_mask ((__v8df) __A,
+ (__v8di)
+ _mm512_undefined_si512 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtts_pd_epi64 (__m512i __W, __mmask8 __U, __m512d __A)
+{
+ return (__m512i) __builtin_ia32_cvttpd2qqs512_mask ((__v8df) __A,
+ (__v8di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtts_pd_epi64 (__mmask8 __U, __m512d __A)
+{
+ return
+ (__m512i) __builtin_ia32_cvttpd2qqs512_mask ((__v8df) __A,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtts_pd_epu32 (__m512d __A)
+{
+ return (__m256i)
+ __builtin_ia32_cvttpd2udqs512_mask ((__v8df) __A,
+ (__v8si)
+ _mm256_undefined_si256 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtts_pd_epu32 (__m256i __W, __mmask8 __U, __m512d __A)
+{
+ return (__m256i) __builtin_ia32_cvttpd2udqs512_mask ((__v8df) __A,
+ (__v8si) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtts_pd_epu32 (__mmask8 __U, __m512d __A)
+{
+ return
+ (__m256i) __builtin_ia32_cvttpd2udqs512_mask ((__v8df) __A,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtts_pd_epu64 (__m512d __A)
+{
+ return (__m512i)
+ __builtin_ia32_cvttpd2uqqs512_mask ((__v8df) __A,
+ (__v8di)
+ _mm512_undefined_si512 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtts_pd_epu64 (__m512i __W, __mmask8 __U, __m512d __A)
+{
+ return (__m512i) __builtin_ia32_cvttpd2uqqs512_mask ((__v8df) __A,
+ (__v8di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtts_pd_epu64 (__mmask8 __U, __m512d __A)
+{
+ return (__m512i)
+ __builtin_ia32_cvttpd2uqqs512_mask ((__v8df) __A,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtts_ps_epi32 (__m512 __A)
+{
+ return (__m512i)
+ __builtin_ia32_cvttps2dqs512_mask ((__v16sf) __A,
+ (__v16si)
+ _mm512_undefined_si512 (),
+ (__mmask16) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtts_ps_epi32 (__m512i __W, __mmask16 __U, __m512 __A)
+{
+ return (__m512i) __builtin_ia32_cvttps2dqs512_mask ((__v16sf) __A,
+ (__v16si) __W,
+ (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtts_ps_epi32 (__mmask16 __U, __m512 __A)
+{
+ return
+ (__m512i) __builtin_ia32_cvttps2dqs512_mask ((__v16sf) __A,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtts_ps_epi64 (__m256 __A)
+{
+ return (__m512i)
+ __builtin_ia32_cvttps2qqs512_mask ((__v8sf) __A,
+ (__v8di)
+ _mm512_undefined_si512 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtts_ps_epi64 (__m512i __W, __mmask8 __U, __m256 __A)
+{
+ return (__m512i) __builtin_ia32_cvttps2qqs512_mask ((__v8sf) __A,
+ (__v8di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtts_ps_epi64 (__mmask8 __U, __m256 __A)
+{
+ return
+ (__m512i) __builtin_ia32_cvttps2qqs512_mask ((__v8sf) __A,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtts_ps_epu32 (__m512 __A)
+{
+ return (__m512i)
+ __builtin_ia32_cvttps2udqs512_mask ((__v16sf) __A,
+ (__v16si)
+ _mm512_undefined_si512 (),
+ (__mmask16) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtts_ps_epu32 (__m512i __W, __mmask16 __U, __m512 __A)
+{
+ return (__m512i) __builtin_ia32_cvttps2udqs512_mask ((__v16sf) __A,
+ (__v16si) __W,
+ (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtts_ps_epu32 (__mmask16 __U, __m512 __A)
+{
+ return (__m512i)
+ __builtin_ia32_cvttps2udqs512_mask ((__v16sf) __A,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtts_ps_epu64 (__m256 __A)
+{
+ return (__m512i)
+ __builtin_ia32_cvttps2uqqs512_mask ((__v8sf) __A,
+ (__v8di)
+ _mm512_undefined_si512 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtts_ps_epu64 (__m512i __W, __mmask8 __U, __m256 __A)
+{
+ return (__m512i) __builtin_ia32_cvttps2uqqs512_mask ((__v8sf) __A,
+ (__v8di) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtts_ps_epu64 (__mmask8 __U, __m256 __A)
+{
+ return
+ (__m512i) __builtin_ia32_cvttps2uqqs512_mask ((__v8sf) __A,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) __U);
+}
+
+#ifdef __OPTIMIZE__
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_ipcvts_roundph_epi8 (__m512h __A, const int __R)
+{
+ return
+ (__m512i) __builtin_ia32_cvtph2ibs512_mask_round ((__v32hf) __A,
+ (__v32hi)
+ _mm512_undefined_si512 (),
+ (__mmask32) -1,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_ipcvts_roundph_epi8 (__m512i __W, __mmask32 __U, __m512h __A,
+ const int __R)
+{
+ return (__m512i) __builtin_ia32_cvtph2ibs512_mask_round ((__v32hf) __A,
+ (__v32hi) __W,
+ (__mmask32) __U,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_ipcvts_roundph_epi8 (__mmask32 __U, __m512h __A, const int __R)
+{
+ return
+ (__m512i) __builtin_ia32_cvtph2ibs512_mask_round ((__v32hf) __A,
+ (__v32hi)
+ _mm512_setzero_si512 (),
+ (__mmask32) __U,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_ipcvts_roundph_epu8 (__m512h __A, const int __R)
+{
+ return
+ (__m512i) __builtin_ia32_cvtph2iubs512_mask_round ((__v32hf) __A,
+ (__v32hi)
+ _mm512_undefined_si512 (),
+ (__mmask32) -1,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_ipcvts_roundph_epu8 (__m512i __W, __mmask32 __U, __m512h __A,
+ const int __R)
+{
+ return (__m512i) __builtin_ia32_cvtph2iubs512_mask_round ((__v32hf) __A,
+ (__v32hi) __W,
+ (__mmask32) __U,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_ipcvts_roundph_epu8 (__mmask32 __U, __m512h __A, const int __R)
+{
+ return
+ (__m512i) __builtin_ia32_cvtph2iubs512_mask_round ((__v32hf) __A,
+ (__v32hi)
+ _mm512_setzero_si512 (),
+ (__mmask32) __U,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_ipcvts_roundps_epi8 (__m512 __A, const int __R)
+{
+ return
+ (__m512i) __builtin_ia32_cvtps2ibs512_mask_round ((__v16sf) __A,
+ (__v16si)
+ _mm512_undefined_si512 (),
+ (__mmask16) -1,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_ipcvts_roundps_epi8 (__m512i __W, __mmask16 __U, __m512 __A,
+ const int __R)
+{
+ return (__m512i) __builtin_ia32_cvtps2ibs512_mask_round ((__v16sf) __A,
+ (__v16si) __W,
+ (__mmask16) __U,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_ipcvts_roundps_epi8 (__mmask16 __U, __m512 __A, const int __R)
+{
+ return
+ (__m512i) __builtin_ia32_cvtps2ibs512_mask_round ((__v16sf) __A,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) __U,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_ipcvts_roundps_epu8 (__m512 __A, const int __R)
+{
+ return
+ (__m512i) __builtin_ia32_cvtps2iubs512_mask_round ((__v16sf) __A,
+ (__v16si)
+ _mm512_undefined_si512 (),
+ (__mmask16) -1,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_ipcvts_roundps_epu8 (__m512i __W, __mmask16 __U, __m512 __A,
+ const int __R)
+{
+ return (__m512i) __builtin_ia32_cvtps2iubs512_mask_round ((__v16sf) __A,
+ (__v16si) __W,
+ (__mmask16) __U,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_ipcvts_roundps_epu8 (__mmask16 __U, __m512 __A, const int __R)
+{
+ return
+ (__m512i) __builtin_ia32_cvtps2iubs512_mask_round ((__v16sf) __A,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) __U,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_ipcvtts_roundph_epi8 (__m512h __A, const int __R)
+{
+ return (__m512i)
+ __builtin_ia32_cvttph2ibs512_mask_round ((__v32hf) __A,
+ (__v32hi)
+ _mm512_undefined_si512 (),
+ (__mmask32) -1,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_ipcvtts_roundph_epi8 (__m512i __W, __mmask32 __U, __m512h __A,
+ const int __R)
+{
+ return (__m512i) __builtin_ia32_cvttph2ibs512_mask_round ((__v32hf) __A,
+ (__v32hi) __W,
+ (__mmask32) __U,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_ipcvtts_roundph_epi8 (__mmask32 __U, __m512h __A, const int __R)
+{
+ return
+ (__m512i) __builtin_ia32_cvttph2ibs512_mask_round ((__v32hf) __A,
+ (__v32hi)
+ _mm512_setzero_si512 (),
+ (__mmask32) __U,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_ipcvtts_roundph_epu8 (__m512h __A, const int __R)
+{
+ return (__m512i)
+ __builtin_ia32_cvttph2iubs512_mask_round ((__v32hf) __A,
+ (__v32hi)
+ _mm512_undefined_si512 (),
+ (__mmask32) -1,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_ipcvtts_roundph_epu8 (__m512i __W, __mmask32 __U, __m512h __A,
+ const int __R)
+{
+ return (__m512i) __builtin_ia32_cvttph2iubs512_mask_round ((__v32hf) __A,
+ (__v32hi) __W,
+ (__mmask32) __U,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_ipcvtts_roundph_epu8 (__mmask32 __U, __m512h __A, const int __R)
+{
+ return (__m512i)
+ __builtin_ia32_cvttph2iubs512_mask_round ((__v32hf) __A,
+ (__v32hi)
+ _mm512_setzero_si512 (),
+ (__mmask32) __U,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_ipcvtts_roundps_epi8 (__m512 __A, const int __R)
+{
+ return (__m512i)
+ __builtin_ia32_cvttps2ibs512_mask_round ((__v16sf) __A,
+ (__v16si)
+ _mm512_undefined_si512 (),
+ (__mmask16) -1,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_ipcvtts_roundps_epi8 (__m512i __W, __mmask16 __U, __m512 __A,
+ const int __R)
+{
+ return (__m512i) __builtin_ia32_cvttps2ibs512_mask_round ((__v16sf) __A,
+ (__v16si) __W,
+ (__mmask16) __U,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_ipcvtts_roundps_epi8 (__mmask16 __U, __m512 __A, const int __R)
+{
+ return (__m512i)
+ __builtin_ia32_cvttps2ibs512_mask_round ((__v16sf) __A,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) __U,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_ipcvtts_roundps_epu8 (__m512 __A, const int __R)
+{
+ return (__m512i)
+ __builtin_ia32_cvttps2iubs512_mask_round ((__v16sf) __A,
+ (__v16si)
+ _mm512_undefined_si512 (),
+ (__mmask16) -1,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_ipcvtts_roundps_epu8 (__m512i __W, __mmask16 __U, __m512 __A,
+ const int __R)
+{
+ return (__m512i) __builtin_ia32_cvttps2iubs512_mask_round ((__v16sf) __A,
+ (__v16si) __W,
+ (__mmask16) __U,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_ipcvtts_roundps_epu8 (__mmask16 __U, __m512 __A, const int __R)
+{
+ return (__m512i)
+ __builtin_ia32_cvttps2iubs512_mask_round ((__v16sf) __A,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) __U,
+ __R);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtts_roundpd_epi32 (__m512d __A, const int __R)
+{
+ return (__m256i)
+ __builtin_ia32_cvttpd2dqs512_mask_round ((__v8df) __A,
+ (__v8si)
+ _mm256_undefined_si256 (),
+ (__mmask8) -1,
+ __R);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtts_roundpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A,
+ const int __R)
+{
+ return (__m256i) __builtin_ia32_cvttpd2dqs512_mask_round ((__v8df) __A,
+ (__v8si) __W,
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtts_roundpd_epi32 (__mmask8 __U, __m512d __A, const int __R)
+{
+ return
+ (__m256i) __builtin_ia32_cvttpd2dqs512_mask_round ((__v8df) __A,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtts_roundpd_epi64 (__m512d __A, const int __R)
+{
+ return (__m512i)
+ __builtin_ia32_cvttpd2qqs512_mask_round ((__v8df) __A,
+ (__v8di)
+ _mm512_undefined_si512 (),
+ (__mmask8) -1,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtts_roundpd_epi64 (__m512i __W, __mmask8 __U, __m512d __A,
+ const int __R)
+{
+ return (__m512i) __builtin_ia32_cvttpd2qqs512_mask_round ((__v8df) __A,
+ (__v8di) __W,
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtts_roundpd_epi64 (__mmask8 __U, __m512d __A, const int __R)
+{
+ return
+ (__m512i) __builtin_ia32_cvttpd2qqs512_mask_round ((__v8df) __A,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtts_roundpd_epu32 (__m512d __A, const int __R)
+{
+ return (__m256i)
+ __builtin_ia32_cvttpd2udqs512_mask_round ((__v8df) __A,
+ (__v8si)
+ _mm256_undefined_si256 (),
+ (__mmask8) -1,
+ __R);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtts_roundpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A,
+ const int __R)
+{
+ return (__m256i) __builtin_ia32_cvttpd2udqs512_mask_round ((__v8df) __A,
+ (__v8si) __W,
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtts_roundpd_epu32 (__mmask8 __U, __m512d __A, const int __R)
+{
+ return
+ (__m256i) __builtin_ia32_cvttpd2udqs512_mask_round ((__v8df) __A,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtts_roundpd_epu64 (__m512d __A, const int __R)
+{
+ return (__m512i)
+ __builtin_ia32_cvttpd2uqqs512_mask_round ((__v8df) __A,
+ (__v8di)
+ _mm512_undefined_si512 (),
+ (__mmask8) -1,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtts_roundpd_epu64 (__m512i __W, __mmask8 __U, __m512d __A,
+ const int __R)
+{
+ return (__m512i) __builtin_ia32_cvttpd2uqqs512_mask_round ((__v8df) __A,
+ (__v8di) __W,
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtts_roundpd_epu64 (__mmask8 __U, __m512d __A, const int __R)
+{
+ return (__m512i)
+ __builtin_ia32_cvttpd2uqqs512_mask_round ((__v8df) __A,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtts_roundps_epi32 (__m512 __A, const int __R)
+{
+ return (__m512i)
+ __builtin_ia32_cvttps2dqs512_mask_round ((__v16sf) __A,
+ (__v16si)
+ _mm512_undefined_si512 (),
+ (__mmask16) -1,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtts_roundps_epi32 (__m512i __W, __mmask16 __U, __m512 __A,
+ const int __R)
+{
+ return (__m512i) __builtin_ia32_cvttps2dqs512_mask_round ((__v16sf) __A,
+ (__v16si) __W,
+ (__mmask16) __U,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtts_roundps_epi32 (__mmask16 __U, __m512 __A, const int __R)
+{
+ return
+ (__m512i) __builtin_ia32_cvttps2dqs512_mask_round ((__v16sf) __A,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) __U,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtts_roundps_epi64 (__m256 __A, const int __R)
+{
+ return (__m512i)
+ __builtin_ia32_cvttps2qqs512_mask_round ((__v8sf) __A,
+ (__v8di)
+ _mm512_undefined_si512 (),
+ (__mmask8) -1,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtts_roundps_epi64 (__m512i __W, __mmask8 __U, __m256 __A,
+ const int __R)
+{
+ return (__m512i) __builtin_ia32_cvttps2qqs512_mask_round ((__v8sf) __A,
+ (__v8di) __W,
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtts_roundps_epi64 (__mmask8 __U, __m256 __A, const int __R)
+{
+ return
+ (__m512i) __builtin_ia32_cvttps2qqs512_mask_round ((__v8sf) __A,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtts_roundps_epu32 (__m512 __A, const int __R)
+{
+ return (__m512i)
+ __builtin_ia32_cvttps2udqs512_mask_round ((__v16sf) __A,
+ (__v16si)
+ _mm512_undefined_si512 (),
+ (__mmask16) -1,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtts_roundps_epu32 (__m512i __W, __mmask16 __U, __m512 __A,
+ const int __R)
+{
+ return (__m512i) __builtin_ia32_cvttps2udqs512_mask_round ((__v16sf) __A,
+ (__v16si) __W,
+ (__mmask16) __U,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtts_roundps_epu32 (__mmask16 __U, __m512 __A, const int __R)
+{
+ return (__m512i)
+ __builtin_ia32_cvttps2udqs512_mask_round ((__v16sf) __A,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) __U,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtts_roundps_epu64 (__m256 __A, const int __R)
+{
+ return (__m512i)
+ __builtin_ia32_cvttps2uqqs512_mask_round ((__v8sf) __A,
+ (__v8di)
+ _mm512_undefined_si512 (),
+ (__mmask8) -1,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtts_roundps_epu64 (__m512i __W, __mmask8 __U, __m256 __A,
+ const int __R)
+{
+ return (__m512i) __builtin_ia32_cvttps2uqqs512_mask_round ((__v8sf) __A,
+ (__v8di) __W,
+ (__mmask8) __U,
+ __R);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtts_roundps_epu64 (__mmask8 __U, __m256 __A, const int __R)
+{
+ return
+ (__m512i) __builtin_ia32_cvttps2uqqs512_mask_round ((__v8sf) __A,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) __U,
+ __R);
+}
+#else
+#define _mm512_ipcvts_roundph_epi8(A, R) \
+ ((__m512i) \
+ __builtin_ia32_cvtph2ibs512_mask_round ((__v32hf) (A), \
+ (__v32hi) \
+ (_mm512_undefined_si512 ()), \
+ (__mmask32) (-1), \
+ (R)))
+
+#define _mm512_mask_ipcvts_roundph_epi8(W, U, A, R) \
+ ((__m512i) __builtin_ia32_cvtph2ibs512_mask_round ((__v32hf) (A), \
+ (__v32hi) (W), \
+ (__mmask32) (U), \
+ (R)))
+
+#define _mm512_maskz_ipcvts_roundph_epi8(U, A, R) \
+ ((__m512i) \
+ __builtin_ia32_cvtph2ibs512_mask_round ((__v32hf) (A), \
+ (__v32hi) \
+ (_mm512_setzero_si512 ()), \
+ (__mmask32) (U), \
+ (R)))
+
+#define _mm512_ipcvts_roundph_epu8(A, R) \
+ ((__m512i) \
+ __builtin_ia32_cvtph2iubs512_mask_round ((__v32hf) (A), \
+ (__v32hi) \
+ (_mm512_undefined_si512 ()), \
+ (__mmask32) (-1), \
+ (R)))
+
+#define _mm512_mask_ipcvts_roundph_epu8(W, U, A, R) \
+ ((__m512i) __builtin_ia32_cvtph2iubs512_mask_round ((__v32hf) (A), \
+ (__v32hi) (W), \
+ (__mmask32) (U), \
+ (R)))
+
+#define _mm512_maskz_ipcvts_roundph_epu8(U, A, R) \
+ ((__m512i) \
+ __builtin_ia32_cvtph2iubs512_mask_round ((__v32hf) (A), \
+ (__v32hi) \
+ (_mm512_setzero_si512 ()), \
+ (__mmask32) (U), \
+ (R)))
+
+#define _mm512_ipcvts_roundps_epi8(A, R) \
+ ((__m512i) \
+ __builtin_ia32_cvtps2ibs512_mask_round ((__v16sf) (A), \
+ (__v16si) \
+ (_mm512_undefined_si512 ()), \
+ (__mmask16) (-1), \
+ (R)))
+
+#define _mm512_mask_ipcvts_roundps_epi8(W, U, A, R) \
+ ((__m512i) __builtin_ia32_cvtps2ibs512_mask_round ((__v16sf) (A), \
+ (__v16si) (W), \
+ (__mmask16) (U), \
+ (R)))
+
+#define _mm512_maskz_ipcvts_roundps_epi8(U, A, R) \
+ ((__m512i) \
+ __builtin_ia32_cvtps2ibs512_mask_round ((__v16sf) (A), \
+ (__v16si) \
+ (_mm512_setzero_si512 ()), \
+ (__mmask16) (U), \
+ (R)))
+
+#define _mm512_ipcvts_roundps_epu8(A, R) \
+ ((__m512i) \
+ __builtin_ia32_cvtps2iubs512_mask_round ((__v16sf) (A), \
+ (__v16si) \
+ (_mm512_undefined_si512 ()), \
+ (__mmask16) (-1), \
+ (R)))
+
+#define _mm512_mask_ipcvts_roundps_epu8(W, U, A, R) \
+ ((__m512i) __builtin_ia32_cvtps2iubs512_mask_round ((__v16sf) (A), \
+ (__v16si) (W), \
+ (__mmask16) (U), \
+ (R)))
+
+#define _mm512_maskz_ipcvts_roundps_epu8(U, A, R) \
+ ((__m512i) \
+ __builtin_ia32_cvtps2iubs512_mask_round ((__v16sf) (A), \
+ (__v16si) \
+ (_mm512_setzero_si512 ()), \
+ (__mmask16) (U), \
+ (R)))
+
+#define _mm512_ipcvtts_roundph_epi8(A, R) \
+ ((__m512i) \
+ __builtin_ia32_cvttph2ibs512_mask_round ((__v32hf) (A), \
+ (__v32hi) \
+ (_mm512_undefined_si512 ()), \
+ (__mmask32) (-1), \
+ (R)))
+
+#define _mm512_mask_ipcvtts_roundph_epi8(W, U, A, R) \
+ ((__m512i) __builtin_ia32_cvttph2ibs512_mask_round ((__v32hf) (A), \
+ (__v32hi) (W), \
+ (__mmask32) (U), \
+ (R)))
+
+#define _mm512_maskz_ipcvtts_roundph_epi8(U, A, R) \
+ ((__m512i) \
+ __builtin_ia32_cvttph2ibs512_mask_round ((__v32hf) (A), \
+ (__v32hi) \
+ (_mm512_setzero_si512 ()), \
+ (__mmask32) (U), \
+ (R)))
+
+#define _mm512_ipcvtts_roundph_epu8(A, R) \
+ ((__m512i) \
+ __builtin_ia32_cvttph2iubs512_mask_round ((__v32hf) (A), \
+ (__v32hi) \
+ (_mm512_undefined_si512 ()), \
+ (__mmask32) (-1), \
+ (R)))
+
+#define _mm512_mask_ipcvtts_roundph_epu8(W, U, A, R) \
+ ((__m512i) __builtin_ia32_cvttph2iubs512_mask_round ((__v32hf) (A), \
+ (__v32hi) (W), \
+ (__mmask32) (U), \
+ (R)))
+
+#define _mm512_maskz_ipcvtts_roundph_epu8(U, A, R) \
+ ((__m512i) \
+ __builtin_ia32_cvttph2iubs512_mask_round ((__v32hf) (A), \
+ (__v32hi) \
+ (_mm512_setzero_si512 ()), \
+ (__mmask32) (U), \
+ (R)))
+
+#define _mm512_ipcvtts_roundps_epi8(A, R) \
+ ((__m512i) \
+ __builtin_ia32_cvttps2ibs512_mask_round ((__v16sf) (A), \
+ (__v16si) \
+ (_mm512_undefined_si512 ()), \
+ (__mmask16) (-1), \
+ (R)))
+
+#define _mm512_mask_ipcvtts_roundps_epi8(W, U, A, R) \
+ ((__m512i) __builtin_ia32_cvttps2ibs512_mask_round ((__v16sf) (A), \
+ (__v16si) (W), \
+ (__mmask16) (U), \
+ (R)))
+
+#define _mm512_maskz_ipcvtts_roundps_epi8(U, A, R) \
+ ((__m512i) \
+ __builtin_ia32_cvttps2ibs512_mask_round ((__v16sf) (A), \
+ (__v16si) \
+ (_mm512_setzero_si512 ()), \
+ (__mmask16) (U), \
+ (R)))
+
+#define _mm512_ipcvtts_roundps_epu8(A, R) \
+ ((__m512i) \
+ __builtin_ia32_cvttps2iubs512_mask_round ((__v16sf) (A), \
+ (__v16si) \
+ (_mm512_undefined_si512 ()), \
+ (__mmask16) (-1), \
+ (R)))
+
+#define _mm512_mask_ipcvtts_roundps_epu8(W, U, A, R) \
+ ((__m512i) __builtin_ia32_cvttps2iubs512_mask_round ((__v16sf) (A), \
+ (__v16si) (W), \
+ (__mmask16) (U), \
+ (R)))
+
+#define _mm512_maskz_ipcvtts_roundps_epu8(U, A, R) \
+ ((__m512i) \
+ __builtin_ia32_cvttps2iubs512_mask_round ((__v16sf) (A), \
+ (__v16si) \
+ (_mm512_setzero_si512 ()), \
+ (__mmask16) (U), \
+ (R)))
+
+#define _mm512_cvtts_roundpd_epi32(A, R) \
+ ((__m256i) \
+ __builtin_ia32_cvttpd2dqs512_mask_round ((__v8df) (A), \
+ (__v8si) \
+ (_mm256_undefined_si256 ()), \
+ (__mmask8) (-1), \
+ (R)))
+
+#define _mm512_mask_cvtts_roundpd_epi32(W, U, A, R) \
+ ((__m256i) __builtin_ia32_cvttpd2dqs512_mask_round ((__v8df) (A), \
+ (__v8si) (W), \
+ (__mmask8) (U), \
+ (R)))
+
+#define _mm512_maskz_cvtts_roundpd_epi32(U, A, R) \
+ ((__m256i) \
+ __builtin_ia32_cvttpd2dqs512_mask_round ((__v8df) (A), \
+ (__v8si) \
+ (_mm256_setzero_si256 ()), \
+ (__mmask8) (U), \
+ (R)))
+
+#define _mm512_cvtts_roundpd_epi64(A, R) \
+ ((__m512i) \
+ __builtin_ia32_cvttpd2qqs512_mask_round ((__v8df) (A), \
+ (__v8di) \
+ (_mm512_undefined_si512 ()), \
+ (__mmask8) (-1), \
+ (R)))
+
+#define _mm512_mask_cvtts_roundpd_epi64(W, U, A, R) \
+ ((__m512i) __builtin_ia32_cvttpd2qqs512_mask_round ((__v8df) (A), \
+ (__v8di) (W), \
+ (__mmask8) (U), \
+ (R)))
+
+#define _mm512_maskz_cvtts_roundpd_epi64(U, A, R) \
+ ((__m512i) \
+ __builtin_ia32_cvttpd2qqs512_mask_round ((__v8df) (A), \
+ (__v8di) \
+ (_mm512_setzero_si512 ()), \
+ (__mmask8) (U), \
+ (R)))
+
+#define _mm512_cvtts_roundpd_epu32(A, R) \
+ ((__m256i) \
+ __builtin_ia32_cvttpd2udqs512_mask_round ((__v8df) (A), \
+ (__v8si) \
+ (_mm256_undefined_si256 ()), \
+ (__mmask8) (-1), \
+ (R)))
+
+#define _mm512_mask_cvtts_roundpd_epu32(W, U, A, R) \
+ ((__m256i) __builtin_ia32_cvttpd2udqs512_mask_round ((__v8df) (A), \
+ (__v8si) (W), \
+ (__mmask8) (U), \
+ (R)))
+
+#define _mm512_maskz_cvtts_roundpd_epu32(U, A, R) \
+ ((__m256i) \
+ __builtin_ia32_cvttpd2udqs512_mask_round ((__v8df) (A), \
+ (__v8si) \
+ (_mm256_setzero_si256 ()), \
+ (__mmask8) (U), \
+ (R)))
+
+#define _mm512_cvtts_roundpd_epu64(A, R) \
+ ((__m512i) \
+ __builtin_ia32_cvttpd2uqqs512_mask_round ((__v8df) (A), \
+ (__v8di) \
+ (_mm512_undefined_si512 ()), \
+ (__mmask8) (-1), \
+ (R)))
+
+#define _mm512_mask_cvtts_roundpd_epu64(W, U, A, R) \
+ ((__m512i) __builtin_ia32_cvttpd2uqqs512_mask_round ((__v8df) (A), \
+ (__v8di) (W), \
+ (__mmask8) (U), \
+ (R)))
+
+#define _mm512_maskz_cvtts_roundpd_epu64(U, A, R) \
+ ((__m512i) \
+ __builtin_ia32_cvttpd2uqqs512_mask_round ((__v8df) (A), \
+ (__v8di) \
+ (_mm512_setzero_si512 ()), \
+ (__mmask8) (U), \
+ (R)))
+
+#define _mm512_cvtts_roundps_epi32(A, R) \
+ ((__m512i) \
+ __builtin_ia32_cvttps2dqs512_mask_round ((__v16sf) (A), \
+ (__v16si) \
+ (_mm512_undefined_si512 ()), \
+ (__mmask16) (-1), \
+ (R)))
+
+#define _mm512_mask_cvtts_roundps_epi32(W, U, A, R) \
+ ((__m512i) __builtin_ia32_cvttps2dqs512_mask_round ((__v16sf) (A), \
+ (__v16si) (W), \
+ (__mmask16) (U), \
+ (R)))
+
+#define _mm512_maskz_cvtts_roundps_epi32(U, A, R) \
+ ((__m512i) \
+ __builtin_ia32_cvttps2dqs512_mask_round ((__v16sf) (A), \
+ (__v16si) \
+ (_mm512_setzero_si512 ()), \
+ (__mmask16) (U), \
+ (R)))
+
+#define _mm512_cvtts_roundps_epi64(A, R) \
+ ((__m512i) \
+ __builtin_ia32_cvttps2qqs512_mask_round ((__v8sf) (A), \
+ (__v8di) \
+ (_mm512_undefined_si512 ()), \
+ (__mmask8) (-1), \
+ (R)))
+
+#define _mm512_mask_cvtts_roundps_epi64(W, U, A, R) \
+ ((__m512i) __builtin_ia32_cvttps2qqs512_mask_round ((__v8sf) (A), \
+ (__v8di) (W), \
+ (__mmask8) (U), \
+ (R)))
+
+#define _mm512_maskz_cvtts_roundps_epi64(U, A, R) \
+ ((__m512i) \
+ __builtin_ia32_cvttps2qqs512_mask_round ((__v8sf) (A), \
+ (__v8di) \
+ (_mm512_setzero_si512 ()), \
+ (__mmask8) (U), \
+ (R)))
+
+#define _mm512_cvtts_roundps_epu32(A, R) \
+ ((__m512i) \
+ __builtin_ia32_cvttps2udqs512_mask_round ((__v16sf) (A), \
+ (__v16si) \
+ (_mm512_undefined_si512 ()), \
+ (__mmask16) (-1), \
+ (R)))
+
+#define _mm512_mask_cvtts_roundps_epu32(W, U, A, R) \
+ ((__m512i) __builtin_ia32_cvttps2udqs512_mask_round ((__v16sf) (A), \
+ (__v16si) (W), \
+ (__mmask16) (U), \
+ (R)))
+
+#define _mm512_maskz_cvtts_roundps_epu32(U, A, R) \
+ ((__m512i) \
+ __builtin_ia32_cvttps2udqs512_mask_round ((__v16sf) (A), \
+ (__v16si) \
+ (_mm512_setzero_si512 ()), \
+ (__mmask16) (U), \
+ (R)))
+
+#define _mm512_cvtts_roundps_epu64(A, R) \
+ ((__m512i) \
+ __builtin_ia32_cvttps2uqqs512_mask_round ((__v8sf) (A), \
+ (__v8di) \
+ (_mm512_undefined_si512 ()), \
+ (__mmask8) (-1), \
+ (R)))
+
+#define _mm512_mask_cvtts_roundps_epu64(W, U, A, R) \
+ ((__m512i) __builtin_ia32_cvttps2uqqs512_mask_round ((__v8sf) (A), \
+ (__v8di) (W), \
+ (__mmask8) (U), \
+ (R)))
+
+#define _mm512_maskz_cvtts_roundps_epu64(U, A, R) \
+ ((__m512i) \
+ __builtin_ia32_cvttps2uqqs512_mask_round ((__v8sf) (A), \
+ (__v8di) \
+ (_mm512_setzero_si512 ()), \
+ (__mmask8) (U), \
+ (R)))
+#endif
+
extern __inline int
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtts_sd_epi32 (__m128d __A)
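
The hunks above extend one fixed convention: each conversion comes in an
unmasked form (all-ones mask, undefined destination operand), a _mask_ form
that merges preserved lanes from __W, a _maskz_ form that zeroes them, and a
_round variant taking a rounding/SAE immediate, which falls back to a macro
without __OPTIMIZE__ because the builtin needs a compile-time constant.  A
minimal usage sketch, assuming a compiler whose <immintrin.h> ships these
AVX10.2 saturating-convert intrinsics and a suitable target flag such as
-mavx10.2 (both assumptions, not part of this patch):

#include <immintrin.h>

__m512i
satcvt_demo (__m512 src, __m512i merge, __mmask16 k)
{
  /* Unmasked: converts all 16 lanes, destination fully written.  */
  __m512i a = _mm512_cvtts_ps_epi32 (src);

  /* mask: lanes whose bit in K is clear keep their value from MERGE.  */
  __m512i b = _mm512_mask_cvtts_ps_epi32 (merge, k, src);

  /* maskz: lanes whose bit in K is clear are zeroed.  */
  __m512i c = _mm512_maskz_cvtts_ps_epi32 (k, src);

  /* _round variant: pass a literal constant so the macro fallback used
     without __OPTIMIZE__ still expands to a valid builtin call.  */
  __m512i d = _mm512_cvtts_roundps_epi32 (src, _MM_FROUND_NO_EXC);

  return _mm512_add_epi32 (_mm512_add_epi32 (a, b),
                           _mm512_add_epi32 (c, d));
}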
diff --git a/gcc/config/i386/avx512bf16intrin.h b/gcc/config/i386/avx512bf16intrin.h
index 6740109..6c087e6 100644
--- a/gcc/config/i386/avx512bf16intrin.h
+++ b/gcc/config/i386/avx512bf16intrin.h
@@ -28,9 +28,9 @@
#ifndef _AVX512BF16INTRIN_H_INCLUDED
#define _AVX512BF16INTRIN_H_INCLUDED
-#if !defined (__AVX512BF16__) || defined (__EVEX512__)
+#if !defined (__AVX512BF16__)
#pragma GCC push_options
-#pragma GCC target("avx512bf16,no-evex512")
+#pragma GCC target("avx512bf16")
#define __DISABLE_AVX512BF16__
#endif /* __AVX512BF16__ */
@@ -42,17 +42,6 @@ _mm_cvtsbh_ss (__bf16 __A)
return __builtin_ia32_cvtbf2sf (__A);
}
-#ifdef __DISABLE_AVX512BF16__
-#undef __DISABLE_AVX512BF16__
-#pragma GCC pop_options
-#endif /* __DISABLE_AVX512BF16__ */
-
-#if !defined (__AVX512BF16__) || !defined (__EVEX512__)
-#pragma GCC push_options
-#pragma GCC target("avx512bf16,evex512")
-#define __DISABLE_AVX512BF16_512__
-#endif /* __AVX512BF16_512__ */
-
/* Internal data types for implementing the intrinsics. */
typedef __bf16 __v32bf __attribute__ ((__vector_size__ (64)));
@@ -155,8 +144,8 @@ _mm512_mask_cvtpbh_ps (__m512 __S, __mmask16 __U, __m256bh __A)
(__m512i)_mm512_cvtepi16_epi32 ((__m256i)__A), 16)));
}
-#ifdef __DISABLE_AVX512BF16_512__
-#undef __DISABLE_AVX512BF16_512__
+#ifdef __DISABLE_AVX512BF16__
+#undef __DISABLE_AVX512BF16__
#pragma GCC pop_options
#endif /* __DISABLE_AVX512BF16_512__ */
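
Each header touched below repeats the guard idiom this hunk consolidates:
push the target options, enable the ISA for the header body, and pop
afterwards, using a __DISABLE_* macro to remember whether a push happened.
A sketch of the post-patch shape, using the real AVX512BW macro names but a
hypothetical header guard:

#ifndef _EXAMPLEINTRIN_H_INCLUDED
#define _EXAMPLEINTRIN_H_INCLUDED

#if !defined (__AVX512BW__)        /* not already compiled with -mavx512bw */
#pragma GCC push_options
#pragma GCC target("avx512bw")     /* enable the ISA for this header only */
#define __DISABLE_AVX512BW__       /* remember that options were pushed */
#endif

/* ...intrinsic definitions, 128/256/512-bit alike, in one region now that
   the separate no-evex512/evex512 halves are gone...  */

#ifdef __DISABLE_AVX512BW__
#undef __DISABLE_AVX512BW__
#pragma GCC pop_options            /* restore the caller's target options */
#endif

#endif /* _EXAMPLEINTRIN_H_INCLUDED */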
diff --git a/gcc/config/i386/avx512bf16vlintrin.h b/gcc/config/i386/avx512bf16vlintrin.h
index ffaceac..fd6d183 100644
--- a/gcc/config/i386/avx512bf16vlintrin.h
+++ b/gcc/config/i386/avx512bf16vlintrin.h
@@ -28,9 +28,9 @@
#ifndef _AVX512BF16VLINTRIN_H_INCLUDED
#define _AVX512BF16VLINTRIN_H_INCLUDED
-#if !defined(__AVX512VL__) || !defined(__AVX512BF16__) || defined (__EVEX512__)
+#if !defined(__AVX512VL__) || !defined(__AVX512BF16__)
#pragma GCC push_options
-#pragma GCC target("avx512bf16,avx512vl,no-evex512")
+#pragma GCC target("avx512bf16,avx512vl")
#define __DISABLE_AVX512BF16VL__
#endif /* __AVX512BF16__ */
diff --git a/gcc/config/i386/avx512bitalgintrin.h b/gcc/config/i386/avx512bitalgintrin.h
index 301f125..d7156f9 100644
--- a/gcc/config/i386/avx512bitalgintrin.h
+++ b/gcc/config/i386/avx512bitalgintrin.h
@@ -28,9 +28,9 @@
#ifndef _AVX512BITALGINTRIN_H_INCLUDED
#define _AVX512BITALGINTRIN_H_INCLUDED
-#if !defined (__AVX512BITALG__) || !defined (__EVEX512__)
+#if !defined (__AVX512BITALG__)
#pragma GCC push_options
-#pragma GCC target("avx512bitalg,evex512")
+#pragma GCC target("avx512bitalg")
#define __DISABLE_AVX512BITALG__
#endif /* __AVX512BITALG__ */
diff --git a/gcc/config/i386/avx512bitalgvlintrin.h b/gcc/config/i386/avx512bitalgvlintrin.h
index e4883cf..cf9cff6 100644
--- a/gcc/config/i386/avx512bitalgvlintrin.h
+++ b/gcc/config/i386/avx512bitalgvlintrin.h
@@ -28,9 +28,9 @@
#ifndef _AVX512BITALGVLINTRIN_H_INCLUDED
#define _AVX512BITALGVLINTRIN_H_INCLUDED
-#if !defined(__AVX512BITALG__) || !defined(__AVX512VL__) || defined (__EVEX512__)
+#if !defined(__AVX512BITALG__) || !defined(__AVX512VL__)
#pragma GCC push_options
-#pragma GCC target("avx512bitalg,avx512vl,no-evex512")
+#pragma GCC target("avx512bitalg,avx512vl")
#define __DISABLE_AVX512BITALGVL__
#endif /* __AVX512BITALGVL__ */
diff --git a/gcc/config/i386/avx512bwintrin.h b/gcc/config/i386/avx512bwintrin.h
index 47c4c03..5e9eeaa 100644
--- a/gcc/config/i386/avx512bwintrin.h
+++ b/gcc/config/i386/avx512bwintrin.h
@@ -28,9 +28,9 @@
#ifndef _AVX512BWINTRIN_H_INCLUDED
#define _AVX512BWINTRIN_H_INCLUDED
-#if !defined (__AVX512BW__) || defined (__EVEX512__)
+#if !defined (__AVX512BW__)
#pragma GCC push_options
-#pragma GCC target("avx512bw,no-evex512")
+#pragma GCC target("avx512bw")
#define __DISABLE_AVX512BW__
#endif /* __AVX512BW__ */
@@ -346,17 +346,6 @@ _kandn_mask64 (__mmask64 __A, __mmask64 __B)
return (__mmask64) __builtin_ia32_kandndi ((__mmask64) __A, (__mmask64) __B);
}
-#ifdef __DISABLE_AVX512BW__
-#undef __DISABLE_AVX512BW__
-#pragma GCC pop_options
-#endif /* __DISABLE_AVX512BW__ */
-
-#if !defined (__AVX512BW__) || !defined (__EVEX512__)
-#pragma GCC push_options
-#pragma GCC target("avx512bw,evex512")
-#define __DISABLE_AVX512BW_512__
-#endif /* __AVX512BW_512__ */
-
/* Internal data types for implementing the intrinsics. */
typedef short __v32hi __attribute__ ((__vector_size__ (64)));
typedef short __v32hi_u __attribute__ ((__vector_size__ (64), \
@@ -3369,8 +3358,8 @@ _mm512_bsrli_epi128 (__m512i __A, const int __N)
#endif
-#ifdef __DISABLE_AVX512BW_512__
-#undef __DISABLE_AVX512BW_512__
+#ifdef __DISABLE_AVX512BW__
+#undef __DISABLE_AVX512BW__
#pragma GCC pop_options
#endif /* __DISABLE_AVX512BW_512__ */
diff --git a/gcc/config/i386/avx512cdintrin.h b/gcc/config/i386/avx512cdintrin.h
index 206cc49..5a92d25 100644
--- a/gcc/config/i386/avx512cdintrin.h
+++ b/gcc/config/i386/avx512cdintrin.h
@@ -30,7 +30,7 @@
#ifndef __AVX512CD__
#pragma GCC push_options
-#pragma GCC target("avx512cd,evex512")
+#pragma GCC target("avx512cd")
#define __DISABLE_AVX512CD__
#endif /* __AVX512CD__ */
diff --git a/gcc/config/i386/avx512dqintrin.h b/gcc/config/i386/avx512dqintrin.h
index 1d10225..a7766b5 100644
--- a/gcc/config/i386/avx512dqintrin.h
+++ b/gcc/config/i386/avx512dqintrin.h
@@ -28,9 +28,9 @@
#ifndef _AVX512DQINTRIN_H_INCLUDED
#define _AVX512DQINTRIN_H_INCLUDED
-#if !defined (__AVX512DQ__) || defined (__EVEX512__)
+#if !defined (__AVX512DQ__)
#pragma GCC push_options
-#pragma GCC target("avx512dq,no-evex512")
+#pragma GCC target("avx512dq")
#define __DISABLE_AVX512DQ__
#endif /* __AVX512DQ__ */
@@ -639,17 +639,6 @@ _mm_mask_fpclass_sd_mask (__mmask8 __U, __m128d __A, const int __imm)
#endif
-#ifdef __DISABLE_AVX512DQ__
-#undef __DISABLE_AVX512DQ__
-#pragma GCC pop_options
-#endif /* __DISABLE_AVX512DQ__ */
-
-#if !defined (__AVX512DQ__) || !defined (__EVEX512__)
-#pragma GCC push_options
-#pragma GCC target("avx512dq,evex512")
-#define __DISABLE_AVX512DQ_512__
-#endif /* __AVX512DQ_512__ */
-
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_broadcast_f64x2 (__m128d __A)
@@ -2897,9 +2886,9 @@ _mm512_fpclass_ps_mask (__m512 __A, const int __imm)
#endif
-#ifdef __DISABLE_AVX512DQ_512__
-#undef __DISABLE_AVX512DQ_512__
+#ifdef __DISABLE_AVX512DQ__
+#undef __DISABLE_AVX512DQ__
#pragma GCC pop_options
-#endif /* __DISABLE_AVX512DQ_512__ */
+#endif /* __DISABLE_AVX512DQ__ */
#endif /* _AVX512DQINTRIN_H_INCLUDED */
diff --git a/gcc/config/i386/avx512fintrin.h b/gcc/config/i386/avx512fintrin.h
index 9160787..4469f73 100644
--- a/gcc/config/i386/avx512fintrin.h
+++ b/gcc/config/i386/avx512fintrin.h
@@ -28,9 +28,9 @@
#ifndef _AVX512FINTRIN_H_INCLUDED
#define _AVX512FINTRIN_H_INCLUDED
-#if !defined (__AVX512F__) || defined (__EVEX512__)
+#if !defined (__AVX512F__)
#pragma GCC push_options
-#pragma GCC target("avx512f,no-evex512")
+#pragma GCC target("avx512f")
#define __DISABLE_AVX512F__
#endif /* __AVX512F__ */
@@ -54,11 +54,12 @@ typedef enum
_MM_MANT_SIGN_nan /* DEST = NaN if sign(SRC) = 1 */
} _MM_MANTISSA_SIGN_ENUM;
-/* These _mm{,256}_avx512* intrins are duplicated from their _mm{,256}_* forms
- from AVX2 or before. We need to add them to prevent target option mismatch
- when calling AVX512 intrins implemented with these intrins under no-evex512
- function attribute. All AVX512 intrins calling those AVX2 intrins or
- before will change their calls to these AVX512 version. */
+/* These _mm{,256}_avx512* intrins were initially duplicated from their
+   _mm{,256}_* forms from AVX2 or before.  At that time, we needed to add
+   them to prevent target option mismatches when calling AVX512 intrins
+   implemented with these intrins under the no-evex512 function attribute.
+   These intrins remain here to avoid huge changes.  All AVX512 intrins
+   calling those AVX2 intrins or before have changed their calls to these
+   AVX512 versions.  */
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_avx512_undefined_ps (void)
{
@@ -3802,17 +3803,6 @@ _mm_mask_cmp_ss_mask (__mmask8 __M, __m128 __X, __m128 __Y, const int __P)
#endif
-#ifdef __DISABLE_AVX512F__
-#undef __DISABLE_AVX512F__
-#pragma GCC pop_options
-#endif /* __DISABLE_AVX512F__ */
-
-#if !defined (__AVX512F__) || !defined (__EVEX512__)
-#pragma GCC push_options
-#pragma GCC target("avx512f,evex512")
-#define __DISABLE_AVX512F_512__
-#endif /* __AVX512F_512__ */
-
/* Internal data types for implementing the intrinsics. */
typedef double __v8df __attribute__ ((__vector_size__ (64)));
typedef float __v16sf __attribute__ ((__vector_size__ (64)));
@@ -16609,9 +16599,9 @@ _mm512_mask_reduce_max_pd (__mmask8 __U, __m512d __A)
#undef __MM512_REDUCE_OP
-#ifdef __DISABLE_AVX512F_512__
-#undef __DISABLE_AVX512F_512__
+#ifdef __DISABLE_AVX512F__
+#undef __DISABLE_AVX512F__
#pragma GCC pop_options
-#endif /* __DISABLE_AVX512F_512__ */
+#endif /* __DISABLE_AVX512F__ */
#endif /* _AVX512FINTRIN_H_INCLUDED */
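
The _mm{,256}_avx512* duplicates mentioned in the comment above exist
because GCC refuses to inline an always_inline callee whose target options
differ from its caller's.  A hedged sketch of the pattern, assuming
<immintrin.h> is available; the helper name is hypothetical, and
_mm_avx512_undefined_ps is an internal header helper shown only for
illustration:

#include <immintrin.h>

#pragma GCC push_options
#pragma GCC target("avx512f")
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
my_avx512_helper (void)
{
  /* Caller and callee live under the same target options, so this
     always_inline expansion cannot hit a target option mismatch -- the
     guarantee the duplicated _mm_avx512_* intrins provided during the
     no-evex512 era.  */
  return _mm_avx512_undefined_ps ();
}
#pragma GCC pop_options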
diff --git a/gcc/config/i386/avx512fp16intrin.h b/gcc/config/i386/avx512fp16intrin.h
index f158f87..471ec05 100644
--- a/gcc/config/i386/avx512fp16intrin.h
+++ b/gcc/config/i386/avx512fp16intrin.h
@@ -28,9 +28,9 @@
#ifndef _AVX512FP16INTRIN_H_INCLUDED
#define _AVX512FP16INTRIN_H_INCLUDED
-#if !defined (__AVX512FP16__) || defined (__EVEX512__)
+#if !defined (__AVX512FP16__)
#pragma GCC push_options
-#pragma GCC target("avx512fp16,no-evex512")
+#pragma GCC target("avx512fp16")
#define __DISABLE_AVX512FP16__
#endif /* __AVX512FP16__ */
@@ -2852,17 +2852,6 @@ _mm_maskz_fmul_round_sch (__mmask8 __A, __m128h __B, __m128h __C, const int __E)
#define _mm_maskz_cmul_round_sch(U, A, B, R) \
_mm_maskz_fcmul_round_sch ((U), (A), (B), (R))
-#ifdef __DISABLE_AVX512FP16__
-#undef __DISABLE_AVX512FP16__
-#pragma GCC pop_options
-#endif /* __DISABLE_AVX512FP16__ */
-
-#if !defined (__AVX512FP16__) || !defined (__EVEX512__)
-#pragma GCC push_options
-#pragma GCC target("avx512fp16,evex512")
-#define __DISABLE_AVX512FP16_512__
-#endif /* __AVX512FP16_512__ */
-
typedef _Float16 __v32hf __attribute__ ((__vector_size__ (64)));
typedef _Float16 __m512h __attribute__ ((__vector_size__ (64), __may_alias__));
typedef _Float16 __m512h_u __attribute__ ((__vector_size__ (64), \
@@ -7238,9 +7227,9 @@ _mm512_set1_pch (_Float16 _Complex __A)
#define _mm512_maskz_cmul_round_pch(U, A, B, R) \
_mm512_maskz_fcmul_round_pch ((U), (A), (B), (R))
-#ifdef __DISABLE_AVX512FP16_512__
-#undef __DISABLE_AVX512FP16_512__
+#ifdef __DISABLE_AVX512FP16__
+#undef __DISABLE_AVX512FP16__
#pragma GCC pop_options
-#endif /* __DISABLE_AVX512FP16_512__ */
+#endif /* __DISABLE_AVX512FP16__ */
#endif /* _AVX512FP16INTRIN_H_INCLUDED */
diff --git a/gcc/config/i386/avx512fp16vlintrin.h b/gcc/config/i386/avx512fp16vlintrin.h
index 59e6c88..cb98310 100644
--- a/gcc/config/i386/avx512fp16vlintrin.h
+++ b/gcc/config/i386/avx512fp16vlintrin.h
@@ -28,9 +28,9 @@
#ifndef __AVX512FP16VLINTRIN_H_INCLUDED
#define __AVX512FP16VLINTRIN_H_INCLUDED
-#if !defined(__AVX512VL__) || !defined(__AVX512FP16__) || defined (__EVEX512__)
+#if !defined(__AVX512VL__) || !defined(__AVX512FP16__)
#pragma GCC push_options
-#pragma GCC target("avx512fp16,avx512vl,no-evex512")
+#pragma GCC target("avx512fp16,avx512vl")
#define __DISABLE_AVX512FP16VL__
#endif /* __AVX512FP16VL__ */
diff --git a/gcc/config/i386/avx512ifmaintrin.h b/gcc/config/i386/avx512ifmaintrin.h
index ed97350..56790c0 100644
--- a/gcc/config/i386/avx512ifmaintrin.h
+++ b/gcc/config/i386/avx512ifmaintrin.h
@@ -28,9 +28,9 @@
#ifndef _AVX512IFMAINTRIN_H_INCLUDED
#define _AVX512IFMAINTRIN_H_INCLUDED
-#if !defined (__AVX512IFMA__) || !defined (__EVEX512__)
+#if !defined (__AVX512IFMA__)
#pragma GCC push_options
-#pragma GCC target("avx512ifma,evex512")
+#pragma GCC target("avx512ifma")
#define __DISABLE_AVX512IFMA__
#endif /* __AVX512IFMA__ */
diff --git a/gcc/config/i386/avx512ifmavlintrin.h b/gcc/config/i386/avx512ifmavlintrin.h
index 681bda3..6b849c8 100644
--- a/gcc/config/i386/avx512ifmavlintrin.h
+++ b/gcc/config/i386/avx512ifmavlintrin.h
@@ -28,9 +28,9 @@
#ifndef _AVX512IFMAVLINTRIN_H_INCLUDED
#define _AVX512IFMAVLINTRIN_H_INCLUDED
-#if !defined(__AVX512VL__) || !defined(__AVX512IFMA__) || defined (__EVEX512__)
+#if !defined(__AVX512VL__) || !defined(__AVX512IFMA__)
#pragma GCC push_options
-#pragma GCC target("avx512ifma,avx512vl,no-evex512")
+#pragma GCC target("avx512ifma,avx512vl")
#define __DISABLE_AVX512IFMAVL__
#endif /* __AVX512IFMAVL__ */
diff --git a/gcc/config/i386/avx512vbmi2intrin.h b/gcc/config/i386/avx512vbmi2intrin.h
index f5515a8..e8bfe1d 100644
--- a/gcc/config/i386/avx512vbmi2intrin.h
+++ b/gcc/config/i386/avx512vbmi2intrin.h
@@ -28,9 +28,9 @@
#ifndef __AVX512VBMI2INTRIN_H_INCLUDED
#define __AVX512VBMI2INTRIN_H_INCLUDED
-#if !defined(__AVX512VBMI2__) || !defined (__EVEX512__)
+#if !defined(__AVX512VBMI2__)
#pragma GCC push_options
-#pragma GCC target("avx512vbmi2,evex512")
+#pragma GCC target("avx512vbmi2")
#define __DISABLE_AVX512VBMI2__
#endif /* __AVX512VBMI2__ */
diff --git a/gcc/config/i386/avx512vbmi2vlintrin.h b/gcc/config/i386/avx512vbmi2vlintrin.h
index e9857ba..5cdfebd 100644
--- a/gcc/config/i386/avx512vbmi2vlintrin.h
+++ b/gcc/config/i386/avx512vbmi2vlintrin.h
@@ -28,9 +28,9 @@
#ifndef _AVX512VBMI2VLINTRIN_H_INCLUDED
#define _AVX512VBMI2VLINTRIN_H_INCLUDED
-#if !defined(__AVX512VL__) || !defined(__AVX512VBMI2__) || defined (__EVEX512__)
+#if !defined(__AVX512VL__) || !defined(__AVX512VBMI2__)
#pragma GCC push_options
-#pragma GCC target("avx512vbmi2,avx512vl,no-evex512")
+#pragma GCC target("avx512vbmi2,avx512vl")
#define __DISABLE_AVX512VBMI2VL__
#endif /* __AVX512VBMIVL__ */
diff --git a/gcc/config/i386/avx512vbmiintrin.h b/gcc/config/i386/avx512vbmiintrin.h
index 901a2f7..5f5e342 100644
--- a/gcc/config/i386/avx512vbmiintrin.h
+++ b/gcc/config/i386/avx512vbmiintrin.h
@@ -28,9 +28,9 @@
#ifndef _AVX512VBMIINTRIN_H_INCLUDED
#define _AVX512VBMIINTRIN_H_INCLUDED
-#if !defined (__AVX512VBMI__) || !defined (__EVEX512__)
+#if !defined (__AVX512VBMI__)
#pragma GCC push_options
-#pragma GCC target("avx512vbmi,evex512")
+#pragma GCC target("avx512vbmi")
#define __DISABLE_AVX512VBMI__
#endif /* __AVX512VBMI__ */
diff --git a/gcc/config/i386/avx512vbmivlintrin.h b/gcc/config/i386/avx512vbmivlintrin.h
index 90cd590..037ea93 100644
--- a/gcc/config/i386/avx512vbmivlintrin.h
+++ b/gcc/config/i386/avx512vbmivlintrin.h
@@ -28,9 +28,9 @@
#ifndef _AVX512VBMIVLINTRIN_H_INCLUDED
#define _AVX512VBMIVLINTRIN_H_INCLUDED
-#if !defined(__AVX512VL__) || !defined(__AVX512VBMI__) || defined (__EVEX512__)
+#if !defined(__AVX512VL__) || !defined(__AVX512VBMI__)
#pragma GCC push_options
-#pragma GCC target("avx512vbmi,avx512vl,no-evex512")
+#pragma GCC target("avx512vbmi,avx512vl")
#define __DISABLE_AVX512VBMIVL__
#endif /* __AVX512VBMIVL__ */
diff --git a/gcc/config/i386/avx512vlbwintrin.h b/gcc/config/i386/avx512vlbwintrin.h
index 9f0a5b4..537e408 100644
--- a/gcc/config/i386/avx512vlbwintrin.h
+++ b/gcc/config/i386/avx512vlbwintrin.h
@@ -28,9 +28,9 @@
#ifndef _AVX512VLBWINTRIN_H_INCLUDED
#define _AVX512VLBWINTRIN_H_INCLUDED
-#if !defined(__AVX512VL__) || !defined(__AVX512BW__) || defined (__EVEX512__)
+#if !defined(__AVX512VL__) || !defined(__AVX512BW__)
#pragma GCC push_options
-#pragma GCC target("avx512vl,avx512bw,no-evex512")
+#pragma GCC target("avx512vl,avx512bw")
#define __DISABLE_AVX512VLBW__
#endif /* __AVX512VLBW__ */
diff --git a/gcc/config/i386/avx512vldqintrin.h b/gcc/config/i386/avx512vldqintrin.h
index 3b23d4a..5783dbe 100644
--- a/gcc/config/i386/avx512vldqintrin.h
+++ b/gcc/config/i386/avx512vldqintrin.h
@@ -28,9 +28,9 @@
#ifndef _AVX512VLDQINTRIN_H_INCLUDED
#define _AVX512VLDQINTRIN_H_INCLUDED
-#if !defined(__AVX512VL__) || !defined(__AVX512DQ__) || defined (__EVEX512__)
+#if !defined(__AVX512VL__) || !defined(__AVX512DQ__)
#pragma GCC push_options
-#pragma GCC target("avx512vl,avx512dq,no-evex512")
+#pragma GCC target("avx512vl,avx512dq")
#define __DISABLE_AVX512VLDQ__
#endif /* __AVX512VLDQ__ */
diff --git a/gcc/config/i386/avx512vlintrin.h b/gcc/config/i386/avx512vlintrin.h
index 4451a1f..50930cd 100644
--- a/gcc/config/i386/avx512vlintrin.h
+++ b/gcc/config/i386/avx512vlintrin.h
@@ -28,9 +28,9 @@
#ifndef _AVX512VLINTRIN_H_INCLUDED
#define _AVX512VLINTRIN_H_INCLUDED
-#if !defined (__AVX512VL__) || defined (__EVEX512__)
+#if !defined (__AVX512VL__)
#pragma GCC push_options
-#pragma GCC target("avx512vl,no-evex512")
+#pragma GCC target("avx512vl")
#define __DISABLE_AVX512VL__
#endif /* __AVX512VL__ */
@@ -13650,7 +13650,7 @@ _mm256_permutex_pd (__m256d __X, const int __M)
#if !defined (__AVX512CD__) || !defined (__AVX512VL__)
#pragma GCC push_options
-#pragma GCC target("avx512vl,avx512cd,no-evex512")
+#pragma GCC target("avx512vl,avx512cd")
#define __DISABLE_AVX512VLCD__
#endif
diff --git a/gcc/config/i386/avx512vnniintrin.h b/gcc/config/i386/avx512vnniintrin.h
index 5d0eaff..fe7b663 100644
--- a/gcc/config/i386/avx512vnniintrin.h
+++ b/gcc/config/i386/avx512vnniintrin.h
@@ -28,9 +28,9 @@
#ifndef __AVX512VNNIINTRIN_H_INCLUDED
#define __AVX512VNNIINTRIN_H_INCLUDED
-#if !defined(__AVX512VNNI__) || !defined (__EVEX512__)
+#if !defined(__AVX512VNNI__)
#pragma GCC push_options
-#pragma GCC target("avx512vnni,evex512")
+#pragma GCC target("avx512vnni")
#define __DISABLE_AVX512VNNI__
#endif /* __AVX512VNNI__ */
diff --git a/gcc/config/i386/avx512vnnivlintrin.h b/gcc/config/i386/avx512vnnivlintrin.h
index 7774bbd..01c3c91 100644
--- a/gcc/config/i386/avx512vnnivlintrin.h
+++ b/gcc/config/i386/avx512vnnivlintrin.h
@@ -28,9 +28,9 @@
#ifndef _AVX512VNNIVLINTRIN_H_INCLUDED
#define _AVX512VNNIVLINTRIN_H_INCLUDED
-#if !defined(__AVX512VL__) || !defined(__AVX512VNNI__) || defined (__EVEX512__)
+#if !defined(__AVX512VL__) || !defined(__AVX512VNNI__)
#pragma GCC push_options
-#pragma GCC target("avx512vnni,avx512vl,no-evex512")
+#pragma GCC target("avx512vnni,avx512vl")
#define __DISABLE_AVX512VNNIVL__
#endif /* __AVX512VNNIVL__ */
diff --git a/gcc/config/i386/avx512vp2intersectintrin.h b/gcc/config/i386/avx512vp2intersectintrin.h
index e170cf5..50f7ead 100644
--- a/gcc/config/i386/avx512vp2intersectintrin.h
+++ b/gcc/config/i386/avx512vp2intersectintrin.h
@@ -28,9 +28,9 @@
#ifndef _AVX512VP2INTERSECTINTRIN_H_INCLUDED
#define _AVX512VP2INTERSECTINTRIN_H_INCLUDED
-#if !defined(__AVX512VP2INTERSECT__) || !defined (__EVEX512__)
+#if !defined(__AVX512VP2INTERSECT__)
#pragma GCC push_options
-#pragma GCC target("avx512vp2intersect,evex512")
+#pragma GCC target("avx512vp2intersect")
#define __DISABLE_AVX512VP2INTERSECT__
#endif /* __AVX512VP2INTERSECT__ */
diff --git a/gcc/config/i386/avx512vp2intersectvlintrin.h b/gcc/config/i386/avx512vp2intersectvlintrin.h
index afdd2da..3e0a8ab 100644
--- a/gcc/config/i386/avx512vp2intersectvlintrin.h
+++ b/gcc/config/i386/avx512vp2intersectvlintrin.h
@@ -28,10 +28,9 @@
#ifndef _AVX512VP2INTERSECTVLINTRIN_H_INCLUDED
#define _AVX512VP2INTERSECTVLINTRIN_H_INCLUDED
-#if !defined(__AVX512VP2INTERSECT__) || !defined(__AVX512VL__) \
- || defined (__EVEX512__)
+#if !defined(__AVX512VP2INTERSECT__) || !defined(__AVX512VL__)
#pragma GCC push_options
-#pragma GCC target("avx512vp2intersect,avx512vl,no-evex512")
+#pragma GCC target("avx512vp2intersect,avx512vl")
#define __DISABLE_AVX512VP2INTERSECTVL__
#endif /* __AVX512VP2INTERSECTVL__ */
diff --git a/gcc/config/i386/avx512vpopcntdqintrin.h b/gcc/config/i386/avx512vpopcntdqintrin.h
index 3357255..e4b89ea 100644
--- a/gcc/config/i386/avx512vpopcntdqintrin.h
+++ b/gcc/config/i386/avx512vpopcntdqintrin.h
@@ -28,9 +28,9 @@
#ifndef _AVX512VPOPCNTDQINTRIN_H_INCLUDED
#define _AVX512VPOPCNTDQINTRIN_H_INCLUDED
-#if !defined (__AVX512VPOPCNTDQ__) || !defined (__EVEX512__)
+#if !defined (__AVX512VPOPCNTDQ__)
#pragma GCC push_options
-#pragma GCC target("avx512vpopcntdq,evex512")
+#pragma GCC target("avx512vpopcntdq")
#define __DISABLE_AVX512VPOPCNTDQ__
#endif /* __AVX512VPOPCNTDQ__ */
diff --git a/gcc/config/i386/avx512vpopcntdqvlintrin.h b/gcc/config/i386/avx512vpopcntdqvlintrin.h
index 17d836f..8eb1d42 100644
--- a/gcc/config/i386/avx512vpopcntdqvlintrin.h
+++ b/gcc/config/i386/avx512vpopcntdqvlintrin.h
@@ -28,10 +28,9 @@
#ifndef _AVX512VPOPCNTDQVLINTRIN_H_INCLUDED
#define _AVX512VPOPCNTDQVLINTRIN_H_INCLUDED
-#if !defined(__AVX512VPOPCNTDQ__) || !defined(__AVX512VL__) \
- || defined (__EVEX512__)
+#if !defined(__AVX512VPOPCNTDQ__) || !defined(__AVX512VL__)
#pragma GCC push_options
-#pragma GCC target("avx512vpopcntdq,avx512vl,no-evex512")
+#pragma GCC target("avx512vpopcntdq,avx512vl")
#define __DISABLE_AVX512VPOPCNTDQVL__
#endif /* __AVX512VPOPCNTDQVL__ */
diff --git a/gcc/config/i386/cygming.h b/gcc/config/i386/cygming.h
index 3ddcbec..0a3173c 100644
--- a/gcc/config/i386/cygming.h
+++ b/gcc/config/i386/cygming.h
@@ -28,16 +28,15 @@ along with GCC; see the file COPYING3. If not see
#undef TARGET_SEH
#define TARGET_SEH (TARGET_64BIT_MS_ABI && flag_unwind_tables)
+#undef PREFERRED_STACK_BOUNDARY_DEFAULT
+#define PREFERRED_STACK_BOUNDARY_DEFAULT \
+ (TARGET_64BIT ? 128 : MIN_STACK_BOUNDARY)
+
/* Win64 with SEH cannot represent DRAP stack frames. Disable its use.
Force the use of different mechanisms to allocate aligned local data. */
#undef MAX_STACK_ALIGNMENT
#define MAX_STACK_ALIGNMENT (TARGET_SEH ? 128 : MAX_OFILE_ALIGNMENT)
-/* 32-bit Windows aligns the stack on a 4-byte boundary but SSE instructions
- may require 16-byte alignment. */
-#undef STACK_REALIGN_DEFAULT
-#define STACK_REALIGN_DEFAULT TARGET_SSE
-
/* Support hooks for SEH. */
#undef TARGET_ASM_UNWIND_EMIT
#define TARGET_ASM_UNWIND_EMIT i386_pe_seh_unwind_emit
@@ -247,9 +246,10 @@ do { \
#undef ASM_OUTPUT_LABELREF
#define ASM_OUTPUT_LABELREF(STREAM, NAME) \
do { \
+ const char *prefix = ""; \
if ((NAME)[0] != FASTCALL_PREFIX) \
- fputs (user_label_prefix, (STREAM)); \
- fputs ((NAME), (STREAM)); \
+ prefix = user_label_prefix; \
+ ix86_asm_output_labelref ((STREAM), prefix, (NAME)); \
} while (0)
/* This does much the same in memory rather than to a stream. */
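
Two separate cleanups land in cygming.h: PREFERRED_STACK_BOUNDARY_DEFAULT is now 128 bits only for 64-bit targets (32-bit Windows keeps the ABI-minimum boundary, and the blanket STACK_REALIGN_DEFAULT tied to TARGET_SSE is dropped), and ASM_OUTPUT_LABELREF is rerouted through ix86_asm_output_labelref. A hedged illustration of the realignment point (not from the patch): an over-aligned local still forces dynamic realignment through its own alignment, so the blanket default is unnecessary.

    /* Illustrative only: on 32-bit mingw the incoming stack is 4-byte
       aligned, so this 16-byte-aligned local must trigger dynamic stack
       realignment via the object's alignment, with or without a global
       STACK_REALIGN_DEFAULT.  */
    #include <immintrin.h>

    void consume (__m128 *p);

    void
    use_aligned_local (float x)
    {
      __m128 v = _mm_set1_ps (x);  /* needs a 16-byte-aligned slot */
      consume (&v);                /* address taken: must live on the stack */
    }
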
diff --git a/gcc/config/i386/driver-i386.cc b/gcc/config/i386/driver-i386.cc
index 1ff05e5..fe71f55 100644
--- a/gcc/config/i386/driver-i386.cc
+++ b/gcc/config/i386/driver-i386.cc
@@ -374,33 +374,6 @@ detect_caches_intel (bool xeon_mp, unsigned max_level,
#define has_feature(f) \
has_cpu_feature (&cpu_model, cpu_features2, f)
-/* We will emit a warning when using AVX10.1 and AVX512 options with one
- enabled and the other disabled. Add this function to avoid push "-mno-"
- options under this scenario for -march=native. */
-
-bool check_avx512_features (__processor_model &cpu_model,
- unsigned int (&cpu_features2)[SIZE_OF_CPU_FEATURES],
- const enum processor_features feature)
-{
- if (has_feature (FEATURE_AVX10_1_256)
- && ((feature == FEATURE_AVX512F)
- || (feature == FEATURE_AVX512CD)
- || (feature == FEATURE_AVX512DQ)
- || (feature == FEATURE_AVX512BW)
- || (feature == FEATURE_AVX512VL)
- || (feature == FEATURE_AVX512IFMA)
- || (feature == FEATURE_AVX512VBMI)
- || (feature == FEATURE_AVX512VBMI2)
- || (feature == FEATURE_AVX512VNNI)
- || (feature == FEATURE_AVX512VPOPCNTDQ)
- || (feature == FEATURE_AVX512BITALG)
- || (feature == FEATURE_AVX512FP16)
- || (feature == FEATURE_AVX512BF16)))
- return false;
-
- return true;
-}
-
/* This will be called by the spec parser in gcc.cc when it sees
a %:local_cpu_detect(args) construct. Currently it will be
called with either "arch [32|64]" or "tune [32|64]" as argument
@@ -627,7 +600,7 @@ const char *host_detect_local_cpu (int argc, const char **argv)
if (has_feature (FEATURE_AVX512F))
{
/* Assume Diamond Rapids. */
- if (has_feature (FEATURE_AMX_TRANSPOSE))
+ if (has_feature (FEATURE_AMX_FP8))
cpu = "diamondrapids";
/* Assume Granite Rapids D. */
else if (has_feature (FEATURE_AMX_COMPLEX))
@@ -909,12 +882,7 @@ const char *host_detect_local_cpu (int argc, const char **argv)
options = concat (options, " ",
isa_names_table[i].option, NULL);
}
- /* Never push -mno-avx10.1-{256,512} under -march=native to
- avoid unnecessary warnings when building libraries. */
- else if (isa_names_table[i].feature != FEATURE_AVX10_1_256
- && isa_names_table[i].feature != FEATURE_AVX10_1
- && check_avx512_features (cpu_model, cpu_features2,
- isa_names_table[i].feature))
+ else
options = concat (options, neg_option,
isa_names_table[i].option + 2, NULL);
}
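
With check_avx512_features and the AVX10.1 carve-outs removed, -march=native now emits -mno-<feature> uniformly for every feature the host does not report. A condensed, self-contained model of the resulting loop (the table contents and structure below are illustrative, not copied from driver-i386.cc):

    /* Assumption-labeled sketch of the simplified option assembly:
       detected features become "-m..." and everything else becomes
       "-mno-...", with no AVX512/AVX10.1 special cases.  */
    #include <stdio.h>

    struct isa { const char *option; int detected; };

    int
    main (void)
    {
      struct isa table[] = {
        { "-mavx512f", 1 },
        { "-mavx512vl", 0 },
        { "-mamx-fp8", 0 },
      };
      for (unsigned i = 0; i < sizeof table / sizeof table[0]; i++)
        if (table[i].detected)
          printf (" %s", table[i].option);
        else
          printf (" -mno-%s", table[i].option + 2);  /* skip "-m" */
      putchar ('\n');
      return 0;
    }
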
diff --git a/gcc/config/i386/gcc-auto-profile b/gcc/config/i386/gcc-auto-profile
index 528b34e..0e9e5fe 100755
--- a/gcc/config/i386/gcc-auto-profile
+++ b/gcc/config/i386/gcc-auto-profile
@@ -24,8 +24,16 @@ if [ "$1" = "--all" ] ; then
shift
fi
-if ! grep -q Intel /proc/cpuinfo ; then
- echo >&2 "Only Intel CPUs supported"
+if grep -q AuthenticAMD /proc/cpuinfo ; then
+ vendor=AMD
+ if ! grep -q " brs" /proc/cpuinfo && ! grep -q amd_lbr_v2 /proc/cpuinfo ; then
+ echo >&2 "AMD CPU with brs (Zen 3) or amd_lbr_v2 (Zen 4+) feature is required"
+ exit 1
+ fi
+elif grep -q Intel /proc/cpuinfo ; then
+ vendor=Intel
+else
+ echo >&2 "Only AMD and Intel CPUs supported"
exit 1
fi
@@ -33,7 +41,7 @@ if grep -q hypervisor /proc/cpuinfo ; then
echo >&2 "Warning: branch profiling may not be functional in VMs"
fi
-case `grep -E -q "^cpu family\s*: 6" /proc/cpuinfo &&
+case `test $vendor = Intel && grep -E -q "^cpu family\s*: 6" /proc/cpuinfo &&
grep -E "^model\s*:" /proc/cpuinfo | head -n1` in
model*:\ 46|\
model*:\ 30|\
@@ -82,6 +90,8 @@ model*:\ 126|\
model*:\ 167|\
model*:\ 140|\
model*:\ 141|\
+model*:\ 143|\
+model*:\ 207|\
model*:\ 106|\
model*:\ 108|\
model*:\ 173|\
@@ -89,15 +99,20 @@ model*:\ 174) E="cpu/event=0xc4,umask=0x20/$FLAGS" ;;
model*:\ 134|\
model*:\ 150|\
model*:\ 156) E="cpu/event=0xc4,umask=0xfe/p$FLAGS" ;;
-model*:\ 143|\
-model*:\ 207) E="cpu/event=0xc4,umask=0x20/p$FLAGS" ;;
-model*:\ 190) E="cpu/event=0xc4,umask=0xc0/$FLAGS" ;;
+model*:\ 190|\
+model*:\ 175|\
+model*:\ 182) E="cpu/event=0xc4,umask=0xc0/$FLAGS" ;;
model*:\ 190) E="cpu/event=0xc4,umask=0xfe/$FLAGS" ;;
*)
if perf list br_inst_retired | grep -q br_inst_retired.near_taken ; then
E=br_inst_retired.near_taken:p
+ elif perf list ex_ret_brn_tkn | grep -q ex_ret_brn_tkn ; then
+ E=ex_ret_brn_tkn:P$FLAGS
+ elif test $vendor = Intel ; then
+echo >&2 "Unknown Intel CPU. Run contrib/gen_autofdo_event.py --all --script to update script."
+ exit 1
else
-echo >&2 "Unknown CPU. Run contrib/gen_autofdo_event.py --all --script to update script."
+echo >&2 "AMD CPU without support for ex_ret_brn_tkn event"
exit 1
fi ;;
esac
diff --git a/gcc/config/i386/gfniintrin.h b/gcc/config/i386/gfniintrin.h
index c7e21e6..bc433c2 100644
--- a/gcc/config/i386/gfniintrin.h
+++ b/gcc/config/i386/gfniintrin.h
@@ -297,9 +297,9 @@ _mm256_maskz_gf2p8affine_epi64_epi8 (__mmask32 __A, __m256i __B,
#pragma GCC pop_options
#endif /* __GFNIAVX512VLBW__ */
-#if !defined(__GFNI__) || !defined(__EVEX512__) || !defined(__AVX512F__)
+#if !defined(__GFNI__) || !defined(__AVX512F__)
#pragma GCC push_options
-#pragma GCC target("gfni,avx512f,evex512")
+#pragma GCC target("gfni,avx512f")
#define __DISABLE_GFNIAVX512F__
#endif /* __GFNIAVX512F__ */
@@ -341,9 +341,9 @@ _mm512_gf2p8affine_epi64_epi8 (__m512i __A, __m512i __B, const int __C)
#pragma GCC pop_options
#endif /* __GFNIAVX512F__ */
-#if !defined(__GFNI__) || !defined(__EVEX512__) || !defined(__AVX512BW__)
+#if !defined(__GFNI__) || !defined(__AVX512BW__)
#pragma GCC push_options
-#pragma GCC target("gfni,avx512bw,evex512")
+#pragma GCC target("gfni,avx512bw")
#define __DISABLE_GFNIAVX512FBW__
#endif /* __GFNIAVX512FBW__ */
diff --git a/gcc/config/i386/host-mingw32.cc b/gcc/config/i386/host-mingw32.cc
index e083f49..87804a5 100644
--- a/gcc/config/i386/host-mingw32.cc
+++ b/gcc/config/i386/host-mingw32.cc
@@ -135,7 +135,6 @@ mingw32_gt_pch_use_address (void *&addr, size_t size, int fd,
and earlier, backslashes are invalid in object name. So, we need
to check if we are on Windows2000 or higher. */
OSVERSIONINFO version_info;
- int r;
version_info.dwOSVersionInfoSize = sizeof (version_info);
@@ -169,25 +168,24 @@ mingw32_gt_pch_use_address (void *&addr, size_t size, int fd,
return -1;
}
- /* Retry five times, as here might occure a race with multiple gcc's
- instances at same time. */
- for (r = 0; r < 5; r++)
- {
- mmap_addr = MapViewOfFileEx (mmap_handle, FILE_MAP_COPY, 0, offset,
- size, addr);
- if (mmap_addr == addr)
- break;
- if (r != 4)
- Sleep (500);
- }
-
- if (mmap_addr != addr)
+ /* Try mapping the file at `addr`. */
+ mmap_addr = MapViewOfFileEx (mmap_handle, FILE_MAP_COPY, 0, offset,
+ size, addr);
+ if (mmap_addr == NULL)
{
- w32_error (__FUNCTION__, __FILE__, __LINE__, "MapViewOfFileEx");
- CloseHandle(mmap_handle);
- return -1;
+ /* We could not map the file at its original address, so let the
+ system choose a different one. The PCH can be relocated later. */
+ mmap_addr = MapViewOfFileEx (mmap_handle, FILE_MAP_COPY, 0, offset,
+ size, NULL);
+ if (mmap_addr == NULL)
+ {
+ w32_error (__FUNCTION__, __FILE__, __LINE__, "MapViewOfFileEx");
+ CloseHandle(mmap_handle);
+ return -1;
+ }
}
+ addr = mmap_addr;
return 1;
}
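
The five-attempt retry loop is gone: the PCH file is mapped once at its preferred address and, on failure, once more at a system-chosen address, with the final address reported back through addr so the PCH machinery can relocate it. A rough POSIX analogue of the same two-step pattern (an assumption-laden sketch; MAP_FIXED_NOREPLACE is Linux-specific, and this is not the code GCC uses on mingw):

    #define _GNU_SOURCE          /* for MAP_FIXED_NOREPLACE (Linux >= 4.17) */
    #include <stddef.h>
    #include <sys/mman.h>
    #include <sys/types.h>

    static void *
    map_pch (void *preferred, size_t size, int fd, off_t offset)
    {
      /* Step 1: insist on the preferred address, failing cleanly if it is
         taken -- the analogue of MapViewOfFileEx with a non-NULL hint.  */
      void *p = mmap (preferred, size, PROT_READ | PROT_WRITE,
                      MAP_PRIVATE | MAP_FIXED_NOREPLACE, fd, offset);
      if (p == MAP_FAILED)
        /* Step 2: let the kernel pick; the caller must then relocate the
           PCH contents, as mingw32_gt_pch_use_address now signals via addr.  */
        p = mmap (NULL, size, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, offset);
      return p == MAP_FAILED ? NULL : p;
    }
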
diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def
index a142711..fe42c6436 100644
--- a/gcc/config/i386/i386-builtin.def
+++ b/gcc/config/i386/i386-builtin.def
@@ -204,53 +204,53 @@ BDESC (OPTION_MASK_ISA_AVX2, 0, CODE_FOR_avx2_maskstored256, "__builtin_ia32_mas
BDESC (OPTION_MASK_ISA_AVX2, 0, CODE_FOR_avx2_maskstoreq256, "__builtin_ia32_maskstoreq256", IX86_BUILTIN_MASKSTOREQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_V4DI)
/* AVX512F */
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_compressstorev16sf_mask, "__builtin_ia32_compressstoresf512_mask", IX86_BUILTIN_COMPRESSPSSTORE512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_compressstorev16si_mask, "__builtin_ia32_compressstoresi512_mask", IX86_BUILTIN_PCOMPRESSDSTORE512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_compressstorev8df_mask, "__builtin_ia32_compressstoredf512_mask", IX86_BUILTIN_COMPRESSPDSTORE512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_compressstorev8di_mask, "__builtin_ia32_compressstoredi512_mask", IX86_BUILTIN_PCOMPRESSQSTORE512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_expandv16sf_mask, "__builtin_ia32_expandloadsf512_mask", IX86_BUILTIN_EXPANDPSLOAD512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_expandv16sf_maskz, "__builtin_ia32_expandloadsf512_maskz", IX86_BUILTIN_EXPANDPSLOAD512Z, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_expandv16si_mask, "__builtin_ia32_expandloadsi512_mask", IX86_BUILTIN_PEXPANDDLOAD512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_expandv16si_maskz, "__builtin_ia32_expandloadsi512_maskz", IX86_BUILTIN_PEXPANDDLOAD512Z, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_expandv8df_mask, "__builtin_ia32_expandloaddf512_mask", IX86_BUILTIN_EXPANDPDLOAD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_expandv8df_maskz, "__builtin_ia32_expandloaddf512_maskz", IX86_BUILTIN_EXPANDPDLOAD512Z, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_expandv8di_mask, "__builtin_ia32_expandloaddi512_mask", IX86_BUILTIN_PEXPANDQLOAD512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_expandv8di_maskz, "__builtin_ia32_expandloaddi512_maskz", IX86_BUILTIN_PEXPANDQLOAD512Z, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_loadv16si_mask, "__builtin_ia32_loaddqusi512_mask", IX86_BUILTIN_LOADDQUSI512, UNKNOWN, (int) V16SI_FTYPE_PCINT_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_loadv8di_mask, "__builtin_ia32_loaddqudi512_mask", IX86_BUILTIN_LOADDQUDI512, UNKNOWN, (int) V8DI_FTYPE_PCINT64_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_loadv8df_mask, "__builtin_ia32_loadupd512_mask", IX86_BUILTIN_LOADUPD512, UNKNOWN, (int) V8DF_FTYPE_PCDOUBLE_V8DF_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_loadv16sf_mask, "__builtin_ia32_loadups512_mask", IX86_BUILTIN_LOADUPS512, UNKNOWN, (int) V16SF_FTYPE_PCFLOAT_V16SF_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_loadv16sf_mask, "__builtin_ia32_loadaps512_mask", IX86_BUILTIN_LOADAPS512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_loadv16si_mask, "__builtin_ia32_movdqa32load512_mask", IX86_BUILTIN_MOVDQA32LOAD512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_loadv8df_mask, "__builtin_ia32_loadapd512_mask", IX86_BUILTIN_LOADAPD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_loadv8di_mask, "__builtin_ia32_movdqa64load512_mask", IX86_BUILTIN_MOVDQA64LOAD512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_movntv16sf, "__builtin_ia32_movntps512", IX86_BUILTIN_MOVNTPS512, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V16SF)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_movntv8df, "__builtin_ia32_movntpd512", IX86_BUILTIN_MOVNTPD512, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V8DF)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_movntv8di, "__builtin_ia32_movntdq512", IX86_BUILTIN_MOVNTDQ512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_movntdqa, "__builtin_ia32_movntdqa512", IX86_BUILTIN_MOVNTDQA512, UNKNOWN, (int) V8DI_FTYPE_PV8DI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_storev16si_mask, "__builtin_ia32_storedqusi512_mask", IX86_BUILTIN_STOREDQUSI512, UNKNOWN, (int) VOID_FTYPE_PINT_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_storev8di_mask, "__builtin_ia32_storedqudi512_mask", IX86_BUILTIN_STOREDQUDI512, UNKNOWN, (int) VOID_FTYPE_PINT64_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_storev8df_mask, "__builtin_ia32_storeupd512_mask", IX86_BUILTIN_STOREUPD512, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V8DF_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_us_truncatev8div8si2_mask_store, "__builtin_ia32_pmovusqd512mem_mask", IX86_BUILTIN_PMOVUSQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_ss_truncatev8div8si2_mask_store, "__builtin_ia32_pmovsqd512mem_mask", IX86_BUILTIN_PMOVSQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_truncatev8div8si2_mask_store, "__builtin_ia32_pmovqd512mem_mask", IX86_BUILTIN_PMOVQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_us_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovusqw512mem_mask", IX86_BUILTIN_PMOVUSQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_ss_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovsqw512mem_mask", IX86_BUILTIN_PMOVSQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovqw512mem_mask", IX86_BUILTIN_PMOVQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_us_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovusdw512mem_mask", IX86_BUILTIN_PMOVUSDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_ss_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovsdw512mem_mask", IX86_BUILTIN_PMOVSDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovdw512mem_mask", IX86_BUILTIN_PMOVDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_truncatev8div16qi2_mask_store_2, "__builtin_ia32_pmovqb512mem_mask", IX86_BUILTIN_PMOVQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PUDI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_us_truncatev8div16qi2_mask_store_2, "__builtin_ia32_pmovusqb512mem_mask", IX86_BUILTIN_PMOVUSQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PUDI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_ss_truncatev8div16qi2_mask_store_2, "__builtin_ia32_pmovsqb512mem_mask", IX86_BUILTIN_PMOVSQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PUDI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_us_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovusdb512mem_mask", IX86_BUILTIN_PMOVUSDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_ss_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovsdb512mem_mask", IX86_BUILTIN_PMOVSDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovdb512mem_mask", IX86_BUILTIN_PMOVDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_storev16sf_mask, "__builtin_ia32_storeups512_mask", IX86_BUILTIN_STOREUPS512, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V16SF_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_storev16sf_mask, "__builtin_ia32_storeaps512_mask", IX86_BUILTIN_STOREAPS512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_storev16si_mask, "__builtin_ia32_movdqa32store512_mask", IX86_BUILTIN_MOVDQA32STORE512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_storev8df_mask, "__builtin_ia32_storeapd512_mask", IX86_BUILTIN_STOREAPD512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_storev8di_mask, "__builtin_ia32_movdqa64store512_mask", IX86_BUILTIN_MOVDQA64STORE512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_compressstorev16sf_mask, "__builtin_ia32_compressstoresf512_mask", IX86_BUILTIN_COMPRESSPSSTORE512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_compressstorev16si_mask, "__builtin_ia32_compressstoresi512_mask", IX86_BUILTIN_PCOMPRESSDSTORE512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_compressstorev8df_mask, "__builtin_ia32_compressstoredf512_mask", IX86_BUILTIN_COMPRESSPDSTORE512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_compressstorev8di_mask, "__builtin_ia32_compressstoredi512_mask", IX86_BUILTIN_PCOMPRESSQSTORE512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_expandv16sf_mask, "__builtin_ia32_expandloadsf512_mask", IX86_BUILTIN_EXPANDPSLOAD512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_expandv16sf_maskz, "__builtin_ia32_expandloadsf512_maskz", IX86_BUILTIN_EXPANDPSLOAD512Z, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_expandv16si_mask, "__builtin_ia32_expandloadsi512_mask", IX86_BUILTIN_PEXPANDDLOAD512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_expandv16si_maskz, "__builtin_ia32_expandloadsi512_maskz", IX86_BUILTIN_PEXPANDDLOAD512Z, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_expandv8df_mask, "__builtin_ia32_expandloaddf512_mask", IX86_BUILTIN_EXPANDPDLOAD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_expandv8df_maskz, "__builtin_ia32_expandloaddf512_maskz", IX86_BUILTIN_EXPANDPDLOAD512Z, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_expandv8di_mask, "__builtin_ia32_expandloaddi512_mask", IX86_BUILTIN_PEXPANDQLOAD512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_expandv8di_maskz, "__builtin_ia32_expandloaddi512_maskz", IX86_BUILTIN_PEXPANDQLOAD512Z, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_loadv16si_mask, "__builtin_ia32_loaddqusi512_mask", IX86_BUILTIN_LOADDQUSI512, UNKNOWN, (int) V16SI_FTYPE_PCINT_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_loadv8di_mask, "__builtin_ia32_loaddqudi512_mask", IX86_BUILTIN_LOADDQUDI512, UNKNOWN, (int) V8DI_FTYPE_PCINT64_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_loadv8df_mask, "__builtin_ia32_loadupd512_mask", IX86_BUILTIN_LOADUPD512, UNKNOWN, (int) V8DF_FTYPE_PCDOUBLE_V8DF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_loadv16sf_mask, "__builtin_ia32_loadups512_mask", IX86_BUILTIN_LOADUPS512, UNKNOWN, (int) V16SF_FTYPE_PCFLOAT_V16SF_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_loadv16sf_mask, "__builtin_ia32_loadaps512_mask", IX86_BUILTIN_LOADAPS512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_loadv16si_mask, "__builtin_ia32_movdqa32load512_mask", IX86_BUILTIN_MOVDQA32LOAD512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_loadv8df_mask, "__builtin_ia32_loadapd512_mask", IX86_BUILTIN_LOADAPD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_loadv8di_mask, "__builtin_ia32_movdqa64load512_mask", IX86_BUILTIN_MOVDQA64LOAD512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_movntv16sf, "__builtin_ia32_movntps512", IX86_BUILTIN_MOVNTPS512, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V16SF)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_movntv8df, "__builtin_ia32_movntpd512", IX86_BUILTIN_MOVNTPD512, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V8DF)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_movntv8di, "__builtin_ia32_movntdq512", IX86_BUILTIN_MOVNTDQ512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_movntdqa, "__builtin_ia32_movntdqa512", IX86_BUILTIN_MOVNTDQA512, UNKNOWN, (int) V8DI_FTYPE_PV8DI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_storev16si_mask, "__builtin_ia32_storedqusi512_mask", IX86_BUILTIN_STOREDQUSI512, UNKNOWN, (int) VOID_FTYPE_PINT_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_storev8di_mask, "__builtin_ia32_storedqudi512_mask", IX86_BUILTIN_STOREDQUDI512, UNKNOWN, (int) VOID_FTYPE_PINT64_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_storev8df_mask, "__builtin_ia32_storeupd512_mask", IX86_BUILTIN_STOREUPD512, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V8DF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_us_truncatev8div8si2_mask_store, "__builtin_ia32_pmovusqd512mem_mask", IX86_BUILTIN_PMOVUSQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_ss_truncatev8div8si2_mask_store, "__builtin_ia32_pmovsqd512mem_mask", IX86_BUILTIN_PMOVSQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_truncatev8div8si2_mask_store, "__builtin_ia32_pmovqd512mem_mask", IX86_BUILTIN_PMOVQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_us_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovusqw512mem_mask", IX86_BUILTIN_PMOVUSQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_ss_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovsqw512mem_mask", IX86_BUILTIN_PMOVSQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovqw512mem_mask", IX86_BUILTIN_PMOVQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_us_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovusdw512mem_mask", IX86_BUILTIN_PMOVUSDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_ss_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovsdw512mem_mask", IX86_BUILTIN_PMOVSDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovdw512mem_mask", IX86_BUILTIN_PMOVDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_truncatev8div16qi2_mask_store_2, "__builtin_ia32_pmovqb512mem_mask", IX86_BUILTIN_PMOVQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PUDI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_us_truncatev8div16qi2_mask_store_2, "__builtin_ia32_pmovusqb512mem_mask", IX86_BUILTIN_PMOVUSQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PUDI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_ss_truncatev8div16qi2_mask_store_2, "__builtin_ia32_pmovsqb512mem_mask", IX86_BUILTIN_PMOVSQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PUDI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_us_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovusdb512mem_mask", IX86_BUILTIN_PMOVUSDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_ss_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovsdb512mem_mask", IX86_BUILTIN_PMOVSDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovdb512mem_mask", IX86_BUILTIN_PMOVDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_storev16sf_mask, "__builtin_ia32_storeups512_mask", IX86_BUILTIN_STOREUPS512, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V16SF_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_storev16sf_mask, "__builtin_ia32_storeaps512_mask", IX86_BUILTIN_STOREAPS512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_storev16si_mask, "__builtin_ia32_movdqa32store512_mask", IX86_BUILTIN_MOVDQA32STORE512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_storev8df_mask, "__builtin_ia32_storeapd512_mask", IX86_BUILTIN_STOREAPD512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_storev8di_mask, "__builtin_ia32_movdqa64store512_mask", IX86_BUILTIN_MOVDQA64STORE512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_UQI)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_loaddf_mask, "__builtin_ia32_loadsd_mask", IX86_BUILTIN_LOADSD_MASK, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE_V2DF_UQI)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_loadsf_mask, "__builtin_ia32_loadss_mask", IX86_BUILTIN_LOADSS_MASK, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT_V4SF_UQI)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_storedf_mask, "__builtin_ia32_storesd_mask", IX86_BUILTIN_STORESD_MASK, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF_UQI)
@@ -297,14 +297,14 @@ BDESC (OPTION_MASK_ISA_64BIT, OPTION_MASK_ISA2_CMPCCXADD, CODE_FOR_cmpccxadd_si,
BDESC (OPTION_MASK_ISA_64BIT, OPTION_MASK_ISA2_CMPCCXADD, CODE_FOR_cmpccxadd_di, "__builtin_ia32_cmpccxadd64", IX86_BUILTIN_CMPCCXADD64, UNKNOWN, (int) LONGLONG_FTYPE_PLONGLONG_LONGLONG_LONGLONG_INT)
/* AVX512BW */
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_loadv32hi_mask, "__builtin_ia32_loaddquhi512_mask", IX86_BUILTIN_LOADDQUHI512_MASK, UNKNOWN, (int) V32HI_FTYPE_PCSHORT_V32HI_USI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_loadv64qi_mask, "__builtin_ia32_loaddquqi512_mask", IX86_BUILTIN_LOADDQUQI512_MASK, UNKNOWN, (int) V64QI_FTYPE_PCCHAR_V64QI_UDI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_storev32hi_mask, "__builtin_ia32_storedquhi512_mask", IX86_BUILTIN_STOREDQUHI512_MASK, UNKNOWN, (int) VOID_FTYPE_PSHORT_V32HI_USI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_storev64qi_mask, "__builtin_ia32_storedquqi512_mask", IX86_BUILTIN_STOREDQUQI512_MASK, UNKNOWN, (int) VOID_FTYPE_PCHAR_V64QI_UDI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_loadv32hi_mask, "__builtin_ia32_loaddquhi512_mask", IX86_BUILTIN_LOADDQUHI512_MASK, UNKNOWN, (int) V32HI_FTYPE_PCSHORT_V32HI_USI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_loadv64qi_mask, "__builtin_ia32_loaddquqi512_mask", IX86_BUILTIN_LOADDQUQI512_MASK, UNKNOWN, (int) V64QI_FTYPE_PCCHAR_V64QI_UDI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_storev32hi_mask, "__builtin_ia32_storedquhi512_mask", IX86_BUILTIN_STOREDQUHI512_MASK, UNKNOWN, (int) VOID_FTYPE_PSHORT_V32HI_USI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_storev64qi_mask, "__builtin_ia32_storedquqi512_mask", IX86_BUILTIN_STOREDQUQI512_MASK, UNKNOWN, (int) VOID_FTYPE_PCHAR_V64QI_UDI)
/* AVX512VP2INTERSECT */
-BDESC (0, OPTION_MASK_ISA2_AVX512VP2INTERSECT | OPTION_MASK_ISA2_EVEX512, CODE_FOR_nothing, "__builtin_ia32_2intersectd512", IX86_BUILTIN_2INTERSECTD512, UNKNOWN, (int) VOID_FTYPE_PUHI_PUHI_V16SI_V16SI)
-BDESC (0, OPTION_MASK_ISA2_AVX512VP2INTERSECT | OPTION_MASK_ISA2_EVEX512, CODE_FOR_nothing, "__builtin_ia32_2intersectq512", IX86_BUILTIN_2INTERSECTQ512, UNKNOWN, (int) VOID_FTYPE_PUQI_PUQI_V8DI_V8DI)
+BDESC (0, OPTION_MASK_ISA2_AVX512VP2INTERSECT, CODE_FOR_nothing, "__builtin_ia32_2intersectd512", IX86_BUILTIN_2INTERSECTD512, UNKNOWN, (int) VOID_FTYPE_PUHI_PUHI_V16SI_V16SI)
+BDESC (0, OPTION_MASK_ISA2_AVX512VP2INTERSECT, CODE_FOR_nothing, "__builtin_ia32_2intersectq512", IX86_BUILTIN_2INTERSECTQ512, UNKNOWN, (int) VOID_FTYPE_PUQI_PUQI_V8DI_V8DI)
BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512VP2INTERSECT, CODE_FOR_nothing, "__builtin_ia32_2intersectd256", IX86_BUILTIN_2INTERSECTD256, UNKNOWN, (int) VOID_FTYPE_PUQI_PUQI_V8SI_V8SI)
BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512VP2INTERSECT, CODE_FOR_nothing, "__builtin_ia32_2intersectq256", IX86_BUILTIN_2INTERSECTQ256, UNKNOWN, (int) VOID_FTYPE_PUQI_PUQI_V4DI_V4DI)
BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512VP2INTERSECT, CODE_FOR_nothing, "__builtin_ia32_2intersectd128", IX86_BUILTIN_2INTERSECTD128, UNKNOWN, (int) VOID_FTYPE_PUQI_PUQI_V4SI_V4SI)
@@ -411,9 +411,9 @@ BDESC (OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl
BDESC (OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_ss_truncatev16hiv16qi2_mask_store, "__builtin_ia32_pmovswb256mem_mask", IX86_BUILTIN_PMOVSWB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16HI_UHI)
BDESC (OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_us_truncatev8hiv8qi2_mask_store_2, "__builtin_ia32_pmovuswb128mem_mask", IX86_BUILTIN_PMOVUSWB128_MEM, UNKNOWN, (int) VOID_FTYPE_PUDI_V8HI_UQI)
BDESC (OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_us_truncatev16hiv16qi2_mask_store, "__builtin_ia32_pmovuswb256mem_mask", IX86_BUILTIN_PMOVUSWB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16HI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_us_truncatev32hiv32qi2_mask_store, "__builtin_ia32_pmovuswb512mem_mask", IX86_BUILTIN_PMOVUSWB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV32QI_V32HI_USI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_ss_truncatev32hiv32qi2_mask_store, "__builtin_ia32_pmovswb512mem_mask", IX86_BUILTIN_PMOVSWB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV32QI_V32HI_USI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_truncatev32hiv32qi2_mask_store, "__builtin_ia32_pmovwb512mem_mask", IX86_BUILTIN_PMOVWB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV32QI_V32HI_USI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_us_truncatev32hiv32qi2_mask_store, "__builtin_ia32_pmovuswb512mem_mask", IX86_BUILTIN_PMOVUSWB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV32QI_V32HI_USI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_ss_truncatev32hiv32qi2_mask_store, "__builtin_ia32_pmovswb512mem_mask", IX86_BUILTIN_PMOVSWB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV32QI_V32HI_USI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_truncatev32hiv32qi2_mask_store, "__builtin_ia32_pmovwb512mem_mask", IX86_BUILTIN_PMOVWB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV32QI_V32HI_USI)
/* AVX512FP16 */
BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512f_loadhf_mask, "__builtin_ia32_loadsh_mask", IX86_BUILTIN_LOADSH_MASK, UNKNOWN, (int) V8HF_FTYPE_PCFLOAT16_V8HF_UQI)
@@ -434,17 +434,17 @@ BDESC (OPTION_MASK_ISA_PKU, 0, CODE_FOR_rdpkru, "__builtin_ia32_rdpkru", IX86_B
BDESC (OPTION_MASK_ISA_PKU, 0, CODE_FOR_wrpkru, "__builtin_ia32_wrpkru", IX86_BUILTIN_WRPKRU, UNKNOWN, (int) VOID_FTYPE_UNSIGNED)
/* VBMI2 */
-BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_compressstorev64qi_mask, "__builtin_ia32_compressstoreuqi512_mask", IX86_BUILTIN_PCOMPRESSBSTORE512, UNKNOWN, (int) VOID_FTYPE_PV64QI_V64QI_UDI)
-BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_compressstorev32hi_mask, "__builtin_ia32_compressstoreuhi512_mask", IX86_BUILTIN_PCOMPRESSWSTORE512, UNKNOWN, (int) VOID_FTYPE_PV32HI_V32HI_USI)
+BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_compressstorev64qi_mask, "__builtin_ia32_compressstoreuqi512_mask", IX86_BUILTIN_PCOMPRESSBSTORE512, UNKNOWN, (int) VOID_FTYPE_PV64QI_V64QI_UDI)
+BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_compressstorev32hi_mask, "__builtin_ia32_compressstoreuhi512_mask", IX86_BUILTIN_PCOMPRESSWSTORE512, UNKNOWN, (int) VOID_FTYPE_PV32HI_V32HI_USI)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_compressstorev32qi_mask, "__builtin_ia32_compressstoreuqi256_mask", IX86_BUILTIN_PCOMPRESSBSTORE256, UNKNOWN, (int) VOID_FTYPE_PV32QI_V32QI_USI)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_compressstorev16qi_mask, "__builtin_ia32_compressstoreuqi128_mask", IX86_BUILTIN_PCOMPRESSBSTORE128, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16QI_UHI)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_compressstorev16hi_mask, "__builtin_ia32_compressstoreuhi256_mask", IX86_BUILTIN_PCOMPRESSWSTORE256, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16HI_UHI)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_compressstorev8hi_mask, "__builtin_ia32_compressstoreuhi128_mask", IX86_BUILTIN_PCOMPRESSWSTORE128, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8HI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_expandv64qi_mask, "__builtin_ia32_expandloadqi512_mask", IX86_BUILTIN_PEXPANDBLOAD512, UNKNOWN, (int) V64QI_FTYPE_PCV64QI_V64QI_UDI)
-BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_expandv64qi_maskz, "__builtin_ia32_expandloadqi512_maskz", IX86_BUILTIN_PEXPANDBLOAD512Z, UNKNOWN, (int) V64QI_FTYPE_PCV64QI_V64QI_UDI)
-BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_expandv32hi_mask, "__builtin_ia32_expandloadhi512_mask", IX86_BUILTIN_PEXPANDWLOAD512, UNKNOWN, (int) V32HI_FTYPE_PCV32HI_V32HI_USI)
-BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_expandv32hi_maskz, "__builtin_ia32_expandloadhi512_maskz", IX86_BUILTIN_PEXPANDWLOAD512Z, UNKNOWN, (int) V32HI_FTYPE_PCV32HI_V32HI_USI)
+BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_expandv64qi_mask, "__builtin_ia32_expandloadqi512_mask", IX86_BUILTIN_PEXPANDBLOAD512, UNKNOWN, (int) V64QI_FTYPE_PCV64QI_V64QI_UDI)
+BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_expandv64qi_maskz, "__builtin_ia32_expandloadqi512_maskz", IX86_BUILTIN_PEXPANDBLOAD512Z, UNKNOWN, (int) V64QI_FTYPE_PCV64QI_V64QI_UDI)
+BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_expandv32hi_mask, "__builtin_ia32_expandloadhi512_mask", IX86_BUILTIN_PEXPANDWLOAD512, UNKNOWN, (int) V32HI_FTYPE_PCV32HI_V32HI_USI)
+BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_expandv32hi_maskz, "__builtin_ia32_expandloadhi512_maskz", IX86_BUILTIN_PEXPANDWLOAD512Z, UNKNOWN, (int) V32HI_FTYPE_PCV32HI_V32HI_USI)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_expandv32qi_mask, "__builtin_ia32_expandloadqi256_mask", IX86_BUILTIN_PEXPANDBLOAD256, UNKNOWN, (int) V32QI_FTYPE_PCV32QI_V32QI_USI)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_expandv32qi_maskz, "__builtin_ia32_expandloadqi256_maskz", IX86_BUILTIN_PEXPANDBLOAD256Z, UNKNOWN, (int) V32QI_FTYPE_PCV32QI_V32QI_USI)
@@ -1384,230 +1384,230 @@ BDESC (OPTION_MASK_ISA_BMI2, 0, CODE_FOR_bmi2_pext_si3, "__builtin_ia32_pext_si"
BDESC (OPTION_MASK_ISA_BMI2 | OPTION_MASK_ISA_64BIT, 0, CODE_FOR_bmi2_pext_di3, "__builtin_ia32_pext_di", IX86_BUILTIN_PEXT64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64)
/* AVX512F */
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_si512_256si, "__builtin_ia32_si512_256si", IX86_BUILTIN_SI512_SI256, UNKNOWN, (int) V16SI_FTYPE_V8SI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_ps512_256ps, "__builtin_ia32_ps512_256ps", IX86_BUILTIN_PS512_PS256, UNKNOWN, (int) V16SF_FTYPE_V8SF)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_pd512_256pd, "__builtin_ia32_pd512_256pd", IX86_BUILTIN_PD512_PD256, UNKNOWN, (int) V8DF_FTYPE_V4DF)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_si512_si, "__builtin_ia32_si512_si", IX86_BUILTIN_SI512_SI, UNKNOWN, (int) V16SI_FTYPE_V4SI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_ps512_ps, "__builtin_ia32_ps512_ps", IX86_BUILTIN_PS512_PS, UNKNOWN, (int) V16SF_FTYPE_V4SF)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_pd512_pd, "__builtin_ia32_pd512_pd", IX86_BUILTIN_PD512_PD, UNKNOWN, (int) V8DF_FTYPE_V2DF)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_alignv16si_mask, "__builtin_ia32_alignd512_mask", IX86_BUILTIN_ALIGND512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_alignv8di_mask, "__builtin_ia32_alignq512_mask", IX86_BUILTIN_ALIGNQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_blendmv16si, "__builtin_ia32_blendmd_512_mask", IX86_BUILTIN_BLENDMD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_blendmv8df, "__builtin_ia32_blendmpd_512_mask", IX86_BUILTIN_BLENDMPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_blendmv16sf, "__builtin_ia32_blendmps_512_mask", IX86_BUILTIN_BLENDMPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_blendmv8di, "__builtin_ia32_blendmq_512_mask", IX86_BUILTIN_BLENDMQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_broadcastv16sf_mask, "__builtin_ia32_broadcastf32x4_512", IX86_BUILTIN_BROADCASTF32X4_512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_broadcastv8df_mask, "__builtin_ia32_broadcastf64x4_512", IX86_BUILTIN_BROADCASTF64X4_512, UNKNOWN, (int) V8DF_FTYPE_V4DF_V8DF_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_broadcastv16si_mask, "__builtin_ia32_broadcasti32x4_512", IX86_BUILTIN_BROADCASTI32X4_512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_broadcastv8di_mask, "__builtin_ia32_broadcasti64x4_512", IX86_BUILTIN_BROADCASTI64X4_512, UNKNOWN, (int) V8DI_FTYPE_V4DI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_vec_dupv8df_mask, "__builtin_ia32_broadcastsd512", IX86_BUILTIN_BROADCASTSD512, UNKNOWN, (int) V8DF_FTYPE_V2DF_V8DF_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_vec_dupv16sf_mask, "__builtin_ia32_broadcastss512", IX86_BUILTIN_BROADCASTSS512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_cmpv16si3_mask, "__builtin_ia32_cmpd512_mask", IX86_BUILTIN_CMPD512, UNKNOWN, (int) UHI_FTYPE_V16SI_V16SI_INT_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_cmpv8di3_mask, "__builtin_ia32_cmpq512_mask", IX86_BUILTIN_CMPQ512, UNKNOWN, (int) UQI_FTYPE_V8DI_V8DI_INT_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_compressv8df_mask, "__builtin_ia32_compressdf512_mask", IX86_BUILTIN_COMPRESSPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_compressv16sf_mask, "__builtin_ia32_compresssf512_mask", IX86_BUILTIN_COMPRESSPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_floatv8siv8df2_mask, "__builtin_ia32_cvtdq2pd512_mask", IX86_BUILTIN_CVTDQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SI_V8DF_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_vcvtps2ph512_mask_sae, "__builtin_ia32_vcvtps2ph512_mask", IX86_BUILTIN_CVTPS2PH512, UNKNOWN, (int) V16HI_FTYPE_V16SF_INT_V16HI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_floatunsv8siv8df2_mask, "__builtin_ia32_cvtudq2pd512_mask", IX86_BUILTIN_CVTUDQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SI_V8DF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_si512_256si, "__builtin_ia32_si512_256si", IX86_BUILTIN_SI512_SI256, UNKNOWN, (int) V16SI_FTYPE_V8SI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_ps512_256ps, "__builtin_ia32_ps512_256ps", IX86_BUILTIN_PS512_PS256, UNKNOWN, (int) V16SF_FTYPE_V8SF)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_pd512_256pd, "__builtin_ia32_pd512_256pd", IX86_BUILTIN_PD512_PD256, UNKNOWN, (int) V8DF_FTYPE_V4DF)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_si512_si, "__builtin_ia32_si512_si", IX86_BUILTIN_SI512_SI, UNKNOWN, (int) V16SI_FTYPE_V4SI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_ps512_ps, "__builtin_ia32_ps512_ps", IX86_BUILTIN_PS512_PS, UNKNOWN, (int) V16SF_FTYPE_V4SF)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_pd512_pd, "__builtin_ia32_pd512_pd", IX86_BUILTIN_PD512_PD, UNKNOWN, (int) V8DF_FTYPE_V2DF)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_alignv16si_mask, "__builtin_ia32_alignd512_mask", IX86_BUILTIN_ALIGND512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_alignv8di_mask, "__builtin_ia32_alignq512_mask", IX86_BUILTIN_ALIGNQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_blendmv16si, "__builtin_ia32_blendmd_512_mask", IX86_BUILTIN_BLENDMD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_blendmv8df, "__builtin_ia32_blendmpd_512_mask", IX86_BUILTIN_BLENDMPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_blendmv16sf, "__builtin_ia32_blendmps_512_mask", IX86_BUILTIN_BLENDMPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_blendmv8di, "__builtin_ia32_blendmq_512_mask", IX86_BUILTIN_BLENDMQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_broadcastv16sf_mask, "__builtin_ia32_broadcastf32x4_512", IX86_BUILTIN_BROADCASTF32X4_512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_broadcastv8df_mask, "__builtin_ia32_broadcastf64x4_512", IX86_BUILTIN_BROADCASTF64X4_512, UNKNOWN, (int) V8DF_FTYPE_V4DF_V8DF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_broadcastv16si_mask, "__builtin_ia32_broadcasti32x4_512", IX86_BUILTIN_BROADCASTI32X4_512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_broadcastv8di_mask, "__builtin_ia32_broadcasti64x4_512", IX86_BUILTIN_BROADCASTI64X4_512, UNKNOWN, (int) V8DI_FTYPE_V4DI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vec_dupv8df_mask, "__builtin_ia32_broadcastsd512", IX86_BUILTIN_BROADCASTSD512, UNKNOWN, (int) V8DF_FTYPE_V2DF_V8DF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vec_dupv16sf_mask, "__builtin_ia32_broadcastss512", IX86_BUILTIN_BROADCASTSS512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_cmpv16si3_mask, "__builtin_ia32_cmpd512_mask", IX86_BUILTIN_CMPD512, UNKNOWN, (int) UHI_FTYPE_V16SI_V16SI_INT_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_cmpv8di3_mask, "__builtin_ia32_cmpq512_mask", IX86_BUILTIN_CMPQ512, UNKNOWN, (int) UQI_FTYPE_V8DI_V8DI_INT_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_compressv8df_mask, "__builtin_ia32_compressdf512_mask", IX86_BUILTIN_COMPRESSPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_compressv16sf_mask, "__builtin_ia32_compresssf512_mask", IX86_BUILTIN_COMPRESSPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_floatv8siv8df2_mask, "__builtin_ia32_cvtdq2pd512_mask", IX86_BUILTIN_CVTDQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SI_V8DF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vcvtps2ph512_mask_sae, "__builtin_ia32_vcvtps2ph512_mask", IX86_BUILTIN_CVTPS2PH512, UNKNOWN, (int) V16HI_FTYPE_V16SF_INT_V16HI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_floatunsv8siv8df2_mask, "__builtin_ia32_cvtudq2pd512_mask", IX86_BUILTIN_CVTUDQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SI_V8DF_UQI)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_cvtusi2sd32, "__builtin_ia32_cvtusi2sd32", IX86_BUILTIN_CVTUSI2SD32, UNKNOWN, (int) V2DF_FTYPE_V2DF_UINT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_expandv8df_mask, "__builtin_ia32_expanddf512_mask", IX86_BUILTIN_EXPANDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_expandv8df_maskz, "__builtin_ia32_expanddf512_maskz", IX86_BUILTIN_EXPANDPD512Z, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_expandv16sf_mask, "__builtin_ia32_expandsf512_mask", IX86_BUILTIN_EXPANDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_expandv16sf_maskz, "__builtin_ia32_expandsf512_maskz", IX86_BUILTIN_EXPANDPS512Z, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_vextractf32x4_mask, "__builtin_ia32_extractf32x4_mask", IX86_BUILTIN_EXTRACTF32X4, UNKNOWN, (int) V4SF_FTYPE_V16SF_INT_V4SF_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_vextractf64x4_mask, "__builtin_ia32_extractf64x4_mask", IX86_BUILTIN_EXTRACTF64X4, UNKNOWN, (int) V4DF_FTYPE_V8DF_INT_V4DF_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_vextracti32x4_mask, "__builtin_ia32_extracti32x4_mask", IX86_BUILTIN_EXTRACTI32X4, UNKNOWN, (int) V4SI_FTYPE_V16SI_INT_V4SI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_vextracti64x4_mask, "__builtin_ia32_extracti64x4_mask", IX86_BUILTIN_EXTRACTI64X4, UNKNOWN, (int) V4DI_FTYPE_V8DI_INT_V4DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_vinsertf32x4_mask, "__builtin_ia32_insertf32x4_mask", IX86_BUILTIN_INSERTF32X4, UNKNOWN, (int) V16SF_FTYPE_V16SF_V4SF_INT_V16SF_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_vinsertf64x4_mask, "__builtin_ia32_insertf64x4_mask", IX86_BUILTIN_INSERTF64X4, UNKNOWN, (int) V8DF_FTYPE_V8DF_V4DF_INT_V8DF_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_vinserti32x4_mask, "__builtin_ia32_inserti32x4_mask", IX86_BUILTIN_INSERTI32X4, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_INT_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_vinserti64x4_mask, "__builtin_ia32_inserti64x4_mask", IX86_BUILTIN_INSERTI64X4, UNKNOWN, (int) V8DI_FTYPE_V8DI_V4DI_INT_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_loadv8df_mask, "__builtin_ia32_movapd512_mask", IX86_BUILTIN_MOVAPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_loadv16sf_mask, "__builtin_ia32_movaps512_mask", IX86_BUILTIN_MOVAPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_movddup512_mask, "__builtin_ia32_movddup512_mask", IX86_BUILTIN_MOVDDUP512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_loadv16si_mask, "__builtin_ia32_movdqa32_512_mask", IX86_BUILTIN_MOVDQA32_512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_loadv8di_mask, "__builtin_ia32_movdqa64_512_mask", IX86_BUILTIN_MOVDQA64_512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_movshdup512_mask, "__builtin_ia32_movshdup512_mask", IX86_BUILTIN_MOVSHDUP512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_movsldup512_mask, "__builtin_ia32_movsldup512_mask", IX86_BUILTIN_MOVSLDUP512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_absv16si2_mask, "__builtin_ia32_pabsd512_mask", IX86_BUILTIN_PABSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_absv8di2_mask, "__builtin_ia32_pabsq512_mask", IX86_BUILTIN_PABSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_addv16si3_mask, "__builtin_ia32_paddd512_mask", IX86_BUILTIN_PADDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_addv8di3_mask, "__builtin_ia32_paddq512_mask", IX86_BUILTIN_PADDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_andv16si3_mask, "__builtin_ia32_pandd512_mask", IX86_BUILTIN_PANDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_andnotv16si3_mask, "__builtin_ia32_pandnd512_mask", IX86_BUILTIN_PANDND512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_andnotv8di3_mask, "__builtin_ia32_pandnq512_mask", IX86_BUILTIN_PANDNQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_andv8di3_mask, "__builtin_ia32_pandq512_mask", IX86_BUILTIN_PANDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_vec_dupv16si_mask, "__builtin_ia32_pbroadcastd512", IX86_BUILTIN_PBROADCASTD512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_vec_dup_gprv16si_mask, "__builtin_ia32_pbroadcastd512_gpr_mask", IX86_BUILTIN_PBROADCASTD512_GPR, UNKNOWN, (int) V16SI_FTYPE_SI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512CD, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512cd_maskb_vec_dupv8di, "__builtin_ia32_broadcastmb512", IX86_BUILTIN_PBROADCASTMB512, UNKNOWN, (int) V8DI_FTYPE_UQI)
-BDESC (OPTION_MASK_ISA_AVX512CD, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512cd_maskw_vec_dupv16si, "__builtin_ia32_broadcastmw512", IX86_BUILTIN_PBROADCASTMW512, UNKNOWN, (int) V16SI_FTYPE_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_vec_dupv8di_mask, "__builtin_ia32_pbroadcastq512", IX86_BUILTIN_PBROADCASTQ512, UNKNOWN, (int) V8DI_FTYPE_V2DI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_vec_dup_gprv8di_mask, "__builtin_ia32_pbroadcastq512_gpr_mask", IX86_BUILTIN_PBROADCASTQ512_GPR, UNKNOWN, (int) V8DI_FTYPE_DI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_eqv16si3_mask, "__builtin_ia32_pcmpeqd512_mask", IX86_BUILTIN_PCMPEQD512_MASK, UNKNOWN, (int) UHI_FTYPE_V16SI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_eqv8di3_mask, "__builtin_ia32_pcmpeqq512_mask", IX86_BUILTIN_PCMPEQQ512_MASK, UNKNOWN, (int) UQI_FTYPE_V8DI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_gtv16si3_mask, "__builtin_ia32_pcmpgtd512_mask", IX86_BUILTIN_PCMPGTD512_MASK, UNKNOWN, (int) UHI_FTYPE_V16SI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_gtv8di3_mask, "__builtin_ia32_pcmpgtq512_mask", IX86_BUILTIN_PCMPGTQ512_MASK, UNKNOWN, (int) UQI_FTYPE_V8DI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_compressv16si_mask, "__builtin_ia32_compresssi512_mask", IX86_BUILTIN_PCOMPRESSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_compressv8di_mask, "__builtin_ia32_compressdi512_mask", IX86_BUILTIN_PCOMPRESSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_expandv16si_mask, "__builtin_ia32_expandsi512_mask", IX86_BUILTIN_PEXPANDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_expandv16si_maskz, "__builtin_ia32_expandsi512_maskz", IX86_BUILTIN_PEXPANDD512Z, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_expandv8di_mask, "__builtin_ia32_expanddi512_mask", IX86_BUILTIN_PEXPANDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_expandv8di_maskz, "__builtin_ia32_expanddi512_maskz", IX86_BUILTIN_PEXPANDQ512Z, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_smaxv16si3_mask, "__builtin_ia32_pmaxsd512_mask", IX86_BUILTIN_PMAXSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_smaxv8di3_mask, "__builtin_ia32_pmaxsq512_mask", IX86_BUILTIN_PMAXSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_umaxv16si3_mask, "__builtin_ia32_pmaxud512_mask", IX86_BUILTIN_PMAXUD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_umaxv8di3_mask, "__builtin_ia32_pmaxuq512_mask", IX86_BUILTIN_PMAXUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_sminv16si3_mask, "__builtin_ia32_pminsd512_mask", IX86_BUILTIN_PMINSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_sminv8di3_mask, "__builtin_ia32_pminsq512_mask", IX86_BUILTIN_PMINSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_uminv16si3_mask, "__builtin_ia32_pminud512_mask", IX86_BUILTIN_PMINUD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_uminv8di3_mask, "__builtin_ia32_pminuq512_mask", IX86_BUILTIN_PMINUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_truncatev16siv16qi2_mask, "__builtin_ia32_pmovdb512_mask", IX86_BUILTIN_PMOVDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_truncatev16siv16hi2_mask, "__builtin_ia32_pmovdw512_mask", IX86_BUILTIN_PMOVDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_truncatev8div16qi2_mask, "__builtin_ia32_pmovqb512_mask", IX86_BUILTIN_PMOVQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_truncatev8div8si2_mask, "__builtin_ia32_pmovqd512_mask", IX86_BUILTIN_PMOVQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_truncatev8div8hi2_mask, "__builtin_ia32_pmovqw512_mask", IX86_BUILTIN_PMOVQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_ss_truncatev16siv16qi2_mask, "__builtin_ia32_pmovsdb512_mask", IX86_BUILTIN_PMOVSDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_ss_truncatev16siv16hi2_mask, "__builtin_ia32_pmovsdw512_mask", IX86_BUILTIN_PMOVSDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_ss_truncatev8div16qi2_mask, "__builtin_ia32_pmovsqb512_mask", IX86_BUILTIN_PMOVSQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_ss_truncatev8div8si2_mask, "__builtin_ia32_pmovsqd512_mask", IX86_BUILTIN_PMOVSQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_ss_truncatev8div8hi2_mask, "__builtin_ia32_pmovsqw512_mask", IX86_BUILTIN_PMOVSQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_sign_extendv16qiv16si2_mask, "__builtin_ia32_pmovsxbd512_mask", IX86_BUILTIN_PMOVSXBD512, UNKNOWN, (int) V16SI_FTYPE_V16QI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_sign_extendv8qiv8di2_mask, "__builtin_ia32_pmovsxbq512_mask", IX86_BUILTIN_PMOVSXBQ512, UNKNOWN, (int) V8DI_FTYPE_V16QI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_sign_extendv8siv8di2_mask, "__builtin_ia32_pmovsxdq512_mask", IX86_BUILTIN_PMOVSXDQ512, UNKNOWN, (int) V8DI_FTYPE_V8SI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_sign_extendv16hiv16si2_mask, "__builtin_ia32_pmovsxwd512_mask", IX86_BUILTIN_PMOVSXWD512, UNKNOWN, (int) V16SI_FTYPE_V16HI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_sign_extendv8hiv8di2_mask, "__builtin_ia32_pmovsxwq512_mask", IX86_BUILTIN_PMOVSXWQ512, UNKNOWN, (int) V8DI_FTYPE_V8HI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_us_truncatev16siv16qi2_mask, "__builtin_ia32_pmovusdb512_mask", IX86_BUILTIN_PMOVUSDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_us_truncatev16siv16hi2_mask, "__builtin_ia32_pmovusdw512_mask", IX86_BUILTIN_PMOVUSDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_us_truncatev8div16qi2_mask, "__builtin_ia32_pmovusqb512_mask", IX86_BUILTIN_PMOVUSQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_us_truncatev8div8si2_mask, "__builtin_ia32_pmovusqd512_mask", IX86_BUILTIN_PMOVUSQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_us_truncatev8div8hi2_mask, "__builtin_ia32_pmovusqw512_mask", IX86_BUILTIN_PMOVUSQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_zero_extendv16qiv16si2_mask, "__builtin_ia32_pmovzxbd512_mask", IX86_BUILTIN_PMOVZXBD512, UNKNOWN, (int) V16SI_FTYPE_V16QI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_zero_extendv8qiv8di2_mask, "__builtin_ia32_pmovzxbq512_mask", IX86_BUILTIN_PMOVZXBQ512, UNKNOWN, (int) V8DI_FTYPE_V16QI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_zero_extendv8siv8di2_mask, "__builtin_ia32_pmovzxdq512_mask", IX86_BUILTIN_PMOVZXDQ512, UNKNOWN, (int) V8DI_FTYPE_V8SI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_zero_extendv16hiv16si2_mask, "__builtin_ia32_pmovzxwd512_mask", IX86_BUILTIN_PMOVZXWD512, UNKNOWN, (int) V16SI_FTYPE_V16HI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_zero_extendv8hiv8di2_mask, "__builtin_ia32_pmovzxwq512_mask", IX86_BUILTIN_PMOVZXWQ512, UNKNOWN, (int) V8DI_FTYPE_V8HI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vec_widen_smult_even_v16si_mask, "__builtin_ia32_pmuldq512_mask", IX86_BUILTIN_PMULDQ512, UNKNOWN, (int) V8DI_FTYPE_V16SI_V16SI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_mulv16si3_mask, "__builtin_ia32_pmulld512_mask" , IX86_BUILTIN_PMULLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vec_widen_umult_even_v16si_mask, "__builtin_ia32_pmuludq512_mask", IX86_BUILTIN_PMULUDQ512, UNKNOWN, (int) V8DI_FTYPE_V16SI_V16SI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_iorv16si3_mask, "__builtin_ia32_pord512_mask", IX86_BUILTIN_PORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_iorv8di3_mask, "__builtin_ia32_porq512_mask", IX86_BUILTIN_PORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_rolv16si_mask, "__builtin_ia32_prold512_mask", IX86_BUILTIN_PROLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_rolv8di_mask, "__builtin_ia32_prolq512_mask", IX86_BUILTIN_PROLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_rolvv16si_mask, "__builtin_ia32_prolvd512_mask", IX86_BUILTIN_PROLVD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_rolvv8di_mask, "__builtin_ia32_prolvq512_mask", IX86_BUILTIN_PROLVQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_rorv16si_mask, "__builtin_ia32_prord512_mask", IX86_BUILTIN_PRORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_rorv8di_mask, "__builtin_ia32_prorq512_mask", IX86_BUILTIN_PRORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_rorvv16si_mask, "__builtin_ia32_prorvd512_mask", IX86_BUILTIN_PRORVD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_rorvv8di_mask, "__builtin_ia32_prorvq512_mask", IX86_BUILTIN_PRORVQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_pshufdv3_mask, "__builtin_ia32_pshufd512_mask", IX86_BUILTIN_PSHUFD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_ashlv16si3_mask, "__builtin_ia32_pslld512_mask", IX86_BUILTIN_PSLLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_UHI_COUNT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_ashlv16si3_mask, "__builtin_ia32_pslldi512_mask", IX86_BUILTIN_PSLLDI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_UHI_COUNT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_ashlv8di3_mask, "__builtin_ia32_psllq512_mask", IX86_BUILTIN_PSLLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_UQI_COUNT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_ashlv8di3_mask, "__builtin_ia32_psllqi512_mask", IX86_BUILTIN_PSLLQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_UQI_COUNT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_ashlvv16si_mask, "__builtin_ia32_psllv16si_mask", IX86_BUILTIN_PSLLVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_ashlvv8di_mask, "__builtin_ia32_psllv8di_mask", IX86_BUILTIN_PSLLVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_ashrv16si3_mask, "__builtin_ia32_psrad512_mask", IX86_BUILTIN_PSRAD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_UHI_COUNT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_ashrv16si3_mask, "__builtin_ia32_psradi512_mask", IX86_BUILTIN_PSRADI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_UHI_COUNT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_ashrv8di3_mask, "__builtin_ia32_psraq512_mask", IX86_BUILTIN_PSRAQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_UQI_COUNT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_ashrv8di3_mask, "__builtin_ia32_psraqi512_mask", IX86_BUILTIN_PSRAQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_UQI_COUNT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_ashrvv16si_mask, "__builtin_ia32_psrav16si_mask", IX86_BUILTIN_PSRAVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_ashrvv8di_mask, "__builtin_ia32_psrav8di_mask", IX86_BUILTIN_PSRAVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_lshrv16si3_mask, "__builtin_ia32_psrld512_mask", IX86_BUILTIN_PSRLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_UHI_COUNT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_lshrv16si3_mask, "__builtin_ia32_psrldi512_mask", IX86_BUILTIN_PSRLDI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_UHI_COUNT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_lshrv8di3_mask, "__builtin_ia32_psrlq512_mask", IX86_BUILTIN_PSRLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_UQI_COUNT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_lshrv8di3_mask, "__builtin_ia32_psrlqi512_mask", IX86_BUILTIN_PSRLQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_UQI_COUNT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_lshrvv16si_mask, "__builtin_ia32_psrlv16si_mask", IX86_BUILTIN_PSRLVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_lshrvv8di_mask, "__builtin_ia32_psrlv8di_mask", IX86_BUILTIN_PSRLVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_subv16si3_mask, "__builtin_ia32_psubd512_mask", IX86_BUILTIN_PSUBD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_subv8di3_mask, "__builtin_ia32_psubq512_mask", IX86_BUILTIN_PSUBQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_testmv16si3_mask, "__builtin_ia32_ptestmd512", IX86_BUILTIN_PTESTMD512, UNKNOWN, (int) UHI_FTYPE_V16SI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_testmv8di3_mask, "__builtin_ia32_ptestmq512", IX86_BUILTIN_PTESTMQ512, UNKNOWN, (int) UQI_FTYPE_V8DI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_testnmv16si3_mask, "__builtin_ia32_ptestnmd512", IX86_BUILTIN_PTESTNMD512, UNKNOWN, (int) UHI_FTYPE_V16SI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_testnmv8di3_mask, "__builtin_ia32_ptestnmq512", IX86_BUILTIN_PTESTNMQ512, UNKNOWN, (int) UQI_FTYPE_V8DI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_interleave_highv16si_mask, "__builtin_ia32_punpckhdq512_mask", IX86_BUILTIN_PUNPCKHDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_interleave_highv8di_mask, "__builtin_ia32_punpckhqdq512_mask", IX86_BUILTIN_PUNPCKHQDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_interleave_lowv16si_mask, "__builtin_ia32_punpckldq512_mask", IX86_BUILTIN_PUNPCKLDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_interleave_lowv8di_mask, "__builtin_ia32_punpcklqdq512_mask", IX86_BUILTIN_PUNPCKLQDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_xorv16si3_mask, "__builtin_ia32_pxord512_mask", IX86_BUILTIN_PXORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_xorv8di3_mask, "__builtin_ia32_pxorq512_mask", IX86_BUILTIN_PXORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_rcp14v8df_mask, "__builtin_ia32_rcp14pd512_mask", IX86_BUILTIN_RCP14PD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_rcp14v16sf_mask, "__builtin_ia32_rcp14ps512_mask", IX86_BUILTIN_RCP14PS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_expandv8df_mask, "__builtin_ia32_expanddf512_mask", IX86_BUILTIN_EXPANDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_expandv8df_maskz, "__builtin_ia32_expanddf512_maskz", IX86_BUILTIN_EXPANDPD512Z, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_expandv16sf_mask, "__builtin_ia32_expandsf512_mask", IX86_BUILTIN_EXPANDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_expandv16sf_maskz, "__builtin_ia32_expandsf512_maskz", IX86_BUILTIN_EXPANDPS512Z, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vextractf32x4_mask, "__builtin_ia32_extractf32x4_mask", IX86_BUILTIN_EXTRACTF32X4, UNKNOWN, (int) V4SF_FTYPE_V16SF_INT_V4SF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vextractf64x4_mask, "__builtin_ia32_extractf64x4_mask", IX86_BUILTIN_EXTRACTF64X4, UNKNOWN, (int) V4DF_FTYPE_V8DF_INT_V4DF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vextracti32x4_mask, "__builtin_ia32_extracti32x4_mask", IX86_BUILTIN_EXTRACTI32X4, UNKNOWN, (int) V4SI_FTYPE_V16SI_INT_V4SI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vextracti64x4_mask, "__builtin_ia32_extracti64x4_mask", IX86_BUILTIN_EXTRACTI64X4, UNKNOWN, (int) V4DI_FTYPE_V8DI_INT_V4DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vinsertf32x4_mask, "__builtin_ia32_insertf32x4_mask", IX86_BUILTIN_INSERTF32X4, UNKNOWN, (int) V16SF_FTYPE_V16SF_V4SF_INT_V16SF_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vinsertf64x4_mask, "__builtin_ia32_insertf64x4_mask", IX86_BUILTIN_INSERTF64X4, UNKNOWN, (int) V8DF_FTYPE_V8DF_V4DF_INT_V8DF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vinserti32x4_mask, "__builtin_ia32_inserti32x4_mask", IX86_BUILTIN_INSERTI32X4, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_INT_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vinserti64x4_mask, "__builtin_ia32_inserti64x4_mask", IX86_BUILTIN_INSERTI64X4, UNKNOWN, (int) V8DI_FTYPE_V8DI_V4DI_INT_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_loadv8df_mask, "__builtin_ia32_movapd512_mask", IX86_BUILTIN_MOVAPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_loadv16sf_mask, "__builtin_ia32_movaps512_mask", IX86_BUILTIN_MOVAPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_movddup512_mask, "__builtin_ia32_movddup512_mask", IX86_BUILTIN_MOVDDUP512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_loadv16si_mask, "__builtin_ia32_movdqa32_512_mask", IX86_BUILTIN_MOVDQA32_512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_loadv8di_mask, "__builtin_ia32_movdqa64_512_mask", IX86_BUILTIN_MOVDQA64_512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_movshdup512_mask, "__builtin_ia32_movshdup512_mask", IX86_BUILTIN_MOVSHDUP512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_movsldup512_mask, "__builtin_ia32_movsldup512_mask", IX86_BUILTIN_MOVSLDUP512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_absv16si2_mask, "__builtin_ia32_pabsd512_mask", IX86_BUILTIN_PABSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_absv8di2_mask, "__builtin_ia32_pabsq512_mask", IX86_BUILTIN_PABSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_addv16si3_mask, "__builtin_ia32_paddd512_mask", IX86_BUILTIN_PADDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_addv8di3_mask, "__builtin_ia32_paddq512_mask", IX86_BUILTIN_PADDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_andv16si3_mask, "__builtin_ia32_pandd512_mask", IX86_BUILTIN_PANDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_andnotv16si3_mask, "__builtin_ia32_pandnd512_mask", IX86_BUILTIN_PANDND512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_andnotv8di3_mask, "__builtin_ia32_pandnq512_mask", IX86_BUILTIN_PANDNQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_andv8di3_mask, "__builtin_ia32_pandq512_mask", IX86_BUILTIN_PANDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vec_dupv16si_mask, "__builtin_ia32_pbroadcastd512", IX86_BUILTIN_PBROADCASTD512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vec_dup_gprv16si_mask, "__builtin_ia32_pbroadcastd512_gpr_mask", IX86_BUILTIN_PBROADCASTD512_GPR, UNKNOWN, (int) V16SI_FTYPE_SI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512CD, 0, CODE_FOR_avx512cd_maskb_vec_dupv8di, "__builtin_ia32_broadcastmb512", IX86_BUILTIN_PBROADCASTMB512, UNKNOWN, (int) V8DI_FTYPE_UQI)
+BDESC (OPTION_MASK_ISA_AVX512CD, 0, CODE_FOR_avx512cd_maskw_vec_dupv16si, "__builtin_ia32_broadcastmw512", IX86_BUILTIN_PBROADCASTMW512, UNKNOWN, (int) V16SI_FTYPE_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vec_dupv8di_mask, "__builtin_ia32_pbroadcastq512", IX86_BUILTIN_PBROADCASTQ512, UNKNOWN, (int) V8DI_FTYPE_V2DI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vec_dup_gprv8di_mask, "__builtin_ia32_pbroadcastq512_gpr_mask", IX86_BUILTIN_PBROADCASTQ512_GPR, UNKNOWN, (int) V8DI_FTYPE_DI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_eqv16si3_mask, "__builtin_ia32_pcmpeqd512_mask", IX86_BUILTIN_PCMPEQD512_MASK, UNKNOWN, (int) UHI_FTYPE_V16SI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_eqv8di3_mask, "__builtin_ia32_pcmpeqq512_mask", IX86_BUILTIN_PCMPEQQ512_MASK, UNKNOWN, (int) UQI_FTYPE_V8DI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_gtv16si3_mask, "__builtin_ia32_pcmpgtd512_mask", IX86_BUILTIN_PCMPGTD512_MASK, UNKNOWN, (int) UHI_FTYPE_V16SI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_gtv8di3_mask, "__builtin_ia32_pcmpgtq512_mask", IX86_BUILTIN_PCMPGTQ512_MASK, UNKNOWN, (int) UQI_FTYPE_V8DI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_compressv16si_mask, "__builtin_ia32_compresssi512_mask", IX86_BUILTIN_PCOMPRESSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_compressv8di_mask, "__builtin_ia32_compressdi512_mask", IX86_BUILTIN_PCOMPRESSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_expandv16si_mask, "__builtin_ia32_expandsi512_mask", IX86_BUILTIN_PEXPANDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_expandv16si_maskz, "__builtin_ia32_expandsi512_maskz", IX86_BUILTIN_PEXPANDD512Z, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_expandv8di_mask, "__builtin_ia32_expanddi512_mask", IX86_BUILTIN_PEXPANDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_expandv8di_maskz, "__builtin_ia32_expanddi512_maskz", IX86_BUILTIN_PEXPANDQ512Z, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_smaxv16si3_mask, "__builtin_ia32_pmaxsd512_mask", IX86_BUILTIN_PMAXSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_smaxv8di3_mask, "__builtin_ia32_pmaxsq512_mask", IX86_BUILTIN_PMAXSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_umaxv16si3_mask, "__builtin_ia32_pmaxud512_mask", IX86_BUILTIN_PMAXUD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_umaxv8di3_mask, "__builtin_ia32_pmaxuq512_mask", IX86_BUILTIN_PMAXUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sminv16si3_mask, "__builtin_ia32_pminsd512_mask", IX86_BUILTIN_PMINSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sminv8di3_mask, "__builtin_ia32_pminsq512_mask", IX86_BUILTIN_PMINSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_uminv16si3_mask, "__builtin_ia32_pminud512_mask", IX86_BUILTIN_PMINUD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_uminv8di3_mask, "__builtin_ia32_pminuq512_mask", IX86_BUILTIN_PMINUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_truncatev16siv16qi2_mask, "__builtin_ia32_pmovdb512_mask", IX86_BUILTIN_PMOVDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_truncatev16siv16hi2_mask, "__builtin_ia32_pmovdw512_mask", IX86_BUILTIN_PMOVDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_truncatev8div16qi2_mask, "__builtin_ia32_pmovqb512_mask", IX86_BUILTIN_PMOVQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_truncatev8div8si2_mask, "__builtin_ia32_pmovqd512_mask", IX86_BUILTIN_PMOVQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_truncatev8div8hi2_mask, "__builtin_ia32_pmovqw512_mask", IX86_BUILTIN_PMOVQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_ss_truncatev16siv16qi2_mask, "__builtin_ia32_pmovsdb512_mask", IX86_BUILTIN_PMOVSDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_ss_truncatev16siv16hi2_mask, "__builtin_ia32_pmovsdw512_mask", IX86_BUILTIN_PMOVSDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_ss_truncatev8div16qi2_mask, "__builtin_ia32_pmovsqb512_mask", IX86_BUILTIN_PMOVSQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_ss_truncatev8div8si2_mask, "__builtin_ia32_pmovsqd512_mask", IX86_BUILTIN_PMOVSQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_ss_truncatev8div8hi2_mask, "__builtin_ia32_pmovsqw512_mask", IX86_BUILTIN_PMOVSQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_sign_extendv16qiv16si2_mask, "__builtin_ia32_pmovsxbd512_mask", IX86_BUILTIN_PMOVSXBD512, UNKNOWN, (int) V16SI_FTYPE_V16QI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_sign_extendv8qiv8di2_mask, "__builtin_ia32_pmovsxbq512_mask", IX86_BUILTIN_PMOVSXBQ512, UNKNOWN, (int) V8DI_FTYPE_V16QI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_sign_extendv8siv8di2_mask, "__builtin_ia32_pmovsxdq512_mask", IX86_BUILTIN_PMOVSXDQ512, UNKNOWN, (int) V8DI_FTYPE_V8SI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_sign_extendv16hiv16si2_mask, "__builtin_ia32_pmovsxwd512_mask", IX86_BUILTIN_PMOVSXWD512, UNKNOWN, (int) V16SI_FTYPE_V16HI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_sign_extendv8hiv8di2_mask, "__builtin_ia32_pmovsxwq512_mask", IX86_BUILTIN_PMOVSXWQ512, UNKNOWN, (int) V8DI_FTYPE_V8HI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_us_truncatev16siv16qi2_mask, "__builtin_ia32_pmovusdb512_mask", IX86_BUILTIN_PMOVUSDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_us_truncatev16siv16hi2_mask, "__builtin_ia32_pmovusdw512_mask", IX86_BUILTIN_PMOVUSDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_us_truncatev8div16qi2_mask, "__builtin_ia32_pmovusqb512_mask", IX86_BUILTIN_PMOVUSQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_us_truncatev8div8si2_mask, "__builtin_ia32_pmovusqd512_mask", IX86_BUILTIN_PMOVUSQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_us_truncatev8div8hi2_mask, "__builtin_ia32_pmovusqw512_mask", IX86_BUILTIN_PMOVUSQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_zero_extendv16qiv16si2_mask, "__builtin_ia32_pmovzxbd512_mask", IX86_BUILTIN_PMOVZXBD512, UNKNOWN, (int) V16SI_FTYPE_V16QI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_zero_extendv8qiv8di2_mask, "__builtin_ia32_pmovzxbq512_mask", IX86_BUILTIN_PMOVZXBQ512, UNKNOWN, (int) V8DI_FTYPE_V16QI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_zero_extendv8siv8di2_mask, "__builtin_ia32_pmovzxdq512_mask", IX86_BUILTIN_PMOVZXDQ512, UNKNOWN, (int) V8DI_FTYPE_V8SI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_zero_extendv16hiv16si2_mask, "__builtin_ia32_pmovzxwd512_mask", IX86_BUILTIN_PMOVZXWD512, UNKNOWN, (int) V16SI_FTYPE_V16HI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_zero_extendv8hiv8di2_mask, "__builtin_ia32_pmovzxwq512_mask", IX86_BUILTIN_PMOVZXWQ512, UNKNOWN, (int) V8DI_FTYPE_V8HI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_vec_widen_smult_even_v16si_mask, "__builtin_ia32_pmuldq512_mask", IX86_BUILTIN_PMULDQ512, UNKNOWN, (int) V8DI_FTYPE_V16SI_V16SI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_mulv16si3_mask, "__builtin_ia32_pmulld512_mask" , IX86_BUILTIN_PMULLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_vec_widen_umult_even_v16si_mask, "__builtin_ia32_pmuludq512_mask", IX86_BUILTIN_PMULUDQ512, UNKNOWN, (int) V8DI_FTYPE_V16SI_V16SI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_iorv16si3_mask, "__builtin_ia32_pord512_mask", IX86_BUILTIN_PORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_iorv8di3_mask, "__builtin_ia32_porq512_mask", IX86_BUILTIN_PORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_rolv16si_mask, "__builtin_ia32_prold512_mask", IX86_BUILTIN_PROLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_rolv8di_mask, "__builtin_ia32_prolq512_mask", IX86_BUILTIN_PROLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_rolvv16si_mask, "__builtin_ia32_prolvd512_mask", IX86_BUILTIN_PROLVD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_rolvv8di_mask, "__builtin_ia32_prolvq512_mask", IX86_BUILTIN_PROLVQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_rorv16si_mask, "__builtin_ia32_prord512_mask", IX86_BUILTIN_PRORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_rorv8di_mask, "__builtin_ia32_prorq512_mask", IX86_BUILTIN_PRORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_rorvv16si_mask, "__builtin_ia32_prorvd512_mask", IX86_BUILTIN_PRORVD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_rorvv8di_mask, "__builtin_ia32_prorvq512_mask", IX86_BUILTIN_PRORVQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_pshufdv3_mask, "__builtin_ia32_pshufd512_mask", IX86_BUILTIN_PSHUFD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_ashlv16si3_mask, "__builtin_ia32_pslld512_mask", IX86_BUILTIN_PSLLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_UHI_COUNT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_ashlv16si3_mask, "__builtin_ia32_pslldi512_mask", IX86_BUILTIN_PSLLDI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_UHI_COUNT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_ashlv8di3_mask, "__builtin_ia32_psllq512_mask", IX86_BUILTIN_PSLLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_UQI_COUNT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_ashlv8di3_mask, "__builtin_ia32_psllqi512_mask", IX86_BUILTIN_PSLLQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_UQI_COUNT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_ashlvv16si_mask, "__builtin_ia32_psllv16si_mask", IX86_BUILTIN_PSLLVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_ashlvv8di_mask, "__builtin_ia32_psllv8di_mask", IX86_BUILTIN_PSLLVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_ashrv16si3_mask, "__builtin_ia32_psrad512_mask", IX86_BUILTIN_PSRAD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_UHI_COUNT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_ashrv16si3_mask, "__builtin_ia32_psradi512_mask", IX86_BUILTIN_PSRADI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_UHI_COUNT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_ashrv8di3_mask, "__builtin_ia32_psraq512_mask", IX86_BUILTIN_PSRAQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_UQI_COUNT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_ashrv8di3_mask, "__builtin_ia32_psraqi512_mask", IX86_BUILTIN_PSRAQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_UQI_COUNT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_ashrvv16si_mask, "__builtin_ia32_psrav16si_mask", IX86_BUILTIN_PSRAVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_ashrvv8di_mask, "__builtin_ia32_psrav8di_mask", IX86_BUILTIN_PSRAVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_lshrv16si3_mask, "__builtin_ia32_psrld512_mask", IX86_BUILTIN_PSRLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_UHI_COUNT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_lshrv16si3_mask, "__builtin_ia32_psrldi512_mask", IX86_BUILTIN_PSRLDI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_UHI_COUNT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_lshrv8di3_mask, "__builtin_ia32_psrlq512_mask", IX86_BUILTIN_PSRLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_UQI_COUNT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_lshrv8di3_mask, "__builtin_ia32_psrlqi512_mask", IX86_BUILTIN_PSRLQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_UQI_COUNT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_lshrvv16si_mask, "__builtin_ia32_psrlv16si_mask", IX86_BUILTIN_PSRLVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_lshrvv8di_mask, "__builtin_ia32_psrlv8di_mask", IX86_BUILTIN_PSRLVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_subv16si3_mask, "__builtin_ia32_psubd512_mask", IX86_BUILTIN_PSUBD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_subv8di3_mask, "__builtin_ia32_psubq512_mask", IX86_BUILTIN_PSUBQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_testmv16si3_mask, "__builtin_ia32_ptestmd512", IX86_BUILTIN_PTESTMD512, UNKNOWN, (int) UHI_FTYPE_V16SI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_testmv8di3_mask, "__builtin_ia32_ptestmq512", IX86_BUILTIN_PTESTMQ512, UNKNOWN, (int) UQI_FTYPE_V8DI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_testnmv16si3_mask, "__builtin_ia32_ptestnmd512", IX86_BUILTIN_PTESTNMD512, UNKNOWN, (int) UHI_FTYPE_V16SI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_testnmv8di3_mask, "__builtin_ia32_ptestnmq512", IX86_BUILTIN_PTESTNMQ512, UNKNOWN, (int) UQI_FTYPE_V8DI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_interleave_highv16si_mask, "__builtin_ia32_punpckhdq512_mask", IX86_BUILTIN_PUNPCKHDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_interleave_highv8di_mask, "__builtin_ia32_punpckhqdq512_mask", IX86_BUILTIN_PUNPCKHQDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_interleave_lowv16si_mask, "__builtin_ia32_punpckldq512_mask", IX86_BUILTIN_PUNPCKLDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_interleave_lowv8di_mask, "__builtin_ia32_punpcklqdq512_mask", IX86_BUILTIN_PUNPCKLQDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_xorv16si3_mask, "__builtin_ia32_pxord512_mask", IX86_BUILTIN_PXORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_xorv8di3_mask, "__builtin_ia32_pxorq512_mask", IX86_BUILTIN_PXORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_rcp14v8df_mask, "__builtin_ia32_rcp14pd512_mask", IX86_BUILTIN_RCP14PD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_rcp14v16sf_mask, "__builtin_ia32_rcp14ps512_mask", IX86_BUILTIN_RCP14PS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_srcp14v2df, "__builtin_ia32_rcp14sd", IX86_BUILTIN_RCP14SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_srcp14v2df_mask, "__builtin_ia32_rcp14sd_mask", IX86_BUILTIN_RCP14SDMASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_srcp14v4sf, "__builtin_ia32_rcp14ss", IX86_BUILTIN_RCP14SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_srcp14v4sf_mask, "__builtin_ia32_rcp14ss_mask", IX86_BUILTIN_RCP14SSMASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_rsqrt14v8df_mask, "__builtin_ia32_rsqrt14pd512_mask", IX86_BUILTIN_RSQRT14PD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_rsqrt14v16sf_mask, "__builtin_ia32_rsqrt14ps512_mask", IX86_BUILTIN_RSQRT14PS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_rsqrt14v8df_mask, "__builtin_ia32_rsqrt14pd512_mask", IX86_BUILTIN_RSQRT14PD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_rsqrt14v16sf_mask, "__builtin_ia32_rsqrt14ps512_mask", IX86_BUILTIN_RSQRT14PS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_rsqrt14v2df, "__builtin_ia32_rsqrt14sd", IX86_BUILTIN_RSQRT14SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_rsqrt14_v2df_mask, "__builtin_ia32_rsqrt14sd_mask", IX86_BUILTIN_RSQRT14SDMASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_rsqrt14v4sf, "__builtin_ia32_rsqrt14ss", IX86_BUILTIN_RSQRT14SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_rsqrt14_v4sf_mask, "__builtin_ia32_rsqrt14ss_mask", IX86_BUILTIN_RSQRT14SSMASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_shufpd512_mask, "__builtin_ia32_shufpd512_mask", IX86_BUILTIN_SHUFPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_shufps512_mask, "__builtin_ia32_shufps512_mask", IX86_BUILTIN_SHUFPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_shuf_f32x4_mask, "__builtin_ia32_shuf_f32x4_mask", IX86_BUILTIN_SHUF_F32x4, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_shuf_f64x2_mask, "__builtin_ia32_shuf_f64x2_mask", IX86_BUILTIN_SHUF_F64x2, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_shuf_i32x4_mask, "__builtin_ia32_shuf_i32x4_mask", IX86_BUILTIN_SHUF_I32x4, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_shuf_i64x2_mask, "__builtin_ia32_shuf_i64x2_mask", IX86_BUILTIN_SHUF_I64x2, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_ucmpv16si3_mask, "__builtin_ia32_ucmpd512_mask", IX86_BUILTIN_UCMPD512, UNKNOWN, (int) UHI_FTYPE_V16SI_V16SI_INT_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_ucmpv8di3_mask, "__builtin_ia32_ucmpq512_mask", IX86_BUILTIN_UCMPQ512, UNKNOWN, (int) UQI_FTYPE_V8DI_V8DI_INT_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_unpckhpd512_mask, "__builtin_ia32_unpckhpd512_mask", IX86_BUILTIN_UNPCKHPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_unpckhps512_mask, "__builtin_ia32_unpckhps512_mask", IX86_BUILTIN_UNPCKHPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_unpcklpd512_mask, "__builtin_ia32_unpcklpd512_mask", IX86_BUILTIN_UNPCKLPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_unpcklps512_mask, "__builtin_ia32_unpcklps512_mask", IX86_BUILTIN_UNPCKLPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_UHI)
-BDESC (OPTION_MASK_ISA_AVX512CD, OPTION_MASK_ISA2_EVEX512, CODE_FOR_clzv16si2_mask, "__builtin_ia32_vplzcntd_512_mask", IX86_BUILTIN_VPCLZCNTD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512CD, OPTION_MASK_ISA2_EVEX512, CODE_FOR_clzv8di2_mask, "__builtin_ia32_vplzcntq_512_mask", IX86_BUILTIN_VPCLZCNTQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512CD, OPTION_MASK_ISA2_EVEX512, CODE_FOR_conflictv16si_mask, "__builtin_ia32_vpconflictsi_512_mask", IX86_BUILTIN_VPCONFLICTD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512CD, OPTION_MASK_ISA2_EVEX512, CODE_FOR_conflictv8di_mask, "__builtin_ia32_vpconflictdi_512_mask", IX86_BUILTIN_VPCONFLICTQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_permv8df_mask, "__builtin_ia32_permdf512_mask", IX86_BUILTIN_VPERMDF512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_permv8di_mask, "__builtin_ia32_permdi512_mask", IX86_BUILTIN_VPERMDI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_vpermi2varv16si3_mask, "__builtin_ia32_vpermi2vard512_mask", IX86_BUILTIN_VPERMI2VARD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_vpermi2varv8df3_mask, "__builtin_ia32_vpermi2varpd512_mask", IX86_BUILTIN_VPERMI2VARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_vpermi2varv16sf3_mask, "__builtin_ia32_vpermi2varps512_mask", IX86_BUILTIN_VPERMI2VARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_vpermi2varv8di3_mask, "__builtin_ia32_vpermi2varq512_mask", IX86_BUILTIN_VPERMI2VARQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_vpermilv8df_mask, "__builtin_ia32_vpermilpd512_mask", IX86_BUILTIN_VPERMILPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_vpermilv16sf_mask, "__builtin_ia32_vpermilps512_mask", IX86_BUILTIN_VPERMILPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_vpermilvarv8df3_mask, "__builtin_ia32_vpermilvarpd512_mask", IX86_BUILTIN_VPERMILVARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_vpermilvarv16sf3_mask, "__builtin_ia32_vpermilvarps512_mask", IX86_BUILTIN_VPERMILVARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_vpermt2varv16si3_mask, "__builtin_ia32_vpermt2vard512_mask", IX86_BUILTIN_VPERMT2VARD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_vpermt2varv16si3_maskz, "__builtin_ia32_vpermt2vard512_maskz", IX86_BUILTIN_VPERMT2VARD512_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_vpermt2varv8df3_mask, "__builtin_ia32_vpermt2varpd512_mask", IX86_BUILTIN_VPERMT2VARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_V8DF_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_vpermt2varv8df3_maskz, "__builtin_ia32_vpermt2varpd512_maskz", IX86_BUILTIN_VPERMT2VARPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_V8DF_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_vpermt2varv16sf3_mask, "__builtin_ia32_vpermt2varps512_mask", IX86_BUILTIN_VPERMT2VARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_V16SF_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_vpermt2varv16sf3_maskz, "__builtin_ia32_vpermt2varps512_maskz", IX86_BUILTIN_VPERMT2VARPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_V16SF_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_vpermt2varv8di3_mask, "__builtin_ia32_vpermt2varq512_mask", IX86_BUILTIN_VPERMT2VARQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_vpermt2varv8di3_maskz, "__builtin_ia32_vpermt2varq512_maskz", IX86_BUILTIN_VPERMT2VARQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_permvarv8df_mask, "__builtin_ia32_permvardf512_mask", IX86_BUILTIN_VPERMVARDF512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_permvarv8di_mask, "__builtin_ia32_permvardi512_mask", IX86_BUILTIN_VPERMVARDI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_permvarv16sf_mask, "__builtin_ia32_permvarsf512_mask", IX86_BUILTIN_VPERMVARSF512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_permvarv16si_mask, "__builtin_ia32_permvarsi512_mask", IX86_BUILTIN_VPERMVARSI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_vternlogv16si_mask, "__builtin_ia32_pternlogd512_mask", IX86_BUILTIN_VTERNLOGD512_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_vternlogv16si_maskz, "__builtin_ia32_pternlogd512_maskz", IX86_BUILTIN_VTERNLOGD512_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_vternlogv8di_mask, "__builtin_ia32_pternlogq512_mask", IX86_BUILTIN_VTERNLOGQ512_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_INT_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_vternlogv8di_maskz, "__builtin_ia32_pternlogq512_maskz", IX86_BUILTIN_VTERNLOGQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_INT_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_shufpd512_mask, "__builtin_ia32_shufpd512_mask", IX86_BUILTIN_SHUFPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_shufps512_mask, "__builtin_ia32_shufps512_mask", IX86_BUILTIN_SHUFPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_shuf_f32x4_mask, "__builtin_ia32_shuf_f32x4_mask", IX86_BUILTIN_SHUF_F32x4, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_shuf_f64x2_mask, "__builtin_ia32_shuf_f64x2_mask", IX86_BUILTIN_SHUF_F64x2, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_shuf_i32x4_mask, "__builtin_ia32_shuf_i32x4_mask", IX86_BUILTIN_SHUF_I32x4, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_shuf_i64x2_mask, "__builtin_ia32_shuf_i64x2_mask", IX86_BUILTIN_SHUF_I64x2, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_ucmpv16si3_mask, "__builtin_ia32_ucmpd512_mask", IX86_BUILTIN_UCMPD512, UNKNOWN, (int) UHI_FTYPE_V16SI_V16SI_INT_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_ucmpv8di3_mask, "__builtin_ia32_ucmpq512_mask", IX86_BUILTIN_UCMPQ512, UNKNOWN, (int) UQI_FTYPE_V8DI_V8DI_INT_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_unpckhpd512_mask, "__builtin_ia32_unpckhpd512_mask", IX86_BUILTIN_UNPCKHPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_unpckhps512_mask, "__builtin_ia32_unpckhps512_mask", IX86_BUILTIN_UNPCKHPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_unpcklpd512_mask, "__builtin_ia32_unpcklpd512_mask", IX86_BUILTIN_UNPCKLPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_unpcklps512_mask, "__builtin_ia32_unpcklps512_mask", IX86_BUILTIN_UNPCKLPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_UHI)
+BDESC (OPTION_MASK_ISA_AVX512CD, 0, CODE_FOR_clzv16si2_mask, "__builtin_ia32_vplzcntd_512_mask", IX86_BUILTIN_VPCLZCNTD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512CD, 0, CODE_FOR_clzv8di2_mask, "__builtin_ia32_vplzcntq_512_mask", IX86_BUILTIN_VPCLZCNTQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512CD, 0, CODE_FOR_conflictv16si_mask, "__builtin_ia32_vpconflictsi_512_mask", IX86_BUILTIN_VPCONFLICTD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512CD, 0, CODE_FOR_conflictv8di_mask, "__builtin_ia32_vpconflictdi_512_mask", IX86_BUILTIN_VPCONFLICTQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_permv8df_mask, "__builtin_ia32_permdf512_mask", IX86_BUILTIN_VPERMDF512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_permv8di_mask, "__builtin_ia32_permdi512_mask", IX86_BUILTIN_VPERMDI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vpermi2varv16si3_mask, "__builtin_ia32_vpermi2vard512_mask", IX86_BUILTIN_VPERMI2VARD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vpermi2varv8df3_mask, "__builtin_ia32_vpermi2varpd512_mask", IX86_BUILTIN_VPERMI2VARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vpermi2varv16sf3_mask, "__builtin_ia32_vpermi2varps512_mask", IX86_BUILTIN_VPERMI2VARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vpermi2varv8di3_mask, "__builtin_ia32_vpermi2varq512_mask", IX86_BUILTIN_VPERMI2VARQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vpermilv8df_mask, "__builtin_ia32_vpermilpd512_mask", IX86_BUILTIN_VPERMILPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vpermilv16sf_mask, "__builtin_ia32_vpermilps512_mask", IX86_BUILTIN_VPERMILPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vpermilvarv8df3_mask, "__builtin_ia32_vpermilvarpd512_mask", IX86_BUILTIN_VPERMILVARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vpermilvarv16sf3_mask, "__builtin_ia32_vpermilvarps512_mask", IX86_BUILTIN_VPERMILVARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vpermt2varv16si3_mask, "__builtin_ia32_vpermt2vard512_mask", IX86_BUILTIN_VPERMT2VARD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vpermt2varv16si3_maskz, "__builtin_ia32_vpermt2vard512_maskz", IX86_BUILTIN_VPERMT2VARD512_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vpermt2varv8df3_mask, "__builtin_ia32_vpermt2varpd512_mask", IX86_BUILTIN_VPERMT2VARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_V8DF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vpermt2varv8df3_maskz, "__builtin_ia32_vpermt2varpd512_maskz", IX86_BUILTIN_VPERMT2VARPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_V8DF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vpermt2varv16sf3_mask, "__builtin_ia32_vpermt2varps512_mask", IX86_BUILTIN_VPERMT2VARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_V16SF_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vpermt2varv16sf3_maskz, "__builtin_ia32_vpermt2varps512_maskz", IX86_BUILTIN_VPERMT2VARPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_V16SF_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vpermt2varv8di3_mask, "__builtin_ia32_vpermt2varq512_mask", IX86_BUILTIN_VPERMT2VARQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vpermt2varv8di3_maskz, "__builtin_ia32_vpermt2varq512_maskz", IX86_BUILTIN_VPERMT2VARQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_permvarv8df_mask, "__builtin_ia32_permvardf512_mask", IX86_BUILTIN_VPERMVARDF512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_permvarv8di_mask, "__builtin_ia32_permvardi512_mask", IX86_BUILTIN_VPERMVARDI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_permvarv16sf_mask, "__builtin_ia32_permvarsf512_mask", IX86_BUILTIN_VPERMVARSF512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_permvarv16si_mask, "__builtin_ia32_permvarsi512_mask", IX86_BUILTIN_VPERMVARSI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vternlogv16si_mask, "__builtin_ia32_pternlogd512_mask", IX86_BUILTIN_VTERNLOGD512_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vternlogv16si_maskz, "__builtin_ia32_pternlogd512_maskz", IX86_BUILTIN_VTERNLOGD512_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vternlogv8di_mask, "__builtin_ia32_pternlogq512_mask", IX86_BUILTIN_VTERNLOGQ512_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_INT_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vternlogv8di_maskz, "__builtin_ia32_pternlogq512_maskz", IX86_BUILTIN_VTERNLOGQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_INT_UQI)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_movdf_mask, "__builtin_ia32_movesd_mask", IX86_BUILTIN_MOVSD_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_movsf_mask, "__builtin_ia32_movess_mask", IX86_BUILTIN_MOVSS_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_copysignv16sf3, "__builtin_ia32_copysignps512", IX86_BUILTIN_CPYSGNPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_copysignv8df3, "__builtin_ia32_copysignpd512", IX86_BUILTIN_CPYSGNPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_sqrtv8df2, "__builtin_ia32_sqrtpd512", IX86_BUILTIN_SQRTPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_sqrtv16sf2, "__builtin_ia32_sqrtps512", IX86_BUILTIN_SQRTPS_NR512, UNKNOWN, (int) V16SF_FTYPE_V16SF)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_rndscalev32hf, "__builtin_ia32_floorph512", IX86_BUILTIN_FLOORPH512, (enum rtx_code) ROUND_FLOOR, (int) V32HF_FTYPE_V32HF_ROUND)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_rndscalev32hf, "__builtin_ia32_ceilph512", IX86_BUILTIN_CEILPH512, (enum rtx_code) ROUND_CEIL, (int) V32HF_FTYPE_V32HF_ROUND)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_rndscalev32hf, "__builtin_ia32_truncph512", IX86_BUILTIN_TRUNCPH512, (enum rtx_code) ROUND_TRUNC, (int) V32HF_FTYPE_V32HF_ROUND)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_roundps512, "__builtin_ia32_floorps512", IX86_BUILTIN_FLOORPS512, (enum rtx_code) ROUND_FLOOR, (int) V16SF_FTYPE_V16SF_ROUND)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_roundps512, "__builtin_ia32_ceilps512", IX86_BUILTIN_CEILPS512, (enum rtx_code) ROUND_CEIL, (int) V16SF_FTYPE_V16SF_ROUND)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_roundps512, "__builtin_ia32_truncps512", IX86_BUILTIN_TRUNCPS512, (enum rtx_code) ROUND_TRUNC, (int) V16SF_FTYPE_V16SF_ROUND)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_roundpd512, "__builtin_ia32_floorpd512", IX86_BUILTIN_FLOORPD512, (enum rtx_code) ROUND_FLOOR, (int) V8DF_FTYPE_V8DF_ROUND)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_roundpd512, "__builtin_ia32_ceilpd512", IX86_BUILTIN_CEILPD512, (enum rtx_code) ROUND_CEIL, (int) V8DF_FTYPE_V8DF_ROUND)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_roundpd512, "__builtin_ia32_truncpd512", IX86_BUILTIN_TRUNCPD512, (enum rtx_code) ROUND_TRUNC, (int) V8DF_FTYPE_V8DF_ROUND)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fix_notruncv16sfv16si, "__builtin_ia32_cvtps2dq512", IX86_BUILTIN_CVTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_vec_pack_sfix_v8df, "__builtin_ia32_vec_pack_sfix512", IX86_BUILTIN_VEC_PACK_SFIX512, UNKNOWN, (int) V16SI_FTYPE_V8DF_V8DF)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_roundv16sf2_sfix, "__builtin_ia32_roundps_az_sfix512", IX86_BUILTIN_ROUNDPS_AZ_SFIX512, UNKNOWN, (int) V16SI_FTYPE_V16SF)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_roundps512_sfix, "__builtin_ia32_floorps_sfix512", IX86_BUILTIN_FLOORPS_SFIX512, (enum rtx_code) ROUND_FLOOR, (int) V16SI_FTYPE_V16SF_ROUND)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_roundps512_sfix, "__builtin_ia32_ceilps_sfix512", IX86_BUILTIN_CEILPS_SFIX512, (enum rtx_code) ROUND_CEIL, (int) V16SI_FTYPE_V16SF_ROUND)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_roundv8df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix512", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512, UNKNOWN, (int) V16SI_FTYPE_V8DF_V8DF)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_roundpd_vec_pack_sfix512, "__builtin_ia32_floorpd_vec_pack_sfix512", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512, (enum rtx_code) ROUND_FLOOR, (int) V16SI_FTYPE_V8DF_V8DF_ROUND)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_roundpd_vec_pack_sfix512, "__builtin_ia32_ceilpd_vec_pack_sfix512", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512, (enum rtx_code) ROUND_CEIL, (int) V16SI_FTYPE_V8DF_V8DF_ROUND)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_copysignv16sf3, "__builtin_ia32_copysignps512", IX86_BUILTIN_CPYSGNPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_copysignv8df3, "__builtin_ia32_copysignpd512", IX86_BUILTIN_CPYSGNPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_sqrtv8df2, "__builtin_ia32_sqrtpd512", IX86_BUILTIN_SQRTPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sqrtv16sf2, "__builtin_ia32_sqrtps512", IX86_BUILTIN_SQRTPS_NR512, UNKNOWN, (int) V16SF_FTYPE_V16SF)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_rndscalev32hf, "__builtin_ia32_floorph512", IX86_BUILTIN_FLOORPH512, (enum rtx_code) ROUND_FLOOR, (int) V32HF_FTYPE_V32HF_ROUND)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_rndscalev32hf, "__builtin_ia32_ceilph512", IX86_BUILTIN_CEILPH512, (enum rtx_code) ROUND_CEIL, (int) V32HF_FTYPE_V32HF_ROUND)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_rndscalev32hf, "__builtin_ia32_truncph512", IX86_BUILTIN_TRUNCPH512, (enum rtx_code) ROUND_TRUNC, (int) V32HF_FTYPE_V32HF_ROUND)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_roundps512, "__builtin_ia32_floorps512", IX86_BUILTIN_FLOORPS512, (enum rtx_code) ROUND_FLOOR, (int) V16SF_FTYPE_V16SF_ROUND)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_roundps512, "__builtin_ia32_ceilps512", IX86_BUILTIN_CEILPS512, (enum rtx_code) ROUND_CEIL, (int) V16SF_FTYPE_V16SF_ROUND)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_roundps512, "__builtin_ia32_truncps512", IX86_BUILTIN_TRUNCPS512, (enum rtx_code) ROUND_TRUNC, (int) V16SF_FTYPE_V16SF_ROUND)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_roundpd512, "__builtin_ia32_floorpd512", IX86_BUILTIN_FLOORPD512, (enum rtx_code) ROUND_FLOOR, (int) V8DF_FTYPE_V8DF_ROUND)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_roundpd512, "__builtin_ia32_ceilpd512", IX86_BUILTIN_CEILPD512, (enum rtx_code) ROUND_CEIL, (int) V8DF_FTYPE_V8DF_ROUND)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_roundpd512, "__builtin_ia32_truncpd512", IX86_BUILTIN_TRUNCPD512, (enum rtx_code) ROUND_TRUNC, (int) V8DF_FTYPE_V8DF_ROUND)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fix_notruncv16sfv16si, "__builtin_ia32_cvtps2dq512", IX86_BUILTIN_CVTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vec_pack_sfix_v8df, "__builtin_ia32_vec_pack_sfix512", IX86_BUILTIN_VEC_PACK_SFIX512, UNKNOWN, (int) V16SI_FTYPE_V8DF_V8DF)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_roundv16sf2_sfix, "__builtin_ia32_roundps_az_sfix512", IX86_BUILTIN_ROUNDPS_AZ_SFIX512, UNKNOWN, (int) V16SI_FTYPE_V16SF)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_roundps512_sfix, "__builtin_ia32_floorps_sfix512", IX86_BUILTIN_FLOORPS_SFIX512, (enum rtx_code) ROUND_FLOOR, (int) V16SI_FTYPE_V16SF_ROUND)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_roundps512_sfix, "__builtin_ia32_ceilps_sfix512", IX86_BUILTIN_CEILPS_SFIX512, (enum rtx_code) ROUND_CEIL, (int) V16SI_FTYPE_V16SF_ROUND)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_roundv8df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix512", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512, UNKNOWN, (int) V16SI_FTYPE_V8DF_V8DF)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_roundpd_vec_pack_sfix512, "__builtin_ia32_floorpd_vec_pack_sfix512", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512, (enum rtx_code) ROUND_FLOOR, (int) V16SI_FTYPE_V8DF_V8DF_ROUND)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_roundpd_vec_pack_sfix512, "__builtin_ia32_ceilpd_vec_pack_sfix512", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512, (enum rtx_code) ROUND_CEIL, (int) V16SI_FTYPE_V8DF_V8DF_ROUND)
/* Mask arithmetic operations */
BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_kashiftqi, "__builtin_ia32_kshiftliqi", IX86_BUILTIN_KSHIFTLI8, UNKNOWN, (int) UQI_FTYPE_UQI_UQI_CONST)
@@ -2433,136 +2433,136 @@ BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_cmpv2df3_mask, "__builtin_
BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_cmpv4sf3_mask, "__builtin_ia32_cmpps128_mask", IX86_BUILTIN_CMPPS128_MASK, UNKNOWN, (int) UQI_FTYPE_V4SF_V4SF_INT_UQI)
/* AVX512DQ. */
-BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512dq_broadcastv16sf_mask, "__builtin_ia32_broadcastf32x2_512_mask", IX86_BUILTIN_BROADCASTF32x2_512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_UHI)
-BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512dq_broadcastv16si_mask, "__builtin_ia32_broadcasti32x2_512_mask", IX86_BUILTIN_BROADCASTI32x2_512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512dq_broadcastv8df_mask_1, "__builtin_ia32_broadcastf64x2_512_mask", IX86_BUILTIN_BROADCASTF64X2_512, UNKNOWN, (int) V8DF_FTYPE_V2DF_V8DF_UQI)
-BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512dq_broadcastv8di_mask_1, "__builtin_ia32_broadcasti64x2_512_mask", IX86_BUILTIN_BROADCASTI64X2_512, UNKNOWN, (int) V8DI_FTYPE_V2DI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512dq_broadcastv16sf_mask_1, "__builtin_ia32_broadcastf32x8_512_mask", IX86_BUILTIN_BROADCASTF32X8_512, UNKNOWN, (int) V16SF_FTYPE_V8SF_V16SF_UHI)
-BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512dq_broadcastv16si_mask_1, "__builtin_ia32_broadcasti32x8_512_mask", IX86_BUILTIN_BROADCASTI32X8_512, UNKNOWN, (int) V16SI_FTYPE_V8SI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512dq_vextractf64x2_mask, "__builtin_ia32_extractf64x2_512_mask", IX86_BUILTIN_EXTRACTF64X2_512, UNKNOWN, (int) V2DF_FTYPE_V8DF_INT_V2DF_UQI)
-BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512dq_vextractf32x8_mask, "__builtin_ia32_extractf32x8_mask", IX86_BUILTIN_EXTRACTF32X8, UNKNOWN, (int) V8SF_FTYPE_V16SF_INT_V8SF_UQI)
-BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512dq_vextracti64x2_mask, "__builtin_ia32_extracti64x2_512_mask", IX86_BUILTIN_EXTRACTI64X2_512, UNKNOWN, (int) V2DI_FTYPE_V8DI_INT_V2DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512dq_vextracti32x8_mask, "__builtin_ia32_extracti32x8_mask", IX86_BUILTIN_EXTRACTI32X8, UNKNOWN, (int) V8SI_FTYPE_V16SI_INT_V8SI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_reducepv8df_mask, "__builtin_ia32_reducepd512_mask", IX86_BUILTIN_REDUCEPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_UQI)
-BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_reducepv16sf_mask, "__builtin_ia32_reduceps512_mask", IX86_BUILTIN_REDUCEPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_UHI)
-BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512dq_mulv8di3_mask, "__builtin_ia32_pmullq512_mask", IX86_BUILTIN_PMULLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_xorv8df3_mask, "__builtin_ia32_xorpd512_mask", IX86_BUILTIN_XORPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI)
-BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_xorv16sf3_mask, "__builtin_ia32_xorps512_mask", IX86_BUILTIN_XORPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_UHI)
-BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_iorv8df3_mask, "__builtin_ia32_orpd512_mask", IX86_BUILTIN_ORPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI)
-BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_iorv16sf3_mask, "__builtin_ia32_orps512_mask", IX86_BUILTIN_ORPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_UHI)
-BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_andv8df3_mask, "__builtin_ia32_andpd512_mask", IX86_BUILTIN_ANDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI)
-BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_andv16sf3_mask, "__builtin_ia32_andps512_mask", IX86_BUILTIN_ANDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_UHI)
-BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_andnotv8df3_mask, "__builtin_ia32_andnpd512_mask", IX86_BUILTIN_ANDNPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI)
-BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_andnotv16sf3_mask, "__builtin_ia32_andnps512_mask", IX86_BUILTIN_ANDNPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_UHI)
-BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512dq_vinsertf32x8_mask, "__builtin_ia32_insertf32x8_mask", IX86_BUILTIN_INSERTF32X8, UNKNOWN, (int) V16SF_FTYPE_V16SF_V8SF_INT_V16SF_UHI)
-BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512dq_vinserti32x8_mask, "__builtin_ia32_inserti32x8_mask", IX86_BUILTIN_INSERTI32X8, UNKNOWN, (int) V16SI_FTYPE_V16SI_V8SI_INT_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512dq_vinsertf64x2_mask, "__builtin_ia32_insertf64x2_512_mask", IX86_BUILTIN_INSERTF64X2_512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V2DF_INT_V8DF_UQI)
-BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512dq_vinserti64x2_mask, "__builtin_ia32_inserti64x2_512_mask", IX86_BUILTIN_INSERTI64X2_512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_INT_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512dq_fpclassv8df_mask, "__builtin_ia32_fpclasspd512_mask", IX86_BUILTIN_FPCLASSPD512, UNKNOWN, (int) QI_FTYPE_V8DF_INT_UQI)
-BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512dq_fpclassv16sf_mask, "__builtin_ia32_fpclassps512_mask", IX86_BUILTIN_FPCLASSPS512, UNKNOWN, (int) HI_FTYPE_V16SF_INT_UHI)
-BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_cvtd2maskv16si, "__builtin_ia32_cvtd2mask512", IX86_BUILTIN_CVTD2MASK512, UNKNOWN, (int) UHI_FTYPE_V16SI)
-BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_cvtq2maskv8di, "__builtin_ia32_cvtq2mask512", IX86_BUILTIN_CVTQ2MASK512, UNKNOWN, (int) UQI_FTYPE_V8DI)
-BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_cvtmask2dv16si, "__builtin_ia32_cvtmask2d512", IX86_BUILTIN_CVTMASK2D512, UNKNOWN, (int) V16SI_FTYPE_UHI)
-BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_cvtmask2qv8di, "__builtin_ia32_cvtmask2q512", IX86_BUILTIN_CVTMASK2Q512, UNKNOWN, (int) V8DI_FTYPE_UQI)
+BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_avx512dq_broadcastv16sf_mask, "__builtin_ia32_broadcastf32x2_512_mask", IX86_BUILTIN_BROADCASTF32x2_512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_UHI)
+BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_avx512dq_broadcastv16si_mask, "__builtin_ia32_broadcasti32x2_512_mask", IX86_BUILTIN_BROADCASTI32x2_512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_avx512dq_broadcastv8df_mask_1, "__builtin_ia32_broadcastf64x2_512_mask", IX86_BUILTIN_BROADCASTF64X2_512, UNKNOWN, (int) V8DF_FTYPE_V2DF_V8DF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_avx512dq_broadcastv8di_mask_1, "__builtin_ia32_broadcasti64x2_512_mask", IX86_BUILTIN_BROADCASTI64X2_512, UNKNOWN, (int) V8DI_FTYPE_V2DI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_avx512dq_broadcastv16sf_mask_1, "__builtin_ia32_broadcastf32x8_512_mask", IX86_BUILTIN_BROADCASTF32X8_512, UNKNOWN, (int) V16SF_FTYPE_V8SF_V16SF_UHI)
+BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_avx512dq_broadcastv16si_mask_1, "__builtin_ia32_broadcasti32x8_512_mask", IX86_BUILTIN_BROADCASTI32X8_512, UNKNOWN, (int) V16SI_FTYPE_V8SI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_avx512dq_vextractf64x2_mask, "__builtin_ia32_extractf64x2_512_mask", IX86_BUILTIN_EXTRACTF64X2_512, UNKNOWN, (int) V2DF_FTYPE_V8DF_INT_V2DF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_avx512dq_vextractf32x8_mask, "__builtin_ia32_extractf32x8_mask", IX86_BUILTIN_EXTRACTF32X8, UNKNOWN, (int) V8SF_FTYPE_V16SF_INT_V8SF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_avx512dq_vextracti64x2_mask, "__builtin_ia32_extracti64x2_512_mask", IX86_BUILTIN_EXTRACTI64X2_512, UNKNOWN, (int) V2DI_FTYPE_V8DI_INT_V2DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_avx512dq_vextracti32x8_mask, "__builtin_ia32_extracti32x8_mask", IX86_BUILTIN_EXTRACTI32X8, UNKNOWN, (int) V8SI_FTYPE_V16SI_INT_V8SI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_reducepv8df_mask, "__builtin_ia32_reducepd512_mask", IX86_BUILTIN_REDUCEPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_reducepv16sf_mask, "__builtin_ia32_reduceps512_mask", IX86_BUILTIN_REDUCEPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_UHI)
+BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_avx512dq_mulv8di3_mask, "__builtin_ia32_pmullq512_mask", IX86_BUILTIN_PMULLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_xorv8df3_mask, "__builtin_ia32_xorpd512_mask", IX86_BUILTIN_XORPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_xorv16sf3_mask, "__builtin_ia32_xorps512_mask", IX86_BUILTIN_XORPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_UHI)
+BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_iorv8df3_mask, "__builtin_ia32_orpd512_mask", IX86_BUILTIN_ORPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_iorv16sf3_mask, "__builtin_ia32_orps512_mask", IX86_BUILTIN_ORPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_UHI)
+BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_andv8df3_mask, "__builtin_ia32_andpd512_mask", IX86_BUILTIN_ANDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_andv16sf3_mask, "__builtin_ia32_andps512_mask", IX86_BUILTIN_ANDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_UHI)
+BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_avx512f_andnotv8df3_mask, "__builtin_ia32_andnpd512_mask", IX86_BUILTIN_ANDNPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_avx512f_andnotv16sf3_mask, "__builtin_ia32_andnps512_mask", IX86_BUILTIN_ANDNPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_UHI)
+BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_avx512dq_vinsertf32x8_mask, "__builtin_ia32_insertf32x8_mask", IX86_BUILTIN_INSERTF32X8, UNKNOWN, (int) V16SF_FTYPE_V16SF_V8SF_INT_V16SF_UHI)
+BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_avx512dq_vinserti32x8_mask, "__builtin_ia32_inserti32x8_mask", IX86_BUILTIN_INSERTI32X8, UNKNOWN, (int) V16SI_FTYPE_V16SI_V8SI_INT_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_avx512dq_vinsertf64x2_mask, "__builtin_ia32_insertf64x2_512_mask", IX86_BUILTIN_INSERTF64X2_512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V2DF_INT_V8DF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_avx512dq_vinserti64x2_mask, "__builtin_ia32_inserti64x2_512_mask", IX86_BUILTIN_INSERTI64X2_512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_INT_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_avx512dq_fpclassv8df_mask, "__builtin_ia32_fpclasspd512_mask", IX86_BUILTIN_FPCLASSPD512, UNKNOWN, (int) QI_FTYPE_V8DF_INT_UQI)
+BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_avx512dq_fpclassv16sf_mask, "__builtin_ia32_fpclassps512_mask", IX86_BUILTIN_FPCLASSPS512, UNKNOWN, (int) HI_FTYPE_V16SF_INT_UHI)
+BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_avx512f_cvtd2maskv16si, "__builtin_ia32_cvtd2mask512", IX86_BUILTIN_CVTD2MASK512, UNKNOWN, (int) UHI_FTYPE_V16SI)
+BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_avx512f_cvtq2maskv8di, "__builtin_ia32_cvtq2mask512", IX86_BUILTIN_CVTQ2MASK512, UNKNOWN, (int) UQI_FTYPE_V8DI)
+BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_avx512f_cvtmask2dv16si, "__builtin_ia32_cvtmask2d512", IX86_BUILTIN_CVTMASK2D512, UNKNOWN, (int) V16SI_FTYPE_UHI)
+BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_avx512f_cvtmask2qv8di, "__builtin_ia32_cvtmask2q512", IX86_BUILTIN_CVTMASK2Q512, UNKNOWN, (int) V8DI_FTYPE_UQI)
/* AVX512BW. */
BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_kunpcksi, "__builtin_ia32_kunpcksi", IX86_BUILTIN_KUNPCKWD, UNKNOWN, (int) USI_FTYPE_USI_USI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_kunpckdi, "__builtin_ia32_kunpckdi", IX86_BUILTIN_KUNPCKDQ, UNKNOWN, (int) UDI_FTYPE_UDI_UDI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_packusdw_mask, "__builtin_ia32_packusdw512_mask", IX86_BUILTIN_PACKUSDW512, UNKNOWN, (int) V32HI_FTYPE_V16SI_V16SI_V32HI_USI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_ashlv4ti3, "__builtin_ia32_pslldq512", IX86_BUILTIN_PSLLDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_CONVERT)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_lshrv4ti3, "__builtin_ia32_psrldq512", IX86_BUILTIN_PSRLDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_CONVERT)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_packssdw_mask, "__builtin_ia32_packssdw512_mask", IX86_BUILTIN_PACKSSDW512, UNKNOWN, (int) V32HI_FTYPE_V16SI_V16SI_V32HI_USI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_palignrv4ti, "__builtin_ia32_palignr512", IX86_BUILTIN_PALIGNR512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_CONVERT)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_palignrv64qi_mask, "__builtin_ia32_palignr512_mask", IX86_BUILTIN_PALIGNR512_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_UDI_CONVERT)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_loadv32hi_mask, "__builtin_ia32_movdquhi512_mask", IX86_BUILTIN_MOVDQUHI512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_USI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_loadv64qi_mask, "__builtin_ia32_movdquqi512_mask", IX86_BUILTIN_MOVDQUQI512_MASK, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_UDI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_psadbw, "__builtin_ia32_psadbw512", IX86_BUILTIN_PSADBW512, UNKNOWN, (int) V8DI_FTYPE_V64QI_V64QI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_dbpsadbwv32hi_mask, "__builtin_ia32_dbpsadbw512_mask", IX86_BUILTIN_DBPSADBW512, UNKNOWN, (int) V32HI_FTYPE_V64QI_V64QI_INT_V32HI_USI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_vec_dupv64qi_mask, "__builtin_ia32_pbroadcastb512_mask", IX86_BUILTIN_PBROADCASTB512, UNKNOWN, (int) V64QI_FTYPE_V16QI_V64QI_UDI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_vec_dup_gprv64qi_mask, "__builtin_ia32_pbroadcastb512_gpr_mask", IX86_BUILTIN_PBROADCASTB512_GPR, UNKNOWN, (int) V64QI_FTYPE_QI_V64QI_UDI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_vec_dupv32hi_mask, "__builtin_ia32_pbroadcastw512_mask", IX86_BUILTIN_PBROADCASTW512, UNKNOWN, (int) V32HI_FTYPE_V8HI_V32HI_USI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_vec_dup_gprv32hi_mask, "__builtin_ia32_pbroadcastw512_gpr_mask", IX86_BUILTIN_PBROADCASTW512_GPR, UNKNOWN, (int) V32HI_FTYPE_HI_V32HI_USI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_sign_extendv32qiv32hi2_mask, "__builtin_ia32_pmovsxbw512_mask", IX86_BUILTIN_PMOVSXBW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32QI_V32HI_USI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_zero_extendv32qiv32hi2_mask, "__builtin_ia32_pmovzxbw512_mask", IX86_BUILTIN_PMOVZXBW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32QI_V32HI_USI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_permvarv32hi_mask, "__builtin_ia32_permvarhi512_mask", IX86_BUILTIN_VPERMVARHI512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_vpermt2varv32hi3_mask, "__builtin_ia32_vpermt2varhi512_mask", IX86_BUILTIN_VPERMT2VARHI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_vpermt2varv32hi3_maskz, "__builtin_ia32_vpermt2varhi512_maskz", IX86_BUILTIN_VPERMT2VARHI512_MASKZ, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_vpermi2varv32hi3_mask, "__builtin_ia32_vpermi2varhi512_mask", IX86_BUILTIN_VPERMI2VARHI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_uavgv64qi3_mask, "__builtin_ia32_pavgb512_mask", IX86_BUILTIN_PAVGB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_uavgv32hi3_mask, "__builtin_ia32_pavgw512_mask", IX86_BUILTIN_PAVGW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_addv64qi3_mask, "__builtin_ia32_paddb512_mask", IX86_BUILTIN_PADDB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_subv64qi3_mask, "__builtin_ia32_psubb512_mask", IX86_BUILTIN_PSUBB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_sssubv64qi3_mask, "__builtin_ia32_psubsb512_mask", IX86_BUILTIN_PSUBSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_ssaddv64qi3_mask, "__builtin_ia32_paddsb512_mask", IX86_BUILTIN_PADDSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_ussubv64qi3_mask, "__builtin_ia32_psubusb512_mask", IX86_BUILTIN_PSUBUSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_usaddv64qi3_mask, "__builtin_ia32_paddusb512_mask", IX86_BUILTIN_PADDUSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_subv32hi3_mask, "__builtin_ia32_psubw512_mask", IX86_BUILTIN_PSUBW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_addv32hi3_mask, "__builtin_ia32_paddw512_mask", IX86_BUILTIN_PADDW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_sssubv32hi3_mask, "__builtin_ia32_psubsw512_mask", IX86_BUILTIN_PSUBSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_ssaddv32hi3_mask, "__builtin_ia32_paddsw512_mask", IX86_BUILTIN_PADDSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_ussubv32hi3_mask, "__builtin_ia32_psubusw512_mask", IX86_BUILTIN_PSUBUSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_usaddv32hi3_mask, "__builtin_ia32_paddusw512_mask", IX86_BUILTIN_PADDUSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_umaxv32hi3_mask, "__builtin_ia32_pmaxuw512_mask", IX86_BUILTIN_PMAXUW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_smaxv32hi3_mask, "__builtin_ia32_pmaxsw512_mask", IX86_BUILTIN_PMAXSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_uminv32hi3_mask, "__builtin_ia32_pminuw512_mask", IX86_BUILTIN_PMINUW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_sminv32hi3_mask, "__builtin_ia32_pminsw512_mask", IX86_BUILTIN_PMINSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_umaxv64qi3_mask, "__builtin_ia32_pmaxub512_mask", IX86_BUILTIN_PMAXUB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_smaxv64qi3_mask, "__builtin_ia32_pmaxsb512_mask", IX86_BUILTIN_PMAXSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_uminv64qi3_mask, "__builtin_ia32_pminub512_mask", IX86_BUILTIN_PMINUB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_sminv64qi3_mask, "__builtin_ia32_pminsb512_mask", IX86_BUILTIN_PMINSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_truncatev32hiv32qi2_mask, "__builtin_ia32_pmovwb512_mask", IX86_BUILTIN_PMOVWB512, UNKNOWN, (int) V32QI_FTYPE_V32HI_V32QI_USI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_ss_truncatev32hiv32qi2_mask, "__builtin_ia32_pmovswb512_mask", IX86_BUILTIN_PMOVSWB512, UNKNOWN, (int) V32QI_FTYPE_V32HI_V32QI_USI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_us_truncatev32hiv32qi2_mask, "__builtin_ia32_pmovuswb512_mask", IX86_BUILTIN_PMOVUSWB512, UNKNOWN, (int) V32QI_FTYPE_V32HI_V32QI_USI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_umulhrswv32hi3_mask, "__builtin_ia32_pmulhrsw512_mask", IX86_BUILTIN_PMULHRSW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_umulv32hi3_highpart_mask, "__builtin_ia32_pmulhuw512_mask" , IX86_BUILTIN_PMULHUW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_smulv32hi3_highpart_mask, "__builtin_ia32_pmulhw512_mask" , IX86_BUILTIN_PMULHW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_mulv32hi3_mask, "__builtin_ia32_pmullw512_mask", IX86_BUILTIN_PMULLW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_ashlv32hi3_mask, "__builtin_ia32_psllwi512_mask", IX86_BUILTIN_PSLLWI512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_USI_COUNT)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_ashlv32hi3_mask, "__builtin_ia32_psllw512_mask", IX86_BUILTIN_PSLLW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V8HI_V32HI_USI_COUNT)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_packsswb_mask, "__builtin_ia32_packsswb512_mask", IX86_BUILTIN_PACKSSWB512, UNKNOWN, (int) V64QI_FTYPE_V32HI_V32HI_V64QI_UDI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_packuswb_mask, "__builtin_ia32_packuswb512_mask", IX86_BUILTIN_PACKUSWB512, UNKNOWN, (int) V64QI_FTYPE_V32HI_V32HI_V64QI_UDI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_ashrvv32hi_mask, "__builtin_ia32_psrav32hi_mask", IX86_BUILTIN_PSRAVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_pmaddubsw512v32hi_mask, "__builtin_ia32_pmaddubsw512_mask", IX86_BUILTIN_PMADDUBSW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V64QI_V64QI_V32HI_USI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_pmaddwd512v32hi_mask, "__builtin_ia32_pmaddwd512_mask", IX86_BUILTIN_PMADDWD512_MASK, UNKNOWN, (int) V16SI_FTYPE_V32HI_V32HI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_lshrvv32hi_mask, "__builtin_ia32_psrlv32hi_mask", IX86_BUILTIN_PSRLVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_interleave_highv64qi_mask, "__builtin_ia32_punpckhbw512_mask", IX86_BUILTIN_PUNPCKHBW512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_interleave_highv32hi_mask, "__builtin_ia32_punpckhwd512_mask", IX86_BUILTIN_PUNPCKHWD512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_interleave_lowv64qi_mask, "__builtin_ia32_punpcklbw512_mask", IX86_BUILTIN_PUNPCKLBW512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_interleave_lowv32hi_mask, "__builtin_ia32_punpcklwd512_mask", IX86_BUILTIN_PUNPCKLWD512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_pshufbv64qi3_mask, "__builtin_ia32_pshufb512_mask", IX86_BUILTIN_PSHUFB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_pshufhwv32hi_mask, "__builtin_ia32_pshufhw512_mask", IX86_BUILTIN_PSHUFHW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_USI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_pshuflwv32hi_mask, "__builtin_ia32_pshuflw512_mask", IX86_BUILTIN_PSHUFLW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_USI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_ashrv32hi3_mask, "__builtin_ia32_psrawi512_mask", IX86_BUILTIN_PSRAWI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_USI_COUNT)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_ashrv32hi3_mask, "__builtin_ia32_psraw512_mask", IX86_BUILTIN_PSRAW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V8HI_V32HI_USI_COUNT)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_lshrv32hi3_mask, "__builtin_ia32_psrlwi512_mask", IX86_BUILTIN_PSRLWI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_USI_COUNT)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_lshrv32hi3_mask, "__builtin_ia32_psrlw512_mask", IX86_BUILTIN_PSRLW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V8HI_V32HI_USI_COUNT)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_cvtb2maskv64qi, "__builtin_ia32_cvtb2mask512", IX86_BUILTIN_CVTB2MASK512, UNKNOWN, (int) UDI_FTYPE_V64QI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_cvtw2maskv32hi, "__builtin_ia32_cvtw2mask512", IX86_BUILTIN_CVTW2MASK512, UNKNOWN, (int) USI_FTYPE_V32HI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_cvtmask2bv64qi, "__builtin_ia32_cvtmask2b512", IX86_BUILTIN_CVTMASK2B512, UNKNOWN, (int) V64QI_FTYPE_UDI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_cvtmask2wv32hi, "__builtin_ia32_cvtmask2w512", IX86_BUILTIN_CVTMASK2W512, UNKNOWN, (int) V32HI_FTYPE_USI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_eqv64qi3_mask, "__builtin_ia32_pcmpeqb512_mask", IX86_BUILTIN_PCMPEQB512_MASK, UNKNOWN, (int) UDI_FTYPE_V64QI_V64QI_UDI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_eqv32hi3_mask, "__builtin_ia32_pcmpeqw512_mask", IX86_BUILTIN_PCMPEQW512_MASK, UNKNOWN, (int) USI_FTYPE_V32HI_V32HI_USI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_gtv64qi3_mask, "__builtin_ia32_pcmpgtb512_mask", IX86_BUILTIN_PCMPGTB512_MASK, UNKNOWN, (int) UDI_FTYPE_V64QI_V64QI_UDI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_gtv32hi3_mask, "__builtin_ia32_pcmpgtw512_mask", IX86_BUILTIN_PCMPGTW512_MASK, UNKNOWN, (int) USI_FTYPE_V32HI_V32HI_USI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_testmv64qi3_mask, "__builtin_ia32_ptestmb512", IX86_BUILTIN_PTESTMB512, UNKNOWN, (int) UDI_FTYPE_V64QI_V64QI_UDI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_testmv32hi3_mask, "__builtin_ia32_ptestmw512", IX86_BUILTIN_PTESTMW512, UNKNOWN, (int) USI_FTYPE_V32HI_V32HI_USI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_testnmv64qi3_mask, "__builtin_ia32_ptestnmb512", IX86_BUILTIN_PTESTNMB512, UNKNOWN, (int) UDI_FTYPE_V64QI_V64QI_UDI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_testnmv32hi3_mask, "__builtin_ia32_ptestnmw512", IX86_BUILTIN_PTESTNMW512, UNKNOWN, (int) USI_FTYPE_V32HI_V32HI_USI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_ashlvv32hi_mask, "__builtin_ia32_psllv32hi_mask", IX86_BUILTIN_PSLLVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_absv64qi2_mask, "__builtin_ia32_pabsb512_mask", IX86_BUILTIN_PABSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_UDI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_absv32hi2_mask, "__builtin_ia32_pabsw512_mask", IX86_BUILTIN_PABSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_USI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_blendmv32hi, "__builtin_ia32_blendmw_512_mask", IX86_BUILTIN_BLENDMW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_USI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_blendmv64qi, "__builtin_ia32_blendmb_512_mask", IX86_BUILTIN_BLENDMB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_UDI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_cmpv64qi3_mask, "__builtin_ia32_cmpb512_mask", IX86_BUILTIN_CMPB512, UNKNOWN, (int) UDI_FTYPE_V64QI_V64QI_INT_UDI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_cmpv32hi3_mask, "__builtin_ia32_cmpw512_mask", IX86_BUILTIN_CMPW512, UNKNOWN, (int) USI_FTYPE_V32HI_V32HI_INT_USI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_ucmpv64qi3_mask, "__builtin_ia32_ucmpb512_mask", IX86_BUILTIN_UCMPB512, UNKNOWN, (int) UDI_FTYPE_V64QI_V64QI_INT_UDI)
-BDESC (OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_ucmpv32hi3_mask, "__builtin_ia32_ucmpw512_mask", IX86_BUILTIN_UCMPW512, UNKNOWN, (int) USI_FTYPE_V32HI_V32HI_INT_USI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_kunpckdi, "__builtin_ia32_kunpckdi", IX86_BUILTIN_KUNPCKDQ, UNKNOWN, (int) UDI_FTYPE_UDI_UDI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_packusdw_mask, "__builtin_ia32_packusdw512_mask", IX86_BUILTIN_PACKUSDW512, UNKNOWN, (int) V32HI_FTYPE_V16SI_V16SI_V32HI_USI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_ashlv4ti3, "__builtin_ia32_pslldq512", IX86_BUILTIN_PSLLDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_CONVERT)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_lshrv4ti3, "__builtin_ia32_psrldq512", IX86_BUILTIN_PSRLDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_CONVERT)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_packssdw_mask, "__builtin_ia32_packssdw512_mask", IX86_BUILTIN_PACKSSDW512, UNKNOWN, (int) V32HI_FTYPE_V16SI_V16SI_V32HI_USI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_palignrv4ti, "__builtin_ia32_palignr512", IX86_BUILTIN_PALIGNR512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_CONVERT)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_palignrv64qi_mask, "__builtin_ia32_palignr512_mask", IX86_BUILTIN_PALIGNR512_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_UDI_CONVERT)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_loadv32hi_mask, "__builtin_ia32_movdquhi512_mask", IX86_BUILTIN_MOVDQUHI512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_USI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_loadv64qi_mask, "__builtin_ia32_movdquqi512_mask", IX86_BUILTIN_MOVDQUQI512_MASK, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_UDI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512f_psadbw, "__builtin_ia32_psadbw512", IX86_BUILTIN_PSADBW512, UNKNOWN, (int) V8DI_FTYPE_V64QI_V64QI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_dbpsadbwv32hi_mask, "__builtin_ia32_dbpsadbw512_mask", IX86_BUILTIN_DBPSADBW512, UNKNOWN, (int) V32HI_FTYPE_V64QI_V64QI_INT_V32HI_USI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_vec_dupv64qi_mask, "__builtin_ia32_pbroadcastb512_mask", IX86_BUILTIN_PBROADCASTB512, UNKNOWN, (int) V64QI_FTYPE_V16QI_V64QI_UDI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_vec_dup_gprv64qi_mask, "__builtin_ia32_pbroadcastb512_gpr_mask", IX86_BUILTIN_PBROADCASTB512_GPR, UNKNOWN, (int) V64QI_FTYPE_QI_V64QI_UDI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_vec_dupv32hi_mask, "__builtin_ia32_pbroadcastw512_mask", IX86_BUILTIN_PBROADCASTW512, UNKNOWN, (int) V32HI_FTYPE_V8HI_V32HI_USI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_vec_dup_gprv32hi_mask, "__builtin_ia32_pbroadcastw512_gpr_mask", IX86_BUILTIN_PBROADCASTW512_GPR, UNKNOWN, (int) V32HI_FTYPE_HI_V32HI_USI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_sign_extendv32qiv32hi2_mask, "__builtin_ia32_pmovsxbw512_mask", IX86_BUILTIN_PMOVSXBW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32QI_V32HI_USI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_zero_extendv32qiv32hi2_mask, "__builtin_ia32_pmovzxbw512_mask", IX86_BUILTIN_PMOVZXBW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32QI_V32HI_USI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_permvarv32hi_mask, "__builtin_ia32_permvarhi512_mask", IX86_BUILTIN_VPERMVARHI512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_vpermt2varv32hi3_mask, "__builtin_ia32_vpermt2varhi512_mask", IX86_BUILTIN_VPERMT2VARHI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_vpermt2varv32hi3_maskz, "__builtin_ia32_vpermt2varhi512_maskz", IX86_BUILTIN_VPERMT2VARHI512_MASKZ, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_vpermi2varv32hi3_mask, "__builtin_ia32_vpermi2varhi512_mask", IX86_BUILTIN_VPERMI2VARHI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_uavgv64qi3_mask, "__builtin_ia32_pavgb512_mask", IX86_BUILTIN_PAVGB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_uavgv32hi3_mask, "__builtin_ia32_pavgw512_mask", IX86_BUILTIN_PAVGW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_addv64qi3_mask, "__builtin_ia32_paddb512_mask", IX86_BUILTIN_PADDB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_subv64qi3_mask, "__builtin_ia32_psubb512_mask", IX86_BUILTIN_PSUBB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_sssubv64qi3_mask, "__builtin_ia32_psubsb512_mask", IX86_BUILTIN_PSUBSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_ssaddv64qi3_mask, "__builtin_ia32_paddsb512_mask", IX86_BUILTIN_PADDSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_ussubv64qi3_mask, "__builtin_ia32_psubusb512_mask", IX86_BUILTIN_PSUBUSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_usaddv64qi3_mask, "__builtin_ia32_paddusb512_mask", IX86_BUILTIN_PADDUSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_subv32hi3_mask, "__builtin_ia32_psubw512_mask", IX86_BUILTIN_PSUBW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_addv32hi3_mask, "__builtin_ia32_paddw512_mask", IX86_BUILTIN_PADDW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_sssubv32hi3_mask, "__builtin_ia32_psubsw512_mask", IX86_BUILTIN_PSUBSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_ssaddv32hi3_mask, "__builtin_ia32_paddsw512_mask", IX86_BUILTIN_PADDSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_ussubv32hi3_mask, "__builtin_ia32_psubusw512_mask", IX86_BUILTIN_PSUBUSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_usaddv32hi3_mask, "__builtin_ia32_paddusw512_mask", IX86_BUILTIN_PADDUSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_umaxv32hi3_mask, "__builtin_ia32_pmaxuw512_mask", IX86_BUILTIN_PMAXUW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_smaxv32hi3_mask, "__builtin_ia32_pmaxsw512_mask", IX86_BUILTIN_PMAXSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_uminv32hi3_mask, "__builtin_ia32_pminuw512_mask", IX86_BUILTIN_PMINUW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_sminv32hi3_mask, "__builtin_ia32_pminsw512_mask", IX86_BUILTIN_PMINSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_umaxv64qi3_mask, "__builtin_ia32_pmaxub512_mask", IX86_BUILTIN_PMAXUB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_smaxv64qi3_mask, "__builtin_ia32_pmaxsb512_mask", IX86_BUILTIN_PMAXSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_uminv64qi3_mask, "__builtin_ia32_pminub512_mask", IX86_BUILTIN_PMINUB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_sminv64qi3_mask, "__builtin_ia32_pminsb512_mask", IX86_BUILTIN_PMINSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_truncatev32hiv32qi2_mask, "__builtin_ia32_pmovwb512_mask", IX86_BUILTIN_PMOVWB512, UNKNOWN, (int) V32QI_FTYPE_V32HI_V32QI_USI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_ss_truncatev32hiv32qi2_mask, "__builtin_ia32_pmovswb512_mask", IX86_BUILTIN_PMOVSWB512, UNKNOWN, (int) V32QI_FTYPE_V32HI_V32QI_USI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_us_truncatev32hiv32qi2_mask, "__builtin_ia32_pmovuswb512_mask", IX86_BUILTIN_PMOVUSWB512, UNKNOWN, (int) V32QI_FTYPE_V32HI_V32QI_USI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_umulhrswv32hi3_mask, "__builtin_ia32_pmulhrsw512_mask", IX86_BUILTIN_PMULHRSW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_umulv32hi3_highpart_mask, "__builtin_ia32_pmulhuw512_mask" , IX86_BUILTIN_PMULHUW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_smulv32hi3_highpart_mask, "__builtin_ia32_pmulhw512_mask" , IX86_BUILTIN_PMULHW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_mulv32hi3_mask, "__builtin_ia32_pmullw512_mask", IX86_BUILTIN_PMULLW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_ashlv32hi3_mask, "__builtin_ia32_psllwi512_mask", IX86_BUILTIN_PSLLWI512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_USI_COUNT)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_ashlv32hi3_mask, "__builtin_ia32_psllw512_mask", IX86_BUILTIN_PSLLW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V8HI_V32HI_USI_COUNT)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_packsswb_mask, "__builtin_ia32_packsswb512_mask", IX86_BUILTIN_PACKSSWB512, UNKNOWN, (int) V64QI_FTYPE_V32HI_V32HI_V64QI_UDI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_packuswb_mask, "__builtin_ia32_packuswb512_mask", IX86_BUILTIN_PACKUSWB512, UNKNOWN, (int) V64QI_FTYPE_V32HI_V32HI_V64QI_UDI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_ashrvv32hi_mask, "__builtin_ia32_psrav32hi_mask", IX86_BUILTIN_PSRAVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_pmaddubsw512v32hi_mask, "__builtin_ia32_pmaddubsw512_mask", IX86_BUILTIN_PMADDUBSW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V64QI_V64QI_V32HI_USI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_pmaddwd512v32hi_mask, "__builtin_ia32_pmaddwd512_mask", IX86_BUILTIN_PMADDWD512_MASK, UNKNOWN, (int) V16SI_FTYPE_V32HI_V32HI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_lshrvv32hi_mask, "__builtin_ia32_psrlv32hi_mask", IX86_BUILTIN_PSRLVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_interleave_highv64qi_mask, "__builtin_ia32_punpckhbw512_mask", IX86_BUILTIN_PUNPCKHBW512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_interleave_highv32hi_mask, "__builtin_ia32_punpckhwd512_mask", IX86_BUILTIN_PUNPCKHWD512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_interleave_lowv64qi_mask, "__builtin_ia32_punpcklbw512_mask", IX86_BUILTIN_PUNPCKLBW512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_interleave_lowv32hi_mask, "__builtin_ia32_punpcklwd512_mask", IX86_BUILTIN_PUNPCKLWD512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_pshufbv64qi3_mask, "__builtin_ia32_pshufb512_mask", IX86_BUILTIN_PSHUFB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_pshufhwv32hi_mask, "__builtin_ia32_pshufhw512_mask", IX86_BUILTIN_PSHUFHW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_USI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_pshuflwv32hi_mask, "__builtin_ia32_pshuflw512_mask", IX86_BUILTIN_PSHUFLW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_USI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_ashrv32hi3_mask, "__builtin_ia32_psrawi512_mask", IX86_BUILTIN_PSRAWI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_USI_COUNT)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_ashrv32hi3_mask, "__builtin_ia32_psraw512_mask", IX86_BUILTIN_PSRAW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V8HI_V32HI_USI_COUNT)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_lshrv32hi3_mask, "__builtin_ia32_psrlwi512_mask", IX86_BUILTIN_PSRLWI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_USI_COUNT)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_lshrv32hi3_mask, "__builtin_ia32_psrlw512_mask", IX86_BUILTIN_PSRLW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V8HI_V32HI_USI_COUNT)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_cvtb2maskv64qi, "__builtin_ia32_cvtb2mask512", IX86_BUILTIN_CVTB2MASK512, UNKNOWN, (int) UDI_FTYPE_V64QI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_cvtw2maskv32hi, "__builtin_ia32_cvtw2mask512", IX86_BUILTIN_CVTW2MASK512, UNKNOWN, (int) USI_FTYPE_V32HI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_cvtmask2bv64qi, "__builtin_ia32_cvtmask2b512", IX86_BUILTIN_CVTMASK2B512, UNKNOWN, (int) V64QI_FTYPE_UDI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_cvtmask2wv32hi, "__builtin_ia32_cvtmask2w512", IX86_BUILTIN_CVTMASK2W512, UNKNOWN, (int) V32HI_FTYPE_USI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_eqv64qi3_mask, "__builtin_ia32_pcmpeqb512_mask", IX86_BUILTIN_PCMPEQB512_MASK, UNKNOWN, (int) UDI_FTYPE_V64QI_V64QI_UDI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_eqv32hi3_mask, "__builtin_ia32_pcmpeqw512_mask", IX86_BUILTIN_PCMPEQW512_MASK, UNKNOWN, (int) USI_FTYPE_V32HI_V32HI_USI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_gtv64qi3_mask, "__builtin_ia32_pcmpgtb512_mask", IX86_BUILTIN_PCMPGTB512_MASK, UNKNOWN, (int) UDI_FTYPE_V64QI_V64QI_UDI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_gtv32hi3_mask, "__builtin_ia32_pcmpgtw512_mask", IX86_BUILTIN_PCMPGTW512_MASK, UNKNOWN, (int) USI_FTYPE_V32HI_V32HI_USI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_testmv64qi3_mask, "__builtin_ia32_ptestmb512", IX86_BUILTIN_PTESTMB512, UNKNOWN, (int) UDI_FTYPE_V64QI_V64QI_UDI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_testmv32hi3_mask, "__builtin_ia32_ptestmw512", IX86_BUILTIN_PTESTMW512, UNKNOWN, (int) USI_FTYPE_V32HI_V32HI_USI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_testnmv64qi3_mask, "__builtin_ia32_ptestnmb512", IX86_BUILTIN_PTESTNMB512, UNKNOWN, (int) UDI_FTYPE_V64QI_V64QI_UDI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_testnmv32hi3_mask, "__builtin_ia32_ptestnmw512", IX86_BUILTIN_PTESTNMW512, UNKNOWN, (int) USI_FTYPE_V32HI_V32HI_USI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_ashlvv32hi_mask, "__builtin_ia32_psllv32hi_mask", IX86_BUILTIN_PSLLVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_absv64qi2_mask, "__builtin_ia32_pabsb512_mask", IX86_BUILTIN_PABSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_UDI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_absv32hi2_mask, "__builtin_ia32_pabsw512_mask", IX86_BUILTIN_PABSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_USI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_blendmv32hi, "__builtin_ia32_blendmw_512_mask", IX86_BUILTIN_BLENDMW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_USI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_blendmv64qi, "__builtin_ia32_blendmb_512_mask", IX86_BUILTIN_BLENDMB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_UDI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_cmpv64qi3_mask, "__builtin_ia32_cmpb512_mask", IX86_BUILTIN_CMPB512, UNKNOWN, (int) UDI_FTYPE_V64QI_V64QI_INT_UDI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_cmpv32hi3_mask, "__builtin_ia32_cmpw512_mask", IX86_BUILTIN_CMPW512, UNKNOWN, (int) USI_FTYPE_V32HI_V32HI_INT_USI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_ucmpv64qi3_mask, "__builtin_ia32_ucmpb512_mask", IX86_BUILTIN_UCMPB512, UNKNOWN, (int) UDI_FTYPE_V64QI_V64QI_INT_UDI)
+BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_ucmpv32hi3_mask, "__builtin_ia32_ucmpw512_mask", IX86_BUILTIN_UCMPW512, UNKNOWN, (int) USI_FTYPE_V32HI_V32HI_INT_USI)
/* AVX512IFMA */
-BDESC (OPTION_MASK_ISA_AVX512IFMA, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpmadd52luqv8di_mask, "__builtin_ia32_vpmadd52luq512_mask", IX86_BUILTIN_VPMADD52LUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512IFMA, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpmadd52luqv8di_maskz, "__builtin_ia32_vpmadd52luq512_maskz", IX86_BUILTIN_VPMADD52LUQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512IFMA, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpmadd52huqv8di_mask, "__builtin_ia32_vpmadd52huq512_mask", IX86_BUILTIN_VPMADD52HUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512IFMA, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpmadd52huqv8di_maskz, "__builtin_ia32_vpmadd52huq512_maskz", IX86_BUILTIN_VPMADD52HUQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512IFMA, 0, CODE_FOR_vpmadd52luqv8di_mask, "__builtin_ia32_vpmadd52luq512_mask", IX86_BUILTIN_VPMADD52LUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512IFMA, 0, CODE_FOR_vpmadd52luqv8di_maskz, "__builtin_ia32_vpmadd52luq512_maskz", IX86_BUILTIN_VPMADD52LUQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512IFMA, 0, CODE_FOR_vpmadd52huqv8di_mask, "__builtin_ia32_vpmadd52huq512_mask", IX86_BUILTIN_VPMADD52HUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512IFMA, 0, CODE_FOR_vpmadd52huqv8di_maskz, "__builtin_ia32_vpmadd52huq512_maskz", IX86_BUILTIN_VPMADD52HUQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
BDESC (OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpmadd52luqv4di_mask, "__builtin_ia32_vpmadd52luq256_mask", IX86_BUILTIN_VPMADD52LUQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI)
BDESC (OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpmadd52luqv4di_maskz, "__builtin_ia32_vpmadd52luq256_maskz", IX86_BUILTIN_VPMADD52LUQ256_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI)
BDESC (OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpmadd52huqv4di_mask, "__builtin_ia32_vpmadd52huq256_mask", IX86_BUILTIN_VPMADD52HUQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI)
@@ -2577,13 +2577,13 @@ BDESC (OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_A
BDESC (OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVXIFMA, CODE_FOR_vpmadd52huqv2di, "__builtin_ia32_vpmadd52huq128", IX86_BUILTIN_VPMADD52HUQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI)
/* AVX512VBMI */
-BDESC (OPTION_MASK_ISA_AVX512VBMI, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpmultishiftqbv64qi_mask, "__builtin_ia32_vpmultishiftqb512_mask", IX86_BUILTIN_VPMULTISHIFTQB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI)
+BDESC (OPTION_MASK_ISA_AVX512VBMI, 0, CODE_FOR_vpmultishiftqbv64qi_mask, "__builtin_ia32_vpmultishiftqb512_mask", IX86_BUILTIN_VPMULTISHIFTQB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI)
BDESC (OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpmultishiftqbv32qi_mask, "__builtin_ia32_vpmultishiftqb256_mask", IX86_BUILTIN_VPMULTISHIFTQB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI)
BDESC (OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpmultishiftqbv16qi_mask, "__builtin_ia32_vpmultishiftqb128_mask", IX86_BUILTIN_VPMULTISHIFTQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512VBMI, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_permvarv64qi_mask, "__builtin_ia32_permvarqi512_mask", IX86_BUILTIN_VPERMVARQI512_MASK, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI)
-BDESC (OPTION_MASK_ISA_AVX512VBMI, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_vpermt2varv64qi3_mask, "__builtin_ia32_vpermt2varqi512_mask", IX86_BUILTIN_VPERMT2VARQI512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI)
-BDESC (OPTION_MASK_ISA_AVX512VBMI, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_vpermt2varv64qi3_maskz, "__builtin_ia32_vpermt2varqi512_maskz", IX86_BUILTIN_VPERMT2VARQI512_MASKZ, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI)
-BDESC (OPTION_MASK_ISA_AVX512VBMI, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_vpermi2varv64qi3_mask, "__builtin_ia32_vpermi2varqi512_mask", IX86_BUILTIN_VPERMI2VARQI512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI)
+BDESC (OPTION_MASK_ISA_AVX512VBMI, 0, CODE_FOR_avx512bw_permvarv64qi_mask, "__builtin_ia32_permvarqi512_mask", IX86_BUILTIN_VPERMVARQI512_MASK, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI)
+BDESC (OPTION_MASK_ISA_AVX512VBMI, 0, CODE_FOR_avx512bw_vpermt2varv64qi3_mask, "__builtin_ia32_vpermt2varqi512_mask", IX86_BUILTIN_VPERMT2VARQI512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI)
+BDESC (OPTION_MASK_ISA_AVX512VBMI, 0, CODE_FOR_avx512bw_vpermt2varv64qi3_maskz, "__builtin_ia32_vpermt2varqi512_maskz", IX86_BUILTIN_VPERMT2VARQI512_MASKZ, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI)
+BDESC (OPTION_MASK_ISA_AVX512VBMI, 0, CODE_FOR_avx512bw_vpermi2varv64qi3_mask, "__builtin_ia32_vpermi2varqi512_mask", IX86_BUILTIN_VPERMI2VARQI512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI)
BDESC (OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_permvarv32qi_mask, "__builtin_ia32_permvarqi256_mask", IX86_BUILTIN_VPERMVARQI256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI)
BDESC (OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_permvarv16qi_mask, "__builtin_ia32_permvarqi128_mask", IX86_BUILTIN_VPERMVARQI128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI)
BDESC (OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_vpermt2varv32qi3_mask, "__builtin_ia32_vpermt2varqi256_mask", IX86_BUILTIN_VPERMT2VARQI256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI)
@@ -2594,16 +2594,16 @@ BDESC (OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512
BDESC (OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_vpermi2varv16qi3_mask, "__builtin_ia32_vpermi2varqi128_mask", IX86_BUILTIN_VPERMI2VARQI128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI)
/* VBMI2 */
-BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_compressv64qi_mask, "__builtin_ia32_compressqi512_mask", IX86_BUILTIN_PCOMPRESSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_UDI)
-BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_compressv32hi_mask, "__builtin_ia32_compresshi512_mask", IX86_BUILTIN_PCOMPRESSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_USI)
+BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_compressv64qi_mask, "__builtin_ia32_compressqi512_mask", IX86_BUILTIN_PCOMPRESSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_UDI)
+BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_compressv32hi_mask, "__builtin_ia32_compresshi512_mask", IX86_BUILTIN_PCOMPRESSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_USI)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_compressv32qi_mask, "__builtin_ia32_compressqi256_mask", IX86_BUILTIN_PCOMPRESSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_USI)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_compressv16qi_mask, "__builtin_ia32_compressqi128_mask", IX86_BUILTIN_PCOMPRESSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_UHI)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_compressv16hi_mask, "__builtin_ia32_compresshi256_mask", IX86_BUILTIN_PCOMPRESSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_UHI)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_compressv8hi_mask, "__builtin_ia32_compresshi128_mask", IX86_BUILTIN_PCOMPRESSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_expandv64qi_mask, "__builtin_ia32_expandqi512_mask", IX86_BUILTIN_PEXPANDB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_UDI)
-BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_expandv64qi_maskz, "__builtin_ia32_expandqi512_maskz", IX86_BUILTIN_PEXPANDB512Z, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_UDI)
-BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_expandv32hi_mask, "__builtin_ia32_expandhi512_mask", IX86_BUILTIN_PEXPANDW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_USI)
-BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_expandv32hi_maskz, "__builtin_ia32_expandhi512_maskz", IX86_BUILTIN_PEXPANDW512Z, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_USI)
+BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_expandv64qi_mask, "__builtin_ia32_expandqi512_mask", IX86_BUILTIN_PEXPANDB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_UDI)
+BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_expandv64qi_maskz, "__builtin_ia32_expandqi512_maskz", IX86_BUILTIN_PEXPANDB512Z, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_UDI)
+BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_expandv32hi_mask, "__builtin_ia32_expandhi512_mask", IX86_BUILTIN_PEXPANDW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_USI)
+BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_expandv32hi_maskz, "__builtin_ia32_expandhi512_maskz", IX86_BUILTIN_PEXPANDW512Z, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_USI)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_expandv32qi_mask, "__builtin_ia32_expandqi256_mask", IX86_BUILTIN_PEXPANDB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_USI)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_expandv32qi_maskz, "__builtin_ia32_expandqi256_maskz", IX86_BUILTIN_PEXPANDB256Z, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_USI)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_expandv16qi_mask, "__builtin_ia32_expandqi128_mask", IX86_BUILTIN_PEXPANDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_UHI)
@@ -2612,64 +2612,64 @@ BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_expan
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_expandv16hi_maskz, "__builtin_ia32_expandhi256_maskz", IX86_BUILTIN_PEXPANDW256Z, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_UHI)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_expandv8hi_mask, "__builtin_ia32_expandhi128_mask", IX86_BUILTIN_PEXPANDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_UQI)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_expandv8hi_maskz, "__builtin_ia32_expandhi128_maskz", IX86_BUILTIN_PEXPANDW128Z, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpshrd_v32hi, "__builtin_ia32_vpshrd_v32hi", IX86_BUILTIN_VPSHRDV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_INT)
-BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpshrd_v32hi_mask, "__builtin_ia32_vpshrd_v32hi_mask", IX86_BUILTIN_VPSHRDV32HI_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_INT_V32HI_INT)
+BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshrd_v32hi, "__builtin_ia32_vpshrd_v32hi", IX86_BUILTIN_VPSHRDV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_INT)
+BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshrd_v32hi_mask, "__builtin_ia32_vpshrd_v32hi_mask", IX86_BUILTIN_VPSHRDV32HI_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_INT_V32HI_INT)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrd_v16hi, "__builtin_ia32_vpshrd_v16hi", IX86_BUILTIN_VPSHRDV16HI, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_INT)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrd_v16hi_mask, "__builtin_ia32_vpshrd_v16hi_mask", IX86_BUILTIN_VPSHRDV16HI_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_INT_V16HI_INT)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrd_v8hi, "__builtin_ia32_vpshrd_v8hi", IX86_BUILTIN_VPSHRDV8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrd_v8hi_mask, "__builtin_ia32_vpshrd_v8hi_mask", IX86_BUILTIN_VPSHRDV8HI_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT_V8HI_INT)
-BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpshrd_v16si, "__builtin_ia32_vpshrd_v16si", IX86_BUILTIN_VPSHRDV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT)
-BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpshrd_v16si_mask, "__builtin_ia32_vpshrd_v16si_mask", IX86_BUILTIN_VPSHRDV16SI_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_INT)
+BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshrd_v16si, "__builtin_ia32_vpshrd_v16si", IX86_BUILTIN_VPSHRDV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT)
+BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshrd_v16si_mask, "__builtin_ia32_vpshrd_v16si_mask", IX86_BUILTIN_VPSHRDV16SI_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_INT)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrd_v8si, "__builtin_ia32_vpshrd_v8si", IX86_BUILTIN_VPSHRDV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrd_v8si_mask, "__builtin_ia32_vpshrd_v8si_mask", IX86_BUILTIN_VPSHRDV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT_V8SI_INT)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrd_v4si, "__builtin_ia32_vpshrd_v4si", IX86_BUILTIN_VPSHRDV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrd_v4si_mask, "__builtin_ia32_vpshrd_v4si_mask", IX86_BUILTIN_VPSHRDV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT_V4SI_INT)
-BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpshrd_v8di, "__builtin_ia32_vpshrd_v8di", IX86_BUILTIN_VPSHRDV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT)
-BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpshrd_v8di_mask, "__builtin_ia32_vpshrd_v8di_mask", IX86_BUILTIN_VPSHRDV8DI_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_INT)
+BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshrd_v8di, "__builtin_ia32_vpshrd_v8di", IX86_BUILTIN_VPSHRDV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT)
+BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshrd_v8di_mask, "__builtin_ia32_vpshrd_v8di_mask", IX86_BUILTIN_VPSHRDV8DI_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_INT)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrd_v4di, "__builtin_ia32_vpshrd_v4di", IX86_BUILTIN_VPSHRDV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrd_v4di_mask, "__builtin_ia32_vpshrd_v4di_mask", IX86_BUILTIN_VPSHRDV4DI_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_V4DI_INT)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrd_v2di, "__builtin_ia32_vpshrd_v2di", IX86_BUILTIN_VPSHRDV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrd_v2di_mask, "__builtin_ia32_vpshrd_v2di_mask", IX86_BUILTIN_VPSHRDV2DI_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_V2DI_INT)
-BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpshld_v32hi, "__builtin_ia32_vpshld_v32hi", IX86_BUILTIN_VPSHLDV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_INT)
-BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpshld_v32hi_mask, "__builtin_ia32_vpshld_v32hi_mask", IX86_BUILTIN_VPSHLDV32HI_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_INT_V32HI_INT)
+BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshld_v32hi, "__builtin_ia32_vpshld_v32hi", IX86_BUILTIN_VPSHLDV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_INT)
+BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshld_v32hi_mask, "__builtin_ia32_vpshld_v32hi_mask", IX86_BUILTIN_VPSHLDV32HI_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_INT_V32HI_INT)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshld_v16hi, "__builtin_ia32_vpshld_v16hi", IX86_BUILTIN_VPSHLDV16HI, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_INT)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshld_v16hi_mask, "__builtin_ia32_vpshld_v16hi_mask", IX86_BUILTIN_VPSHLDV16HI_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_INT_V16HI_INT)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshld_v8hi, "__builtin_ia32_vpshld_v8hi", IX86_BUILTIN_VPSHLDV8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshld_v8hi_mask, "__builtin_ia32_vpshld_v8hi_mask", IX86_BUILTIN_VPSHLDV8HI_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT_V8HI_INT)
-BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpshld_v16si, "__builtin_ia32_vpshld_v16si", IX86_BUILTIN_VPSHLDV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT)
-BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpshld_v16si_mask, "__builtin_ia32_vpshld_v16si_mask", IX86_BUILTIN_VPSHLDV16SI_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_INT)
+BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshld_v16si, "__builtin_ia32_vpshld_v16si", IX86_BUILTIN_VPSHLDV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT)
+BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshld_v16si_mask, "__builtin_ia32_vpshld_v16si_mask", IX86_BUILTIN_VPSHLDV16SI_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_INT)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshld_v8si, "__builtin_ia32_vpshld_v8si", IX86_BUILTIN_VPSHLDV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshld_v8si_mask, "__builtin_ia32_vpshld_v8si_mask", IX86_BUILTIN_VPSHLDV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT_V8SI_INT)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshld_v4si, "__builtin_ia32_vpshld_v4si", IX86_BUILTIN_VPSHLDV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshld_v4si_mask, "__builtin_ia32_vpshld_v4si_mask", IX86_BUILTIN_VPSHLDV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT_V4SI_INT)
-BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpshld_v8di, "__builtin_ia32_vpshld_v8di", IX86_BUILTIN_VPSHLDV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT)
-BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpshld_v8di_mask, "__builtin_ia32_vpshld_v8di_mask", IX86_BUILTIN_VPSHLDV8DI_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_INT)
+BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshld_v8di, "__builtin_ia32_vpshld_v8di", IX86_BUILTIN_VPSHLDV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT)
+BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshld_v8di_mask, "__builtin_ia32_vpshld_v8di_mask", IX86_BUILTIN_VPSHLDV8DI_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_INT)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshld_v4di, "__builtin_ia32_vpshld_v4di", IX86_BUILTIN_VPSHLDV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshld_v4di_mask, "__builtin_ia32_vpshld_v4di_mask", IX86_BUILTIN_VPSHLDV4DI_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_V4DI_INT)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshld_v2di, "__builtin_ia32_vpshld_v2di", IX86_BUILTIN_VPSHLDV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshld_v2di_mask, "__builtin_ia32_vpshld_v2di_mask", IX86_BUILTIN_VPSHLDV2DI_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_V2DI_INT)
-BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpshrdv_v32hi, "__builtin_ia32_vpshrdv_v32hi", IX86_BUILTIN_VPSHRDVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI)
-BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpshrdv_v32hi_mask, "__builtin_ia32_vpshrdv_v32hi_mask", IX86_BUILTIN_VPSHRDVV32HI_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI)
-BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpshrdv_v32hi_maskz, "__builtin_ia32_vpshrdv_v32hi_maskz", IX86_BUILTIN_VPSHRDVV32HI_MASKZ, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI)
+BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshrdv_v32hi, "__builtin_ia32_vpshrdv_v32hi", IX86_BUILTIN_VPSHRDVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI)
+BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshrdv_v32hi_mask, "__builtin_ia32_vpshrdv_v32hi_mask", IX86_BUILTIN_VPSHRDVV32HI_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI)
+BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshrdv_v32hi_maskz, "__builtin_ia32_vpshrdv_v32hi_maskz", IX86_BUILTIN_VPSHRDVV32HI_MASKZ, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v16hi, "__builtin_ia32_vpshrdv_v16hi", IX86_BUILTIN_VPSHRDVV16HI, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v16hi_mask, "__builtin_ia32_vpshrdv_v16hi_mask", IX86_BUILTIN_VPSHRDVV16HI_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v16hi_maskz, "__builtin_ia32_vpshrdv_v16hi_maskz", IX86_BUILTIN_VPSHRDVV16HI_MASKZ, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v8hi, "__builtin_ia32_vpshrdv_v8hi", IX86_BUILTIN_VPSHRDVV8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v8hi_mask, "__builtin_ia32_vpshrdv_v8hi_mask", IX86_BUILTIN_VPSHRDVV8HI_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v8hi_maskz, "__builtin_ia32_vpshrdv_v8hi_maskz", IX86_BUILTIN_VPSHRDVV8HI_MASKZ, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpshrdv_v16si, "__builtin_ia32_vpshrdv_v16si", IX86_BUILTIN_VPSHRDVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI)
-BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpshrdv_v16si_mask, "__builtin_ia32_vpshrdv_v16si_mask", IX86_BUILTIN_VPSHRDVV16SI_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpshrdv_v16si_maskz, "__builtin_ia32_vpshrdv_v16si_maskz", IX86_BUILTIN_VPSHRDVV16SI_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshrdv_v16si, "__builtin_ia32_vpshrdv_v16si", IX86_BUILTIN_VPSHRDVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI)
+BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshrdv_v16si_mask, "__builtin_ia32_vpshrdv_v16si_mask", IX86_BUILTIN_VPSHRDVV16SI_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshrdv_v16si_maskz, "__builtin_ia32_vpshrdv_v16si_maskz", IX86_BUILTIN_VPSHRDVV16SI_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v8si, "__builtin_ia32_vpshrdv_v8si", IX86_BUILTIN_VPSHRDVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v8si_mask, "__builtin_ia32_vpshrdv_v8si_mask", IX86_BUILTIN_VPSHRDVV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v8si_maskz, "__builtin_ia32_vpshrdv_v8si_maskz", IX86_BUILTIN_VPSHRDVV8SI_MASKZ, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v4si, "__builtin_ia32_vpshrdv_v4si", IX86_BUILTIN_VPSHRDVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v4si_mask, "__builtin_ia32_vpshrdv_v4si_mask", IX86_BUILTIN_VPSHRDVV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v4si_maskz, "__builtin_ia32_vpshrdv_v4si_maskz", IX86_BUILTIN_VPSHRDVV4SI_MASKZ, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpshrdv_v8di, "__builtin_ia32_vpshrdv_v8di", IX86_BUILTIN_VPSHRDVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI)
-BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpshrdv_v8di_mask, "__builtin_ia32_vpshrdv_v8di_mask", IX86_BUILTIN_VPSHRDVV8DI_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpshrdv_v8di_maskz, "__builtin_ia32_vpshrdv_v8di_maskz", IX86_BUILTIN_VPSHRDVV8DI_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshrdv_v8di, "__builtin_ia32_vpshrdv_v8di", IX86_BUILTIN_VPSHRDVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI)
+BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshrdv_v8di_mask, "__builtin_ia32_vpshrdv_v8di_mask", IX86_BUILTIN_VPSHRDVV8DI_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshrdv_v8di_maskz, "__builtin_ia32_vpshrdv_v8di_maskz", IX86_BUILTIN_VPSHRDVV8DI_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v4di, "__builtin_ia32_vpshrdv_v4di", IX86_BUILTIN_VPSHRDVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v4di_mask, "__builtin_ia32_vpshrdv_v4di_mask", IX86_BUILTIN_VPSHRDVV4DI_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v4di_maskz, "__builtin_ia32_vpshrdv_v4di_maskz", IX86_BUILTIN_VPSHRDVV4DI_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI)
@@ -2677,27 +2677,27 @@ BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshr
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v2di_mask, "__builtin_ia32_vpshrdv_v2di_mask", IX86_BUILTIN_VPSHRDVV2DI_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v2di_maskz, "__builtin_ia32_vpshrdv_v2di_maskz", IX86_BUILTIN_VPSHRDVV2DI_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpshldv_v32hi, "__builtin_ia32_vpshldv_v32hi", IX86_BUILTIN_VPSHLDVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI)
-BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpshldv_v32hi_mask, "__builtin_ia32_vpshldv_v32hi_mask", IX86_BUILTIN_VPSHLDVV32HI_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI)
-BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpshldv_v32hi_maskz, "__builtin_ia32_vpshldv_v32hi_maskz", IX86_BUILTIN_VPSHLDVV32HI_MASKZ, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI)
+BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshldv_v32hi, "__builtin_ia32_vpshldv_v32hi", IX86_BUILTIN_VPSHLDVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI)
+BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshldv_v32hi_mask, "__builtin_ia32_vpshldv_v32hi_mask", IX86_BUILTIN_VPSHLDVV32HI_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI)
+BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshldv_v32hi_maskz, "__builtin_ia32_vpshldv_v32hi_maskz", IX86_BUILTIN_VPSHLDVV32HI_MASKZ, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v16hi, "__builtin_ia32_vpshldv_v16hi", IX86_BUILTIN_VPSHLDVV16HI, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v16hi_mask, "__builtin_ia32_vpshldv_v16hi_mask", IX86_BUILTIN_VPSHLDVV16HI_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v16hi_maskz, "__builtin_ia32_vpshldv_v16hi_maskz", IX86_BUILTIN_VPSHLDVV16HI_MASKZ, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v8hi, "__builtin_ia32_vpshldv_v8hi", IX86_BUILTIN_VPSHLDVV8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v8hi_mask, "__builtin_ia32_vpshldv_v8hi_mask", IX86_BUILTIN_VPSHLDVV8HI_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v8hi_maskz, "__builtin_ia32_vpshldv_v8hi_maskz", IX86_BUILTIN_VPSHLDVV8HI_MASKZ, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpshldv_v16si, "__builtin_ia32_vpshldv_v16si", IX86_BUILTIN_VPSHLDVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI)
-BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpshldv_v16si_mask, "__builtin_ia32_vpshldv_v16si_mask", IX86_BUILTIN_VPSHLDVV16SI_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpshldv_v16si_maskz, "__builtin_ia32_vpshldv_v16si_maskz", IX86_BUILTIN_VPSHLDVV16SI_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshldv_v16si, "__builtin_ia32_vpshldv_v16si", IX86_BUILTIN_VPSHLDVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI)
+BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshldv_v16si_mask, "__builtin_ia32_vpshldv_v16si_mask", IX86_BUILTIN_VPSHLDVV16SI_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshldv_v16si_maskz, "__builtin_ia32_vpshldv_v16si_maskz", IX86_BUILTIN_VPSHLDVV16SI_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v8si, "__builtin_ia32_vpshldv_v8si", IX86_BUILTIN_VPSHLDVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v8si_mask, "__builtin_ia32_vpshldv_v8si_mask", IX86_BUILTIN_VPSHLDVV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v8si_maskz, "__builtin_ia32_vpshldv_v8si_maskz", IX86_BUILTIN_VPSHLDVV8SI_MASKZ, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v4si, "__builtin_ia32_vpshldv_v4si", IX86_BUILTIN_VPSHLDVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v4si_mask, "__builtin_ia32_vpshldv_v4si_mask", IX86_BUILTIN_VPSHLDVV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v4si_maskz, "__builtin_ia32_vpshldv_v4si_maskz", IX86_BUILTIN_VPSHLDVV4SI_MASKZ, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpshldv_v8di, "__builtin_ia32_vpshldv_v8di", IX86_BUILTIN_VPSHLDVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI)
-BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpshldv_v8di_mask, "__builtin_ia32_vpshldv_v8di_mask", IX86_BUILTIN_VPSHLDVV8DI_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512VBMI2, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpshldv_v8di_maskz, "__builtin_ia32_vpshldv_v8di_maskz", IX86_BUILTIN_VPSHLDVV8DI_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshldv_v8di, "__builtin_ia32_vpshldv_v8di", IX86_BUILTIN_VPSHLDVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI)
+BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshldv_v8di_mask, "__builtin_ia32_vpshldv_v8di_mask", IX86_BUILTIN_VPSHLDVV8DI_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshldv_v8di_maskz, "__builtin_ia32_vpshldv_v8di_maskz", IX86_BUILTIN_VPSHLDVV8DI_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v4di, "__builtin_ia32_vpshldv_v4di", IX86_BUILTIN_VPSHLDVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v4di_mask, "__builtin_ia32_vpshldv_v4di_mask", IX86_BUILTIN_VPSHLDVV4DI_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v4di_maskz, "__builtin_ia32_vpshldv_v4di_maskz", IX86_BUILTIN_VPSHLDVV4DI_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI)
@@ -2706,20 +2706,20 @@ BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshl
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v2di_maskz, "__builtin_ia32_vpshldv_v2di_maskz", IX86_BUILTIN_VPSHLDVV2DI_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI)
/* GFNI */
-BDESC (OPTION_MASK_ISA_GFNI | OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vgf2p8affineinvqb_v64qi, "__builtin_ia32_vgf2p8affineinvqb_v64qi", IX86_BUILTIN_VGF2P8AFFINEINVQB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_INT)
-BDESC (OPTION_MASK_ISA_GFNI | OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vgf2p8affineinvqb_v64qi_mask, "__builtin_ia32_vgf2p8affineinvqb_v64qi_mask", IX86_BUILTIN_VGF2P8AFFINEINVQB512MASK, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_INT_V64QI_UDI)
+BDESC (OPTION_MASK_ISA_GFNI | OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_vgf2p8affineinvqb_v64qi, "__builtin_ia32_vgf2p8affineinvqb_v64qi", IX86_BUILTIN_VGF2P8AFFINEINVQB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_INT)
+BDESC (OPTION_MASK_ISA_GFNI | OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_vgf2p8affineinvqb_v64qi_mask, "__builtin_ia32_vgf2p8affineinvqb_v64qi_mask", IX86_BUILTIN_VGF2P8AFFINEINVQB512MASK, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_INT_V64QI_UDI)
BDESC (OPTION_MASK_ISA_GFNI | OPTION_MASK_ISA_AVX, 0, CODE_FOR_vgf2p8affineinvqb_v32qi, "__builtin_ia32_vgf2p8affineinvqb_v32qi", IX86_BUILTIN_VGF2P8AFFINEINVQB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_INT)
BDESC (OPTION_MASK_ISA_GFNI | OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_vgf2p8affineinvqb_v32qi_mask, "__builtin_ia32_vgf2p8affineinvqb_v32qi_mask", IX86_BUILTIN_VGF2P8AFFINEINVQB256MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_INT_V32QI_USI)
BDESC (OPTION_MASK_ISA_GFNI | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_vgf2p8affineinvqb_v16qi, "__builtin_ia32_vgf2p8affineinvqb_v16qi", IX86_BUILTIN_VGF2P8AFFINEINVQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT)
BDESC (OPTION_MASK_ISA_GFNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vgf2p8affineinvqb_v16qi_mask, "__builtin_ia32_vgf2p8affineinvqb_v16qi_mask", IX86_BUILTIN_VGF2P8AFFINEINVQB128MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT_V16QI_UHI)
-BDESC (OPTION_MASK_ISA_GFNI | OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vgf2p8affineqb_v64qi, "__builtin_ia32_vgf2p8affineqb_v64qi", IX86_BUILTIN_VGF2P8AFFINEQB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_INT)
-BDESC (OPTION_MASK_ISA_GFNI | OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vgf2p8affineqb_v64qi_mask, "__builtin_ia32_vgf2p8affineqb_v64qi_mask", IX86_BUILTIN_VGF2P8AFFINEQB512MASK, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_INT_V64QI_UDI)
+BDESC (OPTION_MASK_ISA_GFNI | OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_vgf2p8affineqb_v64qi, "__builtin_ia32_vgf2p8affineqb_v64qi", IX86_BUILTIN_VGF2P8AFFINEQB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_INT)
+BDESC (OPTION_MASK_ISA_GFNI | OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_vgf2p8affineqb_v64qi_mask, "__builtin_ia32_vgf2p8affineqb_v64qi_mask", IX86_BUILTIN_VGF2P8AFFINEQB512MASK, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_INT_V64QI_UDI)
BDESC (OPTION_MASK_ISA_GFNI | OPTION_MASK_ISA_AVX, 0, CODE_FOR_vgf2p8affineqb_v32qi, "__builtin_ia32_vgf2p8affineqb_v32qi", IX86_BUILTIN_VGF2P8AFFINEQB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_INT)
BDESC (OPTION_MASK_ISA_GFNI | OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_vgf2p8affineqb_v32qi_mask, "__builtin_ia32_vgf2p8affineqb_v32qi_mask", IX86_BUILTIN_VGF2P8AFFINEQB256MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_INT_V32QI_USI)
BDESC (OPTION_MASK_ISA_GFNI | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_vgf2p8affineqb_v16qi, "__builtin_ia32_vgf2p8affineqb_v16qi", IX86_BUILTIN_VGF2P8AFFINEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT)
BDESC (OPTION_MASK_ISA_GFNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vgf2p8affineqb_v16qi_mask, "__builtin_ia32_vgf2p8affineqb_v16qi_mask", IX86_BUILTIN_VGF2P8AFFINEQB128MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT_V16QI_UHI)
-BDESC (OPTION_MASK_ISA_GFNI | OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vgf2p8mulb_v64qi, "__builtin_ia32_vgf2p8mulb_v64qi", IX86_BUILTIN_VGF2P8MULB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI)
-BDESC (OPTION_MASK_ISA_GFNI | OPTION_MASK_ISA_AVX512BW, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vgf2p8mulb_v64qi_mask, "__builtin_ia32_vgf2p8mulb_v64qi_mask", IX86_BUILTIN_VGF2P8MULB512MASK, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI)
+BDESC (OPTION_MASK_ISA_GFNI | OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_vgf2p8mulb_v64qi, "__builtin_ia32_vgf2p8mulb_v64qi", IX86_BUILTIN_VGF2P8MULB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI)
+BDESC (OPTION_MASK_ISA_GFNI | OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_vgf2p8mulb_v64qi_mask, "__builtin_ia32_vgf2p8mulb_v64qi_mask", IX86_BUILTIN_VGF2P8MULB512MASK, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI)
BDESC (OPTION_MASK_ISA_GFNI | OPTION_MASK_ISA_AVX, 0, CODE_FOR_vgf2p8mulb_v32qi, "__builtin_ia32_vgf2p8mulb_v32qi", IX86_BUILTIN_VGF2P8MULB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI)
BDESC (OPTION_MASK_ISA_GFNI | OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_vgf2p8mulb_v32qi_mask, "__builtin_ia32_vgf2p8mulb_v32qi_mask", IX86_BUILTIN_VGF2P8MULB256MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI)
BDESC (OPTION_MASK_ISA_GFNI | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_vgf2p8mulb_v16qi, "__builtin_ia32_vgf2p8mulb_v16qi", IX86_BUILTIN_VGF2P8MULB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI)
@@ -2727,9 +2727,9 @@ BDESC (OPTION_MASK_ISA_GFNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vgf2p8mulb_v
/* AVX512_VNNI */
-BDESC (OPTION_MASK_ISA_AVX512VNNI, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpdpbusd_v16si, "__builtin_ia32_vpdpbusd_v16si", IX86_BUILTIN_VPDPBUSDV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI)
-BDESC (OPTION_MASK_ISA_AVX512VNNI, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpdpbusd_v16si_mask, "__builtin_ia32_vpdpbusd_v16si_mask", IX86_BUILTIN_VPDPBUSDV16SI_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512VNNI, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpdpbusd_v16si_maskz, "__builtin_ia32_vpdpbusd_v16si_maskz", IX86_BUILTIN_VPDPBUSDV16SI_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512VNNI, 0, CODE_FOR_vpdpbusd_v16si, "__builtin_ia32_vpdpbusd_v16si", IX86_BUILTIN_VPDPBUSDV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI)
+BDESC (OPTION_MASK_ISA_AVX512VNNI, 0, CODE_FOR_vpdpbusd_v16si_mask, "__builtin_ia32_vpdpbusd_v16si_mask", IX86_BUILTIN_VPDPBUSDV16SI_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512VNNI, 0, CODE_FOR_vpdpbusd_v16si_maskz, "__builtin_ia32_vpdpbusd_v16si_maskz", IX86_BUILTIN_VPDPBUSDV16SI_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVXVNNI, CODE_FOR_vpdpbusd_v8si, "__builtin_ia32_vpdpbusd_v8si", IX86_BUILTIN_VPDPBUSDV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI)
BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpbusd_v8si_mask, "__builtin_ia32_vpdpbusd_v8si_mask", IX86_BUILTIN_VPDPBUSDV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI)
BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpbusd_v8si_maskz, "__builtin_ia32_vpdpbusd_v8si_maskz", IX86_BUILTIN_VPDPBUSDV8SI_MASKZ, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI)
@@ -2737,9 +2737,9 @@ BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_A
BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpbusd_v4si_mask, "__builtin_ia32_vpdpbusd_v4si_mask", IX86_BUILTIN_VPDPBUSDV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI)
BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpbusd_v4si_maskz, "__builtin_ia32_vpdpbusd_v4si_maskz", IX86_BUILTIN_VPDPBUSDV4SI_MASKZ, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512VNNI, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpdpbusds_v16si, "__builtin_ia32_vpdpbusds_v16si", IX86_BUILTIN_VPDPBUSDSV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI)
-BDESC (OPTION_MASK_ISA_AVX512VNNI, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpdpbusds_v16si_mask, "__builtin_ia32_vpdpbusds_v16si_mask", IX86_BUILTIN_VPDPBUSDSV16SI_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512VNNI, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpdpbusds_v16si_maskz, "__builtin_ia32_vpdpbusds_v16si_maskz", IX86_BUILTIN_VPDPBUSDSV16SI_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512VNNI, 0, CODE_FOR_vpdpbusds_v16si, "__builtin_ia32_vpdpbusds_v16si", IX86_BUILTIN_VPDPBUSDSV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI)
+BDESC (OPTION_MASK_ISA_AVX512VNNI, 0, CODE_FOR_vpdpbusds_v16si_mask, "__builtin_ia32_vpdpbusds_v16si_mask", IX86_BUILTIN_VPDPBUSDSV16SI_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512VNNI, 0, CODE_FOR_vpdpbusds_v16si_maskz, "__builtin_ia32_vpdpbusds_v16si_maskz", IX86_BUILTIN_VPDPBUSDSV16SI_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVXVNNI, CODE_FOR_vpdpbusds_v8si, "__builtin_ia32_vpdpbusds_v8si", IX86_BUILTIN_VPDPBUSDSV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI)
BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpbusds_v8si_mask, "__builtin_ia32_vpdpbusds_v8si_mask", IX86_BUILTIN_VPDPBUSDSV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI)
BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpbusds_v8si_maskz, "__builtin_ia32_vpdpbusds_v8si_maskz", IX86_BUILTIN_VPDPBUSDSV8SI_MASKZ, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI)
@@ -2747,9 +2747,9 @@ BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_A
BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpbusds_v4si_mask, "__builtin_ia32_vpdpbusds_v4si_mask", IX86_BUILTIN_VPDPBUSDSV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI)
BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpbusds_v4si_maskz, "__builtin_ia32_vpdpbusds_v4si_maskz", IX86_BUILTIN_VPDPBUSDSV4SI_MASKZ, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512VNNI, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpdpwssd_v16si, "__builtin_ia32_vpdpwssd_v16si", IX86_BUILTIN_VPDPWSSDV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI)
-BDESC (OPTION_MASK_ISA_AVX512VNNI, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpdpwssd_v16si_mask, "__builtin_ia32_vpdpwssd_v16si_mask", IX86_BUILTIN_VPDPWSSDV16SI_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512VNNI, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpdpwssd_v16si_maskz, "__builtin_ia32_vpdpwssd_v16si_maskz", IX86_BUILTIN_VPDPWSSDV16SI_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512VNNI, 0, CODE_FOR_vpdpwssd_v16si, "__builtin_ia32_vpdpwssd_v16si", IX86_BUILTIN_VPDPWSSDV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI)
+BDESC (OPTION_MASK_ISA_AVX512VNNI, 0, CODE_FOR_vpdpwssd_v16si_mask, "__builtin_ia32_vpdpwssd_v16si_mask", IX86_BUILTIN_VPDPWSSDV16SI_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512VNNI, 0, CODE_FOR_vpdpwssd_v16si_maskz, "__builtin_ia32_vpdpwssd_v16si_maskz", IX86_BUILTIN_VPDPWSSDV16SI_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVXVNNI, CODE_FOR_vpdpwssd_v8si, "__builtin_ia32_vpdpwssd_v8si", IX86_BUILTIN_VPDPWSSDV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI)
BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpwssd_v8si_mask, "__builtin_ia32_vpdpwssd_v8si_mask", IX86_BUILTIN_VPDPWSSDV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI)
BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpwssd_v8si_maskz, "__builtin_ia32_vpdpwssd_v8si_maskz", IX86_BUILTIN_VPDPWSSDV8SI_MASKZ, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI)
@@ -2757,9 +2757,9 @@ BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_A
BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpwssd_v4si_mask, "__builtin_ia32_vpdpwssd_v4si_mask", IX86_BUILTIN_VPDPWSSDV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI)
BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpwssd_v4si_maskz, "__builtin_ia32_vpdpwssd_v4si_maskz", IX86_BUILTIN_VPDPWSSDV4SI_MASKZ, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512VNNI, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpdpwssds_v16si, "__builtin_ia32_vpdpwssds_v16si", IX86_BUILTIN_VPDPWSSDSV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI)
-BDESC (OPTION_MASK_ISA_AVX512VNNI, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpdpwssds_v16si_mask, "__builtin_ia32_vpdpwssds_v16si_mask", IX86_BUILTIN_VPDPWSSDSV16SI_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512VNNI, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpdpwssds_v16si_maskz, "__builtin_ia32_vpdpwssds_v16si_maskz", IX86_BUILTIN_VPDPWSSDSV16SI_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512VNNI, 0, CODE_FOR_vpdpwssds_v16si, "__builtin_ia32_vpdpwssds_v16si", IX86_BUILTIN_VPDPWSSDSV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI)
+BDESC (OPTION_MASK_ISA_AVX512VNNI, 0, CODE_FOR_vpdpwssds_v16si_mask, "__builtin_ia32_vpdpwssds_v16si_mask", IX86_BUILTIN_VPDPWSSDSV16SI_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512VNNI, 0, CODE_FOR_vpdpwssds_v16si_maskz, "__builtin_ia32_vpdpwssds_v16si_maskz", IX86_BUILTIN_VPDPWSSDSV16SI_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVXVNNI, CODE_FOR_vpdpwssds_v8si, "__builtin_ia32_vpdpwssds_v8si", IX86_BUILTIN_VPDPWSSDSV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI)
BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpwssds_v8si_mask, "__builtin_ia32_vpdpwssds_v8si_mask", IX86_BUILTIN_VPDPWSSDSV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI)
BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpwssds_v8si_maskz, "__builtin_ia32_vpdpwssds_v8si_maskz", IX86_BUILTIN_VPDPWSSDSV8SI_MASKZ, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI)
@@ -2798,13 +2798,13 @@ BDESC (0, OPTION_MASK_ISA2_AVXVNNIINT16 | OPTION_MASK_ISA2_AVX10_2, CODE_FOR_vpd
/* VPCLMULQDQ */
BDESC (OPTION_MASK_ISA_VPCLMULQDQ | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpclmulqdq_v2di, "__builtin_ia32_vpclmulqdq_v2di", IX86_BUILTIN_VPCLMULQDQ2, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT)
BDESC (OPTION_MASK_ISA_VPCLMULQDQ | OPTION_MASK_ISA_AVX, 0, CODE_FOR_vpclmulqdq_v4di, "__builtin_ia32_vpclmulqdq_v4di", IX86_BUILTIN_VPCLMULQDQ4, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT)
-BDESC (OPTION_MASK_ISA_VPCLMULQDQ | OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpclmulqdq_v8di, "__builtin_ia32_vpclmulqdq_v8di", IX86_BUILTIN_VPCLMULQDQ8, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT)
+BDESC (OPTION_MASK_ISA_VPCLMULQDQ | OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_vpclmulqdq_v8di, "__builtin_ia32_vpclmulqdq_v8di", IX86_BUILTIN_VPCLMULQDQ8, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT)
/* VPOPCNTDQ */
-BDESC (OPTION_MASK_ISA_AVX512VPOPCNTDQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpopcountv16si, "__builtin_ia32_vpopcountd_v16si", IX86_BUILTIN_VPOPCOUNTDV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI)
-BDESC (OPTION_MASK_ISA_AVX512VPOPCNTDQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpopcountv16si_mask, "__builtin_ia32_vpopcountd_v16si_mask", IX86_BUILTIN_VPOPCOUNTDV16SI_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512VPOPCNTDQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpopcountv8di, "__builtin_ia32_vpopcountq_v8di", IX86_BUILTIN_VPOPCOUNTQV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI)
-BDESC (OPTION_MASK_ISA_AVX512VPOPCNTDQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpopcountv8di_mask, "__builtin_ia32_vpopcountq_v8di_mask", IX86_BUILTIN_VPOPCOUNTQV8DI_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512VPOPCNTDQ, 0, CODE_FOR_vpopcountv16si, "__builtin_ia32_vpopcountd_v16si", IX86_BUILTIN_VPOPCOUNTDV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI)
+BDESC (OPTION_MASK_ISA_AVX512VPOPCNTDQ, 0, CODE_FOR_vpopcountv16si_mask, "__builtin_ia32_vpopcountd_v16si_mask", IX86_BUILTIN_VPOPCOUNTDV16SI_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512VPOPCNTDQ, 0, CODE_FOR_vpopcountv8di, "__builtin_ia32_vpopcountq_v8di", IX86_BUILTIN_VPOPCOUNTQV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI)
+BDESC (OPTION_MASK_ISA_AVX512VPOPCNTDQ, 0, CODE_FOR_vpopcountv8di_mask, "__builtin_ia32_vpopcountq_v8di_mask", IX86_BUILTIN_VPOPCOUNTQV8DI_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI)
BDESC (OPTION_MASK_ISA_AVX512VPOPCNTDQ | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpopcountv4di, "__builtin_ia32_vpopcountq_v4di", IX86_BUILTIN_VPOPCOUNTQV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI)
BDESC (OPTION_MASK_ISA_AVX512VPOPCNTDQ | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpopcountv4di_mask, "__builtin_ia32_vpopcountq_v4di_mask", IX86_BUILTIN_VPOPCOUNTQV4DI_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_UQI)
@@ -2816,21 +2816,21 @@ BDESC (OPTION_MASK_ISA_AVX512VPOPCNTDQ | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_v
BDESC (OPTION_MASK_ISA_AVX512VPOPCNTDQ | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpopcountv8si_mask, "__builtin_ia32_vpopcountd_v8si_mask", IX86_BUILTIN_VPOPCOUNTDV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_UHI)
/* BITALG */
-BDESC (OPTION_MASK_ISA_AVX512BITALG, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpopcountv64qi, "__builtin_ia32_vpopcountb_v64qi", IX86_BUILTIN_VPOPCOUNTBV64QI, UNKNOWN, (int) V64QI_FTYPE_V64QI)
-BDESC (OPTION_MASK_ISA_AVX512BITALG, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpopcountv64qi_mask, "__builtin_ia32_vpopcountb_v64qi_mask", IX86_BUILTIN_VPOPCOUNTBV64QI_MASK, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_UDI)
+BDESC (OPTION_MASK_ISA_AVX512BITALG, 0, CODE_FOR_vpopcountv64qi, "__builtin_ia32_vpopcountb_v64qi", IX86_BUILTIN_VPOPCOUNTBV64QI, UNKNOWN, (int) V64QI_FTYPE_V64QI)
+BDESC (OPTION_MASK_ISA_AVX512BITALG, 0, CODE_FOR_vpopcountv64qi_mask, "__builtin_ia32_vpopcountb_v64qi_mask", IX86_BUILTIN_VPOPCOUNTBV64QI_MASK, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_UDI)
BDESC (OPTION_MASK_ISA_AVX512BITALG | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpopcountv32qi, "__builtin_ia32_vpopcountb_v32qi", IX86_BUILTIN_VPOPCOUNTBV32QI, UNKNOWN, (int) V32QI_FTYPE_V32QI)
BDESC (OPTION_MASK_ISA_AVX512BITALG | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpopcountv32qi_mask, "__builtin_ia32_vpopcountb_v32qi_mask", IX86_BUILTIN_VPOPCOUNTBV32QI_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_USI)
BDESC (OPTION_MASK_ISA_AVX512BITALG | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpopcountv16qi, "__builtin_ia32_vpopcountb_v16qi", IX86_BUILTIN_VPOPCOUNTBV16QI, UNKNOWN, (int) V16QI_FTYPE_V16QI)
BDESC (OPTION_MASK_ISA_AVX512BITALG | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpopcountv16qi_mask, "__builtin_ia32_vpopcountb_v16qi_mask", IX86_BUILTIN_VPOPCOUNTBV16QI_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512BITALG, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpopcountv32hi, "__builtin_ia32_vpopcountw_v32hi", IX86_BUILTIN_VPOPCOUNTWV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI)
-BDESC (OPTION_MASK_ISA_AVX512BITALG, OPTION_MASK_ISA2_EVEX512, CODE_FOR_vpopcountv32hi_mask, "__builtin_ia32_vpopcountw_v32hi_mask", IX86_BUILTIN_VPOPCOUNTQV32HI_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_USI)
+BDESC (OPTION_MASK_ISA_AVX512BITALG, 0, CODE_FOR_vpopcountv32hi, "__builtin_ia32_vpopcountw_v32hi", IX86_BUILTIN_VPOPCOUNTWV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI)
+BDESC (OPTION_MASK_ISA_AVX512BITALG, 0, CODE_FOR_vpopcountv32hi_mask, "__builtin_ia32_vpopcountw_v32hi_mask", IX86_BUILTIN_VPOPCOUNTQV32HI_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_USI)
BDESC (OPTION_MASK_ISA_AVX512BITALG | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpopcountv16hi, "__builtin_ia32_vpopcountw_v16hi", IX86_BUILTIN_VPOPCOUNTWV16HI, UNKNOWN, (int) V16HI_FTYPE_V16HI)
BDESC (OPTION_MASK_ISA_AVX512BITALG | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpopcountv16hi_mask, "__builtin_ia32_vpopcountw_v16hi_mask", IX86_BUILTIN_VPOPCOUNTQV16HI_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_UHI)
BDESC (OPTION_MASK_ISA_AVX512BITALG | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpopcountv8hi, "__builtin_ia32_vpopcountw_v8hi", IX86_BUILTIN_VPOPCOUNTWV8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI)
BDESC (OPTION_MASK_ISA_AVX512BITALG | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpopcountv8hi_mask, "__builtin_ia32_vpopcountw_v8hi_mask", IX86_BUILTIN_VPOPCOUNTQV8HI_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512BITALG, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512vl_vpshufbitqmbv64qi_mask, "__builtin_ia32_vpshufbitqmb512_mask", IX86_BUILTIN_VPSHUFBITQMB512_MASK, UNKNOWN, (int) UDI_FTYPE_V64QI_V64QI_UDI)
+BDESC (OPTION_MASK_ISA_AVX512BITALG, 0, CODE_FOR_avx512vl_vpshufbitqmbv64qi_mask, "__builtin_ia32_vpshufbitqmb512_mask", IX86_BUILTIN_VPSHUFBITQMB512_MASK, UNKNOWN, (int) UDI_FTYPE_V64QI_V64QI_UDI)
BDESC (OPTION_MASK_ISA_AVX512BITALG | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_vpshufbitqmbv32qi_mask, "__builtin_ia32_vpshufbitqmb256_mask", IX86_BUILTIN_VPSHUFBITQMB256_MASK, UNKNOWN, (int) USI_FTYPE_V32QI_V32QI_USI)
BDESC (OPTION_MASK_ISA_AVX512BITALG | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_vpshufbitqmbv16qi_mask, "__builtin_ia32_vpshufbitqmb128_mask", IX86_BUILTIN_VPSHUFBITQMB128_MASK, UNKNOWN, (int) UHI_FTYPE_V16QI_V16QI_UHI)
@@ -2840,39 +2840,39 @@ BDESC (0, OPTION_MASK_ISA2_RDPID, CODE_FOR_rdpid, "__builtin_ia32_rdpid", IX86_B
/* VAES. */
BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_VAES, CODE_FOR_vaesdec_v16qi, "__builtin_ia32_vaesdec_v16qi", IX86_BUILTIN_VAESDEC16, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI)
BDESC (OPTION_MASK_ISA_AVX, OPTION_MASK_ISA2_VAES, CODE_FOR_vaesdec_v32qi, "__builtin_ia32_vaesdec_v32qi", IX86_BUILTIN_VAESDEC32, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_VAES | OPTION_MASK_ISA2_EVEX512, CODE_FOR_vaesdec_v64qi, "__builtin_ia32_vaesdec_v64qi", IX86_BUILTIN_VAESDEC64, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI)
+BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_VAES, CODE_FOR_vaesdec_v64qi, "__builtin_ia32_vaesdec_v64qi", IX86_BUILTIN_VAESDEC64, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI)
BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_VAES, CODE_FOR_vaesdeclast_v16qi, "__builtin_ia32_vaesdeclast_v16qi", IX86_BUILTIN_VAESDECLAST16, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI)
BDESC (OPTION_MASK_ISA_AVX, OPTION_MASK_ISA2_VAES, CODE_FOR_vaesdeclast_v32qi, "__builtin_ia32_vaesdeclast_v32qi", IX86_BUILTIN_VAESDECLAST32, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_VAES | OPTION_MASK_ISA2_EVEX512, CODE_FOR_vaesdeclast_v64qi, "__builtin_ia32_vaesdeclast_v64qi", IX86_BUILTIN_VAESDECLAST64, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI)
+BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_VAES, CODE_FOR_vaesdeclast_v64qi, "__builtin_ia32_vaesdeclast_v64qi", IX86_BUILTIN_VAESDECLAST64, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI)
BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_VAES, CODE_FOR_vaesenc_v16qi, "__builtin_ia32_vaesenc_v16qi", IX86_BUILTIN_VAESENC16, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI)
BDESC (OPTION_MASK_ISA_AVX, OPTION_MASK_ISA2_VAES, CODE_FOR_vaesenc_v32qi, "__builtin_ia32_vaesenc_v32qi", IX86_BUILTIN_VAESENC32, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_VAES | OPTION_MASK_ISA2_EVEX512, CODE_FOR_vaesenc_v64qi, "__builtin_ia32_vaesenc_v64qi", IX86_BUILTIN_VAESENC64, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI)
+BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_VAES, CODE_FOR_vaesenc_v64qi, "__builtin_ia32_vaesenc_v64qi", IX86_BUILTIN_VAESENC64, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI)
BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_VAES, CODE_FOR_vaesenclast_v16qi, "__builtin_ia32_vaesenclast_v16qi", IX86_BUILTIN_VAESENCLAST16, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI)
BDESC (OPTION_MASK_ISA_AVX, OPTION_MASK_ISA2_VAES, CODE_FOR_vaesenclast_v32qi, "__builtin_ia32_vaesenclast_v32qi", IX86_BUILTIN_VAESENCLAST32, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_VAES | OPTION_MASK_ISA2_EVEX512, CODE_FOR_vaesenclast_v64qi, "__builtin_ia32_vaesenclast_v64qi", IX86_BUILTIN_VAESENCLAST64, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI)
+BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_VAES, CODE_FOR_vaesenclast_v64qi, "__builtin_ia32_vaesenclast_v64qi", IX86_BUILTIN_VAESENCLAST64, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI)
/* BF16 */
-BDESC (0, OPTION_MASK_ISA2_AVX512BF16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_cvtne2ps2bf16_v32bf, "__builtin_ia32_cvtne2ps2bf16_v32bf", IX86_BUILTIN_CVTNE2PS2BF16_V32BF, UNKNOWN, (int) V32BF_FTYPE_V16SF_V16SF)
-BDESC (0, OPTION_MASK_ISA2_AVX512BF16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_cvtne2ps2bf16_v32bf_mask, "__builtin_ia32_cvtne2ps2bf16_v32bf_mask", IX86_BUILTIN_CVTNE2PS2BF16_V32BF_MASK, UNKNOWN, (int) V32BF_FTYPE_V16SF_V16SF_V32BF_USI)
-BDESC (0, OPTION_MASK_ISA2_AVX512BF16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_cvtne2ps2bf16_v32bf_maskz, "__builtin_ia32_cvtne2ps2bf16_v32bf_maskz", IX86_BUILTIN_CVTNE2PS2BF16_V32BF_MASKZ, UNKNOWN, (int) V32BF_FTYPE_V16SF_V16SF_USI)
+BDESC (0, OPTION_MASK_ISA2_AVX512BF16, CODE_FOR_avx512f_cvtne2ps2bf16_v32bf, "__builtin_ia32_cvtne2ps2bf16_v32bf", IX86_BUILTIN_CVTNE2PS2BF16_V32BF, UNKNOWN, (int) V32BF_FTYPE_V16SF_V16SF)
+BDESC (0, OPTION_MASK_ISA2_AVX512BF16, CODE_FOR_avx512f_cvtne2ps2bf16_v32bf_mask, "__builtin_ia32_cvtne2ps2bf16_v32bf_mask", IX86_BUILTIN_CVTNE2PS2BF16_V32BF_MASK, UNKNOWN, (int) V32BF_FTYPE_V16SF_V16SF_V32BF_USI)
+BDESC (0, OPTION_MASK_ISA2_AVX512BF16, CODE_FOR_avx512f_cvtne2ps2bf16_v32bf_maskz, "__builtin_ia32_cvtne2ps2bf16_v32bf_maskz", IX86_BUILTIN_CVTNE2PS2BF16_V32BF_MASKZ, UNKNOWN, (int) V32BF_FTYPE_V16SF_V16SF_USI)
BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512BF16, CODE_FOR_avx512f_cvtne2ps2bf16_v16bf, "__builtin_ia32_cvtne2ps2bf16_v16bf", IX86_BUILTIN_CVTNE2PS2BF16_V16BF, UNKNOWN, (int) V16BF_FTYPE_V8SF_V8SF)
BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512BF16, CODE_FOR_avx512f_cvtne2ps2bf16_v16bf_mask, "__builtin_ia32_cvtne2ps2bf16_v16bf_mask", IX86_BUILTIN_CVTNE2PS2BF16_V16BF_MASK, UNKNOWN, (int) V16BF_FTYPE_V8SF_V8SF_V16BF_UHI)
BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512BF16, CODE_FOR_avx512f_cvtne2ps2bf16_v16bf_maskz, "__builtin_ia32_cvtne2ps2bf16_v16bf_maskz", IX86_BUILTIN_CVTNE2PS2BF16_V16BF_MASKZ, UNKNOWN, (int) V16BF_FTYPE_V8SF_V8SF_UHI)
BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512BF16, CODE_FOR_avx512f_cvtne2ps2bf16_v8bf, "__builtin_ia32_cvtne2ps2bf16_v8bf", IX86_BUILTIN_CVTNE2PS2BF16_V8BF, UNKNOWN, (int) V8BF_FTYPE_V4SF_V4SF)
BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512BF16, CODE_FOR_avx512f_cvtne2ps2bf16_v8bf_mask, "__builtin_ia32_cvtne2ps2bf16_v8bf_mask", IX86_BUILTIN_CVTNE2PS2BF16_V8BF_MASK, UNKNOWN, (int) V8BF_FTYPE_V4SF_V4SF_V8BF_UQI)
BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512BF16, CODE_FOR_avx512f_cvtne2ps2bf16_v8bf_maskz, "__builtin_ia32_cvtne2ps2bf16_v8bf_maskz", IX86_BUILTIN_CVTNE2PS2BF16_V8BF_MASKZ, UNKNOWN, (int) V8BF_FTYPE_V4SF_V4SF_UQI)
-BDESC (0, OPTION_MASK_ISA2_AVX512BF16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_cvtneps2bf16_v16sf, "__builtin_ia32_cvtneps2bf16_v16sf", IX86_BUILTIN_CVTNEPS2BF16_V16SF, UNKNOWN, (int) V16BF_FTYPE_V16SF)
-BDESC (0, OPTION_MASK_ISA2_AVX512BF16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_cvtneps2bf16_v16sf_mask, "__builtin_ia32_cvtneps2bf16_v16sf_mask", IX86_BUILTIN_CVTNEPS2BF16_V16SF_MASK, UNKNOWN, (int) V16BF_FTYPE_V16SF_V16BF_UHI)
-BDESC (0, OPTION_MASK_ISA2_AVX512BF16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_cvtneps2bf16_v16sf_maskz, "__builtin_ia32_cvtneps2bf16_v16sf_maskz", IX86_BUILTIN_CVTNE2PS2BF16_V16SF_MASKZ, UNKNOWN, (int) V16BF_FTYPE_V16SF_UHI)
+BDESC (0, OPTION_MASK_ISA2_AVX512BF16, CODE_FOR_avx512f_cvtneps2bf16_v16sf, "__builtin_ia32_cvtneps2bf16_v16sf", IX86_BUILTIN_CVTNEPS2BF16_V16SF, UNKNOWN, (int) V16BF_FTYPE_V16SF)
+BDESC (0, OPTION_MASK_ISA2_AVX512BF16, CODE_FOR_avx512f_cvtneps2bf16_v16sf_mask, "__builtin_ia32_cvtneps2bf16_v16sf_mask", IX86_BUILTIN_CVTNEPS2BF16_V16SF_MASK, UNKNOWN, (int) V16BF_FTYPE_V16SF_V16BF_UHI)
+BDESC (0, OPTION_MASK_ISA2_AVX512BF16, CODE_FOR_avx512f_cvtneps2bf16_v16sf_maskz, "__builtin_ia32_cvtneps2bf16_v16sf_maskz", IX86_BUILTIN_CVTNE2PS2BF16_V16SF_MASKZ, UNKNOWN, (int) V16BF_FTYPE_V16SF_UHI)
BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVXNECONVERT | OPTION_MASK_ISA2_AVX512BF16, CODE_FOR_vcvtneps2bf16_v8sf, "__builtin_ia32_cvtneps2bf16_v8sf", IX86_BUILTIN_CVTNEPS2BF16_V8SF, UNKNOWN, (int) V8BF_FTYPE_V8SF)
BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512BF16, CODE_FOR_avx512f_cvtneps2bf16_v8sf_mask, "__builtin_ia32_cvtneps2bf16_v8sf_mask", IX86_BUILTIN_CVTNEPS2BF16_V8SF_MASK, UNKNOWN, (int) V8BF_FTYPE_V8SF_V8BF_UQI)
BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512BF16, CODE_FOR_avx512f_cvtneps2bf16_v8sf_maskz, "__builtin_ia32_cvtneps2bf16_v8sf_maskz", IX86_BUILTIN_CVTNE2PS2BF16_V8SF_MASKZ, UNKNOWN, (int) V8BF_FTYPE_V8SF_UQI)
BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVXNECONVERT | OPTION_MASK_ISA2_AVX512BF16, CODE_FOR_vcvtneps2bf16_v4sf, "__builtin_ia32_cvtneps2bf16_v4sf", IX86_BUILTIN_CVTNEPS2BF16_V4SF, UNKNOWN, (int) V8BF_FTYPE_V4SF)
BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512BF16, CODE_FOR_avx512f_cvtneps2bf16_v4sf_mask, "__builtin_ia32_cvtneps2bf16_v4sf_mask", IX86_BUILTIN_CVTNEPS2BF16_V4SF_MASK, UNKNOWN, (int) V8BF_FTYPE_V4SF_V8BF_UQI)
BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512BF16, CODE_FOR_avx512f_cvtneps2bf16_v4sf_maskz, "__builtin_ia32_cvtneps2bf16_v4sf_maskz", IX86_BUILTIN_CVTNE2PS2BF16_V4SF_MASKZ, UNKNOWN, (int) V8BF_FTYPE_V4SF_UQI)
-BDESC (0, OPTION_MASK_ISA2_AVX512BF16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_dpbf16ps_v16sf, "__builtin_ia32_dpbf16ps_v16sf", IX86_BUILTIN_DPBF16PS_V16SF, UNKNOWN, (int) V16SF_FTYPE_V16SF_V32BF_V32BF)
-BDESC (0, OPTION_MASK_ISA2_AVX512BF16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_dpbf16ps_v16sf_mask, "__builtin_ia32_dpbf16ps_v16sf_mask", IX86_BUILTIN_DPBF16PS_V16SF_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V32BF_V32BF_UHI)
-BDESC (0, OPTION_MASK_ISA2_AVX512BF16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_dpbf16ps_v16sf_maskz, "__builtin_ia32_dpbf16ps_v16sf_maskz", IX86_BUILTIN_DPBF16PS_V16SF_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V32BF_V32BF_UHI)
+BDESC (0, OPTION_MASK_ISA2_AVX512BF16, CODE_FOR_avx512f_dpbf16ps_v16sf, "__builtin_ia32_dpbf16ps_v16sf", IX86_BUILTIN_DPBF16PS_V16SF, UNKNOWN, (int) V16SF_FTYPE_V16SF_V32BF_V32BF)
+BDESC (0, OPTION_MASK_ISA2_AVX512BF16, CODE_FOR_avx512f_dpbf16ps_v16sf_mask, "__builtin_ia32_dpbf16ps_v16sf_mask", IX86_BUILTIN_DPBF16PS_V16SF_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V32BF_V32BF_UHI)
+BDESC (0, OPTION_MASK_ISA2_AVX512BF16, CODE_FOR_avx512f_dpbf16ps_v16sf_maskz, "__builtin_ia32_dpbf16ps_v16sf_maskz", IX86_BUILTIN_DPBF16PS_V16SF_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V32BF_V32BF_UHI)
BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512BF16, CODE_FOR_avx512f_dpbf16ps_v8sf, "__builtin_ia32_dpbf16ps_v8sf", IX86_BUILTIN_DPBF16PS_V8SF, UNKNOWN, (int) V8SF_FTYPE_V8SF_V16BF_V16BF)
BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512BF16, CODE_FOR_avx512f_dpbf16ps_v8sf_mask, "__builtin_ia32_dpbf16ps_v8sf_mask", IX86_BUILTIN_DPBF16PS_V8SF_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V16BF_V16BF_UQI)
BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512BF16, CODE_FOR_avx512f_dpbf16ps_v8sf_maskz, "__builtin_ia32_dpbf16ps_v8sf_maskz", IX86_BUILTIN_DPBF16PS_V8SF_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SF_V16BF_V16BF_UQI)
@@ -2885,40 +2885,40 @@ BDESC (OPTION_MASK_ISA_SSE2, 0, CODE_FOR_extendbfsf2_1, "__builtin_ia32_cvtbf2sf
/* AVX512FP16. */
BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_addv8hf3_mask, "__builtin_ia32_addph128_mask", IX86_BUILTIN_ADDPH128_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI)
BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_addv16hf3_mask, "__builtin_ia32_addph256_mask", IX86_BUILTIN_ADDPH256_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_V16HF_UHI)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_addv32hf3_mask, "__builtin_ia32_addph512_mask", IX86_BUILTIN_ADDPH512_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_addv32hf3_mask, "__builtin_ia32_addph512_mask", IX86_BUILTIN_ADDPH512_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI)
BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_subv8hf3_mask, "__builtin_ia32_subph128_mask", IX86_BUILTIN_SUBPH128_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI)
BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_subv16hf3_mask, "__builtin_ia32_subph256_mask", IX86_BUILTIN_SUBPH256_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_V16HF_UHI)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_subv32hf3_mask, "__builtin_ia32_subph512_mask", IX86_BUILTIN_SUBPH512_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_subv32hf3_mask, "__builtin_ia32_subph512_mask", IX86_BUILTIN_SUBPH512_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI)
BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_mulv8hf3_mask, "__builtin_ia32_mulph128_mask", IX86_BUILTIN_MULPH128_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI)
BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_mulv16hf3_mask, "__builtin_ia32_mulph256_mask", IX86_BUILTIN_MULPH256_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_V16HF_UHI)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_mulv32hf3_mask, "__builtin_ia32_mulph512_mask", IX86_BUILTIN_MULPH512_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_mulv32hf3_mask, "__builtin_ia32_mulph512_mask", IX86_BUILTIN_MULPH512_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI)
BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_divv8hf3_mask, "__builtin_ia32_divph128_mask", IX86_BUILTIN_DIVPH128_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI)
BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_divv16hf3_mask, "__builtin_ia32_divph256_mask", IX86_BUILTIN_DIVPH256_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_V16HF_UHI)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512fp16_divv32hf3_mask, "__builtin_ia32_divph512_mask", IX86_BUILTIN_DIVPH512_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_divv32hf3_mask, "__builtin_ia32_divph512_mask", IX86_BUILTIN_DIVPH512_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI)
BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmaddv8hf3_mask, "__builtin_ia32_addsh_mask", IX86_BUILTIN_ADDSH_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI)
BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmsubv8hf3_mask, "__builtin_ia32_subsh_mask", IX86_BUILTIN_SUBSH_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI)
BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmmulv8hf3_mask, "__builtin_ia32_mulsh_mask", IX86_BUILTIN_MULSH_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI)
BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmdivv8hf3_mask, "__builtin_ia32_divsh_mask", IX86_BUILTIN_DIVSH_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI)
BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_smaxv8hf3_mask, "__builtin_ia32_maxph128_mask", IX86_BUILTIN_MAXPH128_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI)
BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_smaxv16hf3_mask, "__builtin_ia32_maxph256_mask", IX86_BUILTIN_MAXPH256_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_V16HF_UHI)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_smaxv32hf3_mask, "__builtin_ia32_maxph512_mask", IX86_BUILTIN_MAXPH512_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_smaxv32hf3_mask, "__builtin_ia32_maxph512_mask", IX86_BUILTIN_MAXPH512_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI)
BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_sminv8hf3_mask, "__builtin_ia32_minph128_mask", IX86_BUILTIN_MINPH128_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI)
BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_sminv16hf3_mask, "__builtin_ia32_minph256_mask", IX86_BUILTIN_MINPH256_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_V16HF_UHI)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_sminv32hf3_mask, "__builtin_ia32_minph512_mask", IX86_BUILTIN_MINPH512_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_sminv32hf3_mask, "__builtin_ia32_minph512_mask", IX86_BUILTIN_MINPH512_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI)
BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmsmaxv8hf3_mask, "__builtin_ia32_maxsh_mask", IX86_BUILTIN_MAXSH_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI)
BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmsminv8hf3_mask, "__builtin_ia32_minsh_mask", IX86_BUILTIN_MINSH_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI)
BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_cmpv8hf3_mask, "__builtin_ia32_cmpph128_mask", IX86_BUILTIN_CMPPH128_MASK, UNKNOWN, (int) UQI_FTYPE_V8HF_V8HF_INT_UQI)
BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512vl_cmpv16hf3_mask, "__builtin_ia32_cmpph256_mask", IX86_BUILTIN_CMPPH256_MASK, UNKNOWN, (int) UHI_FTYPE_V16HF_V16HF_INT_UHI)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_cmpv32hf3_mask, "__builtin_ia32_cmpph512_mask", IX86_BUILTIN_CMPPH512_MASK, UNKNOWN, (int) USI_FTYPE_V32HF_V32HF_INT_USI)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_cmpv32hf3_mask, "__builtin_ia32_cmpph512_mask", IX86_BUILTIN_CMPPH512_MASK, UNKNOWN, (int) USI_FTYPE_V32HF_V32HF_INT_USI)
BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_sqrtv8hf2_mask, "__builtin_ia32_sqrtph128_mask", IX86_BUILTIN_SQRTPH128_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_UQI)
BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_sqrtv16hf2_mask, "__builtin_ia32_sqrtph256_mask", IX86_BUILTIN_SQRTPH256_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_UHI)
BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_rsqrtv8hf2_mask, "__builtin_ia32_rsqrtph128_mask", IX86_BUILTIN_RSQRTPH128_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_UQI)
BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_rsqrtv16hf2_mask, "__builtin_ia32_rsqrtph256_mask", IX86_BUILTIN_RSQRTPH256_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_UHI)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512fp16_rsqrtv32hf2_mask, "__builtin_ia32_rsqrtph512_mask", IX86_BUILTIN_RSQRTPH512_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_USI)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_rsqrtv32hf2_mask, "__builtin_ia32_rsqrtph512_mask", IX86_BUILTIN_RSQRTPH512_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_USI)
BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmrsqrtv8hf2_mask, "__builtin_ia32_rsqrtsh_mask", IX86_BUILTIN_RSQRTSH_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI)
BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_rcpv8hf2_mask, "__builtin_ia32_rcpph128_mask", IX86_BUILTIN_RCPPH128_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_UQI)
BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_rcpv16hf2_mask, "__builtin_ia32_rcpph256_mask", IX86_BUILTIN_RCPPH256_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_UHI)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512fp16_rcpv32hf2_mask, "__builtin_ia32_rcpph512_mask", IX86_BUILTIN_RCPPH512_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_USI)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_rcpv32hf2_mask, "__builtin_ia32_rcpph512_mask", IX86_BUILTIN_RCPPH512_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_USI)
BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmrcpv8hf2_mask, "__builtin_ia32_rcpsh_mask", IX86_BUILTIN_RCPSH_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI)
BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_scalefv8hf_mask, "__builtin_ia32_scalefph128_mask", IX86_BUILTIN_SCALEFPH128_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI)
BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512vl_scalefv16hf_mask, "__builtin_ia32_scalefph256_mask", IX86_BUILTIN_SCALEFPH256_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_V16HF_UHI)
@@ -2928,7 +2928,7 @@ BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp1
BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512vl_rndscalev16hf_mask, "__builtin_ia32_rndscaleph256_mask", IX86_BUILTIN_RNDSCALEPH256_MASK, UNKNOWN, (int) V16HF_FTYPE_V16HF_INT_V16HF_UHI)
BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512dq_fpclassv16hf_mask, "__builtin_ia32_fpclassph256_mask", IX86_BUILTIN_FPCLASSPH256, UNKNOWN, (int) HI_FTYPE_V16HF_INT_UHI)
BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512dq_fpclassv8hf_mask, "__builtin_ia32_fpclassph128_mask", IX86_BUILTIN_FPCLASSPH128, UNKNOWN, (int) QI_FTYPE_V8HF_INT_UQI)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512dq_fpclassv32hf_mask, "__builtin_ia32_fpclassph512_mask", IX86_BUILTIN_FPCLASSPH512, UNKNOWN, (int) SI_FTYPE_V32HF_INT_USI)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512dq_fpclassv32hf_mask, "__builtin_ia32_fpclassph512_mask", IX86_BUILTIN_FPCLASSPH512, UNKNOWN, (int) SI_FTYPE_V32HF_INT_USI)
BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512dq_vmfpclassv8hf_mask, "__builtin_ia32_fpclasssh_mask", IX86_BUILTIN_FPCLASSSH_MASK, UNKNOWN, (int) QI_FTYPE_V8HF_INT_UQI)
BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512vl_getexpv16hf_mask, "__builtin_ia32_getexpph256_mask", IX86_BUILTIN_GETEXPPH256, UNKNOWN, (int) V16HF_FTYPE_V16HF_V16HF_UHI)
BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_getexpv8hf_mask, "__builtin_ia32_getexpph128_mask", IX86_BUILTIN_GETEXPPH128, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_UQI)
@@ -3366,26 +3366,26 @@ BDESC_END (ARGS, ROUND_ARGS)
/* AVX512F. */
BDESC_FIRST (round_args, ROUND_ARGS,
- OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_addv8df3_mask_round, "__builtin_ia32_addpd512_mask", IX86_BUILTIN_ADDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_addv16sf3_mask_round, "__builtin_ia32_addps512_mask", IX86_BUILTIN_ADDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT)
+ OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_addv8df3_mask_round, "__builtin_ia32_addpd512_mask", IX86_BUILTIN_ADDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_addv16sf3_mask_round, "__builtin_ia32_addps512_mask", IX86_BUILTIN_ADDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse2_vmaddv2df3_round, "__builtin_ia32_addsd_round", IX86_BUILTIN_ADDSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse2_vmaddv2df3_mask_round, "__builtin_ia32_addsd_mask_round", IX86_BUILTIN_ADDSD_MASK_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI_INT)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse_vmaddv4sf3_round, "__builtin_ia32_addss_round", IX86_BUILTIN_ADDSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse_vmaddv4sf3_mask_round, "__builtin_ia32_addss_mask_round", IX86_BUILTIN_ADDSS_MASK_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_cmpv8df3_mask_round, "__builtin_ia32_cmppd512_mask", IX86_BUILTIN_CMPPD512, UNKNOWN, (int) UQI_FTYPE_V8DF_V8DF_INT_UQI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_cmpv16sf3_mask_round, "__builtin_ia32_cmpps512_mask", IX86_BUILTIN_CMPPS512, UNKNOWN, (int) UHI_FTYPE_V16SF_V16SF_INT_UHI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_cmpv8df3_mask_round, "__builtin_ia32_cmppd512_mask", IX86_BUILTIN_CMPPD512, UNKNOWN, (int) UQI_FTYPE_V8DF_V8DF_INT_UQI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_cmpv16sf3_mask_round, "__builtin_ia32_cmpps512_mask", IX86_BUILTIN_CMPPS512, UNKNOWN, (int) UHI_FTYPE_V16SF_V16SF_INT_UHI_INT)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vmcmpv2df3_mask_round, "__builtin_ia32_cmpsd_mask", IX86_BUILTIN_CMPSD_MASK, UNKNOWN, (int) UQI_FTYPE_V2DF_V2DF_INT_UQI_INT)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vmcmpv4sf3_mask_round, "__builtin_ia32_cmpss_mask", IX86_BUILTIN_CMPSS_MASK, UNKNOWN, (int) UQI_FTYPE_V4SF_V4SF_INT_UQI_INT)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse2_comi_round, "__builtin_ia32_vcomisd", IX86_BUILTIN_COMIDF, UNKNOWN, (int) INT_FTYPE_V2DF_V2DF_INT_INT)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse_comi_round, "__builtin_ia32_vcomiss", IX86_BUILTIN_COMISF, UNKNOWN, (int) INT_FTYPE_V4SF_V4SF_INT_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_floatv16siv16sf2_mask_round, "__builtin_ia32_cvtdq2ps512_mask", IX86_BUILTIN_CVTDQ2PS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_HI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_cvtpd2dq512_mask_round, "__builtin_ia32_cvtpd2dq512_mask", IX86_BUILTIN_CVTPD2DQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_cvtpd2ps512_mask_round, "__builtin_ia32_cvtpd2ps512_mask", IX86_BUILTIN_CVTPD2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DF_V8SF_QI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_fixuns_notruncv8dfv8si2_mask_round, "__builtin_ia32_cvtpd2udq512_mask", IX86_BUILTIN_CVTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_vcvtph2ps512_mask_round, "__builtin_ia32_vcvtph2ps512_mask", IX86_BUILTIN_CVTPH2PS512, UNKNOWN, (int) V16SF_FTYPE_V16HI_V16SF_HI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fix_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2dq512_mask", IX86_BUILTIN_CVTPS2DQ512_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_cvtps2pd512_mask_round, "__builtin_ia32_cvtps2pd512_mask", IX86_BUILTIN_CVTPS2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SF_V8DF_QI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fixuns_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2udq512_mask", IX86_BUILTIN_CVTPS2UDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_floatv16siv16sf2_mask_round, "__builtin_ia32_cvtdq2ps512_mask", IX86_BUILTIN_CVTDQ2PS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_HI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_cvtpd2dq512_mask_round, "__builtin_ia32_cvtpd2dq512_mask", IX86_BUILTIN_CVTPD2DQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_cvtpd2ps512_mask_round, "__builtin_ia32_cvtpd2ps512_mask", IX86_BUILTIN_CVTPD2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DF_V8SF_QI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_fixuns_notruncv8dfv8si2_mask_round, "__builtin_ia32_cvtpd2udq512_mask", IX86_BUILTIN_CVTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vcvtph2ps512_mask_round, "__builtin_ia32_vcvtph2ps512_mask", IX86_BUILTIN_CVTPH2PS512, UNKNOWN, (int) V16SF_FTYPE_V16HI_V16SF_HI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fix_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2dq512_mask", IX86_BUILTIN_CVTPS2DQ512_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_cvtps2pd512_mask_round, "__builtin_ia32_cvtps2pd512_mask", IX86_BUILTIN_CVTPS2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SF_V8DF_QI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fixuns_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2udq512_mask", IX86_BUILTIN_CVTPS2UDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse2_cvtsd2ss_round, "__builtin_ia32_cvtsd2ss_round", IX86_BUILTIN_CVTSD2SS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF_INT)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse2_cvtsd2ss_mask_round, "__builtin_ia32_cvtsd2ss_mask_round", IX86_BUILTIN_CVTSD2SS_MASK_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF_V4SF_UQI_INT)
BDESC (OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, 0, CODE_FOR_sse2_cvtsi2sdq_round, "__builtin_ia32_cvtsi2sd64", IX86_BUILTIN_CVTSI2SD64, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT64_INT)
@@ -3393,72 +3393,72 @@ BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse_cvtsi2ss_round, "__builtin_ia32_
BDESC (OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, 0, CODE_FOR_sse_cvtsi2ssq_round, "__builtin_ia32_cvtsi2ss64", IX86_BUILTIN_CVTSI2SS64, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT64_INT)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse2_cvtss2sd_round, "__builtin_ia32_cvtss2sd_round", IX86_BUILTIN_CVTSS2SD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF_INT)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse2_cvtss2sd_mask_round, "__builtin_ia32_cvtss2sd_mask_round", IX86_BUILTIN_CVTSS2SD_MASK_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF_V2DF_UQI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_unspec_fix_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2dq512_mask", IX86_BUILTIN_CVTTPD2DQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_unspec_fixuns_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2udq512_mask", IX86_BUILTIN_CVTTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_unspec_fix_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2dq512_mask", IX86_BUILTIN_CVTTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_unspec_fixuns_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2udq512_mask", IX86_BUILTIN_CVTTPS2UDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_floatunsv16siv16sf2_mask_round, "__builtin_ia32_cvtudq2ps512_mask", IX86_BUILTIN_CVTUDQ2PS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_HI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_unspec_fix_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2dq512_mask", IX86_BUILTIN_CVTTPD2DQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_unspec_fixuns_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2udq512_mask", IX86_BUILTIN_CVTTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_unspec_fix_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2dq512_mask", IX86_BUILTIN_CVTTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_unspec_fixuns_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2udq512_mask", IX86_BUILTIN_CVTTPS2UDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_floatunsv16siv16sf2_mask_round, "__builtin_ia32_cvtudq2ps512_mask", IX86_BUILTIN_CVTUDQ2PS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_HI_INT)
BDESC (OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, 0, CODE_FOR_cvtusi2sd64_round, "__builtin_ia32_cvtusi2sd64", IX86_BUILTIN_CVTUSI2SD64, UNKNOWN, (int) V2DF_FTYPE_V2DF_UINT64_INT)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_cvtusi2ss32_round, "__builtin_ia32_cvtusi2ss32", IX86_BUILTIN_CVTUSI2SS32, UNKNOWN, (int) V4SF_FTYPE_V4SF_UINT_INT)
BDESC (OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, 0, CODE_FOR_cvtusi2ss64_round, "__builtin_ia32_cvtusi2ss64", IX86_BUILTIN_CVTUSI2SS64, UNKNOWN, (int) V4SF_FTYPE_V4SF_UINT64_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_divv8df3_mask_round, "__builtin_ia32_divpd512_mask", IX86_BUILTIN_DIVPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_divv16sf3_mask_round, "__builtin_ia32_divps512_mask", IX86_BUILTIN_DIVPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_divv8df3_mask_round, "__builtin_ia32_divpd512_mask", IX86_BUILTIN_DIVPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_divv16sf3_mask_round, "__builtin_ia32_divps512_mask", IX86_BUILTIN_DIVPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse2_vmdivv2df3_round, "__builtin_ia32_divsd_round", IX86_BUILTIN_DIVSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse2_vmdivv2df3_mask_round, "__builtin_ia32_divsd_mask_round", IX86_BUILTIN_DIVSD_MASK_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI_INT)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse_vmdivv4sf3_round, "__builtin_ia32_divss_round", IX86_BUILTIN_DIVSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse_vmdivv4sf3_mask_round, "__builtin_ia32_divss_mask_round", IX86_BUILTIN_DIVSS_MASK_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fixupimmv8df_mask_round, "__builtin_ia32_fixupimmpd512_mask", IX86_BUILTIN_FIXUPIMMPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fixupimmv8df_maskz_round, "__builtin_ia32_fixupimmpd512_maskz", IX86_BUILTIN_FIXUPIMMPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fixupimmv16sf_mask_round, "__builtin_ia32_fixupimmps512_mask", IX86_BUILTIN_FIXUPIMMPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fixupimmv16sf_maskz_round, "__builtin_ia32_fixupimmps512_maskz", IX86_BUILTIN_FIXUPIMMPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fixupimmv8df_mask_round, "__builtin_ia32_fixupimmpd512_mask", IX86_BUILTIN_FIXUPIMMPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fixupimmv8df_maskz_round, "__builtin_ia32_fixupimmpd512_maskz", IX86_BUILTIN_FIXUPIMMPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fixupimmv16sf_mask_round, "__builtin_ia32_fixupimmps512_mask", IX86_BUILTIN_FIXUPIMMPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fixupimmv16sf_maskz_round, "__builtin_ia32_fixupimmps512_maskz", IX86_BUILTIN_FIXUPIMMPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_sfixupimmv2df_mask_round, "__builtin_ia32_fixupimmsd_mask", IX86_BUILTIN_FIXUPIMMSD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_sfixupimmv2df_maskz_round, "__builtin_ia32_fixupimmsd_maskz", IX86_BUILTIN_FIXUPIMMSD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_sfixupimmv4sf_mask_round, "__builtin_ia32_fixupimmss_mask", IX86_BUILTIN_FIXUPIMMSS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_sfixupimmv4sf_maskz_round, "__builtin_ia32_fixupimmss_maskz", IX86_BUILTIN_FIXUPIMMSS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_getexpv8df_mask_round, "__builtin_ia32_getexppd512_mask", IX86_BUILTIN_GETEXPPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_getexpv16sf_mask_round, "__builtin_ia32_getexpps512_mask", IX86_BUILTIN_GETEXPPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_getexpv8df_mask_round, "__builtin_ia32_getexppd512_mask", IX86_BUILTIN_GETEXPPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_getexpv16sf_mask_round, "__builtin_ia32_getexpps512_mask", IX86_BUILTIN_GETEXPPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_sgetexpv2df_round, "__builtin_ia32_getexpsd128_round", IX86_BUILTIN_GETEXPSD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_sgetexpv2df_mask_round, "__builtin_ia32_getexpsd_mask_round", IX86_BUILTIN_GETEXPSD_MASK_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI_INT)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_sgetexpv4sf_round, "__builtin_ia32_getexpss128_round", IX86_BUILTIN_GETEXPSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_sgetexpv4sf_mask_round, "__builtin_ia32_getexpss_mask_round", IX86_BUILTIN_GETEXPSS_MASK_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_getmantv8df_mask_round, "__builtin_ia32_getmantpd512_mask", IX86_BUILTIN_GETMANTPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_getmantv16sf_mask_round, "__builtin_ia32_getmantps512_mask", IX86_BUILTIN_GETMANTPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_getmantv8df_mask_round, "__builtin_ia32_getmantpd512_mask", IX86_BUILTIN_GETMANTPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_getmantv16sf_mask_round, "__builtin_ia32_getmantps512_mask", IX86_BUILTIN_GETMANTPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI_INT)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vgetmantv2df_round, "__builtin_ia32_getmantsd_round", IX86_BUILTIN_GETMANTSD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vgetmantv2df_mask_round, "__builtin_ia32_getmantsd_mask_round", IX86_BUILTIN_GETMANTSD_MASK_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_V2DF_UQI_INT)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vgetmantv4sf_round, "__builtin_ia32_getmantss_round", IX86_BUILTIN_GETMANTSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vgetmantv4sf_mask_round, "__builtin_ia32_getmantss_mask_round", IX86_BUILTIN_GETMANTSS_MASK_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_V4SF_UQI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_smaxv8df3_mask_round, "__builtin_ia32_maxpd512_mask", IX86_BUILTIN_MAXPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_smaxv16sf3_mask_round, "__builtin_ia32_maxps512_mask", IX86_BUILTIN_MAXPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_smaxv8df3_mask_round, "__builtin_ia32_maxpd512_mask", IX86_BUILTIN_MAXPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_smaxv16sf3_mask_round, "__builtin_ia32_maxps512_mask", IX86_BUILTIN_MAXPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse2_vmsmaxv2df3_round, "__builtin_ia32_maxsd_round", IX86_BUILTIN_MAXSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse2_vmsmaxv2df3_mask_round, "__builtin_ia32_maxsd_mask_round", IX86_BUILTIN_MAXSD_MASK_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI_INT)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse_vmsmaxv4sf3_round, "__builtin_ia32_maxss_round", IX86_BUILTIN_MAXSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse_vmsmaxv4sf3_mask_round, "__builtin_ia32_maxss_mask_round", IX86_BUILTIN_MAXSS_MASK_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_sminv8df3_mask_round, "__builtin_ia32_minpd512_mask", IX86_BUILTIN_MINPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_sminv16sf3_mask_round, "__builtin_ia32_minps512_mask", IX86_BUILTIN_MINPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sminv8df3_mask_round, "__builtin_ia32_minpd512_mask", IX86_BUILTIN_MINPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sminv16sf3_mask_round, "__builtin_ia32_minps512_mask", IX86_BUILTIN_MINPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse2_vmsminv2df3_round, "__builtin_ia32_minsd_round", IX86_BUILTIN_MINSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse2_vmsminv2df3_mask_round, "__builtin_ia32_minsd_mask_round", IX86_BUILTIN_MINSD_MASK_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI_INT)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse_vmsminv4sf3_round, "__builtin_ia32_minss_round", IX86_BUILTIN_MINSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse_vmsminv4sf3_mask_round, "__builtin_ia32_minss_mask_round", IX86_BUILTIN_MINSS_MASK_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_mulv8df3_mask_round, "__builtin_ia32_mulpd512_mask", IX86_BUILTIN_MULPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_mulv16sf3_mask_round, "__builtin_ia32_mulps512_mask", IX86_BUILTIN_MULPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_mulv8df3_mask_round, "__builtin_ia32_mulpd512_mask", IX86_BUILTIN_MULPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_mulv16sf3_mask_round, "__builtin_ia32_mulps512_mask", IX86_BUILTIN_MULPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse2_vmmulv2df3_round, "__builtin_ia32_mulsd_round", IX86_BUILTIN_MULSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse2_vmmulv2df3_mask_round, "__builtin_ia32_mulsd_mask_round", IX86_BUILTIN_MULSD_MASK_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI_INT)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse_vmmulv4sf3_round, "__builtin_ia32_mulss_round", IX86_BUILTIN_MULSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse_vmmulv4sf3_mask_round, "__builtin_ia32_mulss_mask_round", IX86_BUILTIN_MULSS_MASK_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_rndscalev8df_mask_round, "__builtin_ia32_rndscalepd_mask", IX86_BUILTIN_RNDSCALEPD, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_rndscalev16sf_mask_round, "__builtin_ia32_rndscaleps_mask", IX86_BUILTIN_RNDSCALEPS, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_rndscalev8df_mask_round, "__builtin_ia32_rndscalepd_mask", IX86_BUILTIN_RNDSCALEPD, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_rndscalev16sf_mask_round, "__builtin_ia32_rndscaleps_mask", IX86_BUILTIN_RNDSCALEPS, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI_INT)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_rndscalev2df_mask_round, "__builtin_ia32_rndscalesd_mask_round", IX86_BUILTIN_RNDSCALESD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_V2DF_UQI_INT)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_rndscalev4sf_mask_round, "__builtin_ia32_rndscaless_mask_round", IX86_BUILTIN_RNDSCALESS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_V4SF_UQI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_scalefv8df_mask_round, "__builtin_ia32_scalefpd512_mask", IX86_BUILTIN_SCALEFPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_scalefv16sf_mask_round, "__builtin_ia32_scalefps512_mask", IX86_BUILTIN_SCALEFPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_scalefv8df_mask_round, "__builtin_ia32_scalefpd512_mask", IX86_BUILTIN_SCALEFPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_scalefv16sf_mask_round, "__builtin_ia32_scalefps512_mask", IX86_BUILTIN_SCALEFPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vmscalefv2df_mask_round, "__builtin_ia32_scalefsd_mask_round", IX86_BUILTIN_SCALEFSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI_INT)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vmscalefv4sf_mask_round, "__builtin_ia32_scalefss_mask_round", IX86_BUILTIN_SCALEFSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_sqrtv8df2_mask_round, "__builtin_ia32_sqrtpd512_mask", IX86_BUILTIN_SQRTPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_sqrtv16sf2_mask_round, "__builtin_ia32_sqrtps512_mask", IX86_BUILTIN_SQRTPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_sqrtv8df2_mask_round, "__builtin_ia32_sqrtpd512_mask", IX86_BUILTIN_SQRTPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_sqrtv16sf2_mask_round, "__builtin_ia32_sqrtps512_mask", IX86_BUILTIN_SQRTPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse2_vmsqrtv2df2_mask_round, "__builtin_ia32_sqrtsd_mask_round", IX86_BUILTIN_SQRTSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI_INT)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse_vmsqrtv4sf2_mask_round, "__builtin_ia32_sqrtss_mask_round", IX86_BUILTIN_SQRTSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_subv8df3_mask_round, "__builtin_ia32_subpd512_mask", IX86_BUILTIN_SUBPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_subv16sf3_mask_round, "__builtin_ia32_subps512_mask", IX86_BUILTIN_SUBPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_subv8df3_mask_round, "__builtin_ia32_subpd512_mask", IX86_BUILTIN_SUBPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_subv16sf3_mask_round, "__builtin_ia32_subps512_mask", IX86_BUILTIN_SUBPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse2_vmsubv2df3_round, "__builtin_ia32_subsd_round", IX86_BUILTIN_SUBSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse2_vmsubv2df3_mask_round, "__builtin_ia32_subsd_mask_round", IX86_BUILTIN_SUBSD_MASK_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI_INT)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse_vmsubv4sf3_round, "__builtin_ia32_subss_round", IX86_BUILTIN_SUBSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT)
@@ -3479,12 +3479,12 @@ BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_unspec_sse_cvttss2si_round, "__built
BDESC (OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, 0, CODE_FOR_unspec_sse_cvttss2siq_round, "__builtin_ia32_vcvttss2si64", IX86_BUILTIN_VCVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF_INT)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_unspec_avx512f_vcvttss2usi_round, "__builtin_ia32_vcvttss2usi32", IX86_BUILTIN_VCVTTSS2USI32, UNKNOWN, (int) UINT_FTYPE_V4SF_INT)
BDESC (OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, 0, CODE_FOR_unspec_avx512f_vcvttss2usiq_round, "__builtin_ia32_vcvttss2usi64", IX86_BUILTIN_VCVTTSS2USI64, UNKNOWN, (int) UINT64_FTYPE_V4SF_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fmadd_v8df_mask_round, "__builtin_ia32_vfmaddpd512_mask", IX86_BUILTIN_VFMADDPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fmadd_v8df_mask3_round, "__builtin_ia32_vfmaddpd512_mask3", IX86_BUILTIN_VFMADDPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fmadd_v8df_maskz_round, "__builtin_ia32_vfmaddpd512_maskz", IX86_BUILTIN_VFMADDPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fmadd_v16sf_mask_round, "__builtin_ia32_vfmaddps512_mask", IX86_BUILTIN_VFMADDPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fmadd_v16sf_mask3_round, "__builtin_ia32_vfmaddps512_mask3", IX86_BUILTIN_VFMADDPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fmadd_v16sf_maskz_round, "__builtin_ia32_vfmaddps512_maskz", IX86_BUILTIN_VFMADDPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fmadd_v8df_mask_round, "__builtin_ia32_vfmaddpd512_mask", IX86_BUILTIN_VFMADDPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fmadd_v8df_mask3_round, "__builtin_ia32_vfmaddpd512_mask3", IX86_BUILTIN_VFMADDPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fmadd_v8df_maskz_round, "__builtin_ia32_vfmaddpd512_maskz", IX86_BUILTIN_VFMADDPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fmadd_v16sf_mask_round, "__builtin_ia32_vfmaddps512_mask", IX86_BUILTIN_VFMADDPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fmadd_v16sf_mask3_round, "__builtin_ia32_vfmaddps512_mask3", IX86_BUILTIN_VFMADDPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fmadd_v16sf_maskz_round, "__builtin_ia32_vfmaddps512_maskz", IX86_BUILTIN_VFMADDPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_fmai_vmfmadd_v2df_round, "__builtin_ia32_vfmaddsd3_round", IX86_BUILTIN_VFMADDSD3_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_INT)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_fmai_vmfmadd_v4sf_round, "__builtin_ia32_vfmaddss3_round", IX86_BUILTIN_VFMADDSS3_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_INT)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vmfmadd_v2df_mask_round, "__builtin_ia32_vfmaddsd3_mask", IX86_BUILTIN_VFMADDSD3_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI_INT)
@@ -3495,100 +3495,100 @@ BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vmfmadd_v4sf_mask_round, "__
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vmfmadd_v4sf_mask3_round, "__builtin_ia32_vfmaddss3_mask3", IX86_BUILTIN_VFMADDSS3_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI_INT)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vmfmadd_v4sf_maskz_round, "__builtin_ia32_vfmaddss3_maskz", IX86_BUILTIN_VFMADDSS3_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI_INT)
BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vmfmsub_v4sf_mask3_round, "__builtin_ia32_vfmsubss3_mask3", IX86_BUILTIN_VFMSUBSS3_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fmaddsub_v8df_mask_round, "__builtin_ia32_vfmaddsubpd512_mask", IX86_BUILTIN_VFMADDSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fmaddsub_v8df_mask3_round, "__builtin_ia32_vfmaddsubpd512_mask3", IX86_BUILTIN_VFMADDSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fmaddsub_v8df_maskz_round, "__builtin_ia32_vfmaddsubpd512_maskz", IX86_BUILTIN_VFMADDSUBPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fmaddsub_v16sf_mask_round, "__builtin_ia32_vfmaddsubps512_mask", IX86_BUILTIN_VFMADDSUBPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fmaddsub_v16sf_mask3_round, "__builtin_ia32_vfmaddsubps512_mask3", IX86_BUILTIN_VFMADDSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fmaddsub_v16sf_maskz_round, "__builtin_ia32_vfmaddsubps512_maskz", IX86_BUILTIN_VFMADDSUBPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fmsubadd_v8df_mask3_round, "__builtin_ia32_vfmsubaddpd512_mask3", IX86_BUILTIN_VFMSUBADDPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fmsubadd_v16sf_mask3_round, "__builtin_ia32_vfmsubaddps512_mask3", IX86_BUILTIN_VFMSUBADDPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fmsub_v8df_mask_round, "__builtin_ia32_vfmsubpd512_mask", IX86_BUILTIN_VFMSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fmsub_v8df_mask3_round, "__builtin_ia32_vfmsubpd512_mask3", IX86_BUILTIN_VFMSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fmsub_v8df_maskz_round, "__builtin_ia32_vfmsubpd512_maskz", IX86_BUILTIN_VFMSUBPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fmsub_v16sf_mask_round, "__builtin_ia32_vfmsubps512_mask", IX86_BUILTIN_VFMSUBPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fmsub_v16sf_mask3_round, "__builtin_ia32_vfmsubps512_mask3", IX86_BUILTIN_VFMSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fmsub_v16sf_maskz_round, "__builtin_ia32_vfmsubps512_maskz", IX86_BUILTIN_VFMSUBPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fnmadd_v8df_mask_round, "__builtin_ia32_vfnmaddpd512_mask", IX86_BUILTIN_VFNMADDPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fnmadd_v8df_mask3_round, "__builtin_ia32_vfnmaddpd512_mask3", IX86_BUILTIN_VFNMADDPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fnmadd_v8df_maskz_round, "__builtin_ia32_vfnmaddpd512_maskz", IX86_BUILTIN_VFNMADDPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fnmadd_v16sf_mask_round, "__builtin_ia32_vfnmaddps512_mask", IX86_BUILTIN_VFNMADDPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fnmadd_v16sf_mask3_round, "__builtin_ia32_vfnmaddps512_mask3", IX86_BUILTIN_VFNMADDPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fnmadd_v16sf_maskz_round, "__builtin_ia32_vfnmaddps512_maskz", IX86_BUILTIN_VFNMADDPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fnmsub_v8df_mask_round, "__builtin_ia32_vfnmsubpd512_mask", IX86_BUILTIN_VFNMSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fnmsub_v8df_mask3_round, "__builtin_ia32_vfnmsubpd512_mask3", IX86_BUILTIN_VFNMSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fnmsub_v8df_maskz_round, "__builtin_ia32_vfnmsubpd512_maskz", IX86_BUILTIN_VFNMSUBPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fnmsub_v16sf_mask_round, "__builtin_ia32_vfnmsubps512_mask", IX86_BUILTIN_VFNMSUBPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fnmsub_v16sf_mask3_round, "__builtin_ia32_vfnmsubps512_mask3", IX86_BUILTIN_VFNMSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT)
-BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_fnmsub_v16sf_maskz_round, "__builtin_ia32_vfnmsubps512_maskz", IX86_BUILTIN_VFNMSUBPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fmaddsub_v8df_mask_round, "__builtin_ia32_vfmaddsubpd512_mask", IX86_BUILTIN_VFMADDSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fmaddsub_v8df_mask3_round, "__builtin_ia32_vfmaddsubpd512_mask3", IX86_BUILTIN_VFMADDSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fmaddsub_v8df_maskz_round, "__builtin_ia32_vfmaddsubpd512_maskz", IX86_BUILTIN_VFMADDSUBPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fmaddsub_v16sf_mask_round, "__builtin_ia32_vfmaddsubps512_mask", IX86_BUILTIN_VFMADDSUBPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fmaddsub_v16sf_mask3_round, "__builtin_ia32_vfmaddsubps512_mask3", IX86_BUILTIN_VFMADDSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fmaddsub_v16sf_maskz_round, "__builtin_ia32_vfmaddsubps512_maskz", IX86_BUILTIN_VFMADDSUBPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fmsubadd_v8df_mask3_round, "__builtin_ia32_vfmsubaddpd512_mask3", IX86_BUILTIN_VFMSUBADDPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fmsubadd_v16sf_mask3_round, "__builtin_ia32_vfmsubaddps512_mask3", IX86_BUILTIN_VFMSUBADDPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fmsub_v8df_mask_round, "__builtin_ia32_vfmsubpd512_mask", IX86_BUILTIN_VFMSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fmsub_v8df_mask3_round, "__builtin_ia32_vfmsubpd512_mask3", IX86_BUILTIN_VFMSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fmsub_v8df_maskz_round, "__builtin_ia32_vfmsubpd512_maskz", IX86_BUILTIN_VFMSUBPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fmsub_v16sf_mask_round, "__builtin_ia32_vfmsubps512_mask", IX86_BUILTIN_VFMSUBPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fmsub_v16sf_mask3_round, "__builtin_ia32_vfmsubps512_mask3", IX86_BUILTIN_VFMSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fmsub_v16sf_maskz_round, "__builtin_ia32_vfmsubps512_maskz", IX86_BUILTIN_VFMSUBPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fnmadd_v8df_mask_round, "__builtin_ia32_vfnmaddpd512_mask", IX86_BUILTIN_VFNMADDPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fnmadd_v8df_mask3_round, "__builtin_ia32_vfnmaddpd512_mask3", IX86_BUILTIN_VFNMADDPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fnmadd_v8df_maskz_round, "__builtin_ia32_vfnmaddpd512_maskz", IX86_BUILTIN_VFNMADDPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fnmadd_v16sf_mask_round, "__builtin_ia32_vfnmaddps512_mask", IX86_BUILTIN_VFNMADDPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fnmadd_v16sf_mask3_round, "__builtin_ia32_vfnmaddps512_mask3", IX86_BUILTIN_VFNMADDPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fnmadd_v16sf_maskz_round, "__builtin_ia32_vfnmaddps512_maskz", IX86_BUILTIN_VFNMADDPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fnmsub_v8df_mask_round, "__builtin_ia32_vfnmsubpd512_mask", IX86_BUILTIN_VFNMSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fnmsub_v8df_mask3_round, "__builtin_ia32_vfnmsubpd512_mask3", IX86_BUILTIN_VFNMSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fnmsub_v8df_maskz_round, "__builtin_ia32_vfnmsubpd512_maskz", IX86_BUILTIN_VFNMSUBPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fnmsub_v16sf_mask_round, "__builtin_ia32_vfnmsubps512_mask", IX86_BUILTIN_VFNMSUBPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fnmsub_v16sf_mask3_round, "__builtin_ia32_vfnmsubps512_mask3", IX86_BUILTIN_VFNMSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fnmsub_v16sf_maskz_round, "__builtin_ia32_vfnmsubps512_maskz", IX86_BUILTIN_VFNMSUBPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT)
/* AVX512DQ. */
-BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_reducepv8df_mask_round, "__builtin_ia32_reducepd512_mask_round", IX86_BUILTIN_REDUCEPD512_MASK_ROUND, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_UQI_INT)
-BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_reducepv16sf_mask_round, "__builtin_ia32_reduceps512_mask_round", IX86_BUILTIN_REDUCEPS512_MASK_ROUND, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_UHI_INT)
+BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_reducepv8df_mask_round, "__builtin_ia32_reducepd512_mask_round", IX86_BUILTIN_REDUCEPD512_MASK_ROUND, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_UQI_INT)
+BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_reducepv16sf_mask_round, "__builtin_ia32_reduceps512_mask_round", IX86_BUILTIN_REDUCEPS512_MASK_ROUND, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_UHI_INT)
BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_reducesv2df_mask_round, "__builtin_ia32_reducesd_mask_round", IX86_BUILTIN_REDUCESD128_MASK_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_V2DF_UQI_INT)
BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_reducesv4sf_mask_round, "__builtin_ia32_reducess_mask_round", IX86_BUILTIN_REDUCESS128_MASK_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_V4SF_UQI_INT)
BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_avx512dq_rangesv2df_mask_round, "__builtin_ia32_rangesd128_mask_round", IX86_BUILTIN_RANGESD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_V2DF_UQI_INT)
BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_avx512dq_rangesv4sf_mask_round, "__builtin_ia32_rangess128_mask_round", IX86_BUILTIN_RANGESS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_V4SF_UQI_INT)
-BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_fix_notruncv8dfv8di2_mask_round, "__builtin_ia32_cvtpd2qq512_mask", IX86_BUILTIN_CVTPD2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT)
-BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512dq_cvtps2qqv8di_mask_round, "__builtin_ia32_cvtps2qq512_mask", IX86_BUILTIN_CVTPS2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT)
-BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_fixuns_notruncv8dfv8di2_mask_round, "__builtin_ia32_cvtpd2uqq512_mask", IX86_BUILTIN_CVTPD2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT)
-BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512dq_cvtps2uqqv8di_mask_round, "__builtin_ia32_cvtps2uqq512_mask", IX86_BUILTIN_CVTPS2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT)
-BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_floatv8div8sf2_mask_round, "__builtin_ia32_cvtqq2ps512_mask", IX86_BUILTIN_CVTQQ2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DI_V8SF_QI_INT)
-BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_floatunsv8div8sf2_mask_round, "__builtin_ia32_cvtuqq2ps512_mask", IX86_BUILTIN_CVTUQQ2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DI_V8SF_QI_INT)
-BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_floatv8div8df2_mask_round, "__builtin_ia32_cvtqq2pd512_mask", IX86_BUILTIN_CVTQQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_QI_INT)
-BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_floatunsv8div8df2_mask_round, "__builtin_ia32_cvtuqq2pd512_mask", IX86_BUILTIN_CVTUQQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_QI_INT)
-BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_unspec_fix_truncv8sfv8di2_mask_round, "__builtin_ia32_cvttps2qq512_mask", IX86_BUILTIN_CVTTPS2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT)
-BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_unspec_fixuns_truncv8sfv8di2_mask_round, "__builtin_ia32_cvttps2uqq512_mask", IX86_BUILTIN_CVTTPS2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT)
-BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_unspec_fix_truncv8dfv8di2_mask_round, "__builtin_ia32_cvttpd2qq512_mask", IX86_BUILTIN_CVTTPD2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT)
-BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_unspec_fixuns_truncv8dfv8di2_mask_round, "__builtin_ia32_cvttpd2uqq512_mask", IX86_BUILTIN_CVTTPD2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT)
-BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512dq_rangepv16sf_mask_round, "__builtin_ia32_rangeps512_mask", IX86_BUILTIN_RANGEPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI_INT)
-BDESC (OPTION_MASK_ISA_AVX512DQ, OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512dq_rangepv8df_mask_round, "__builtin_ia32_rangepd512_mask", IX86_BUILTIN_RANGEPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI_INT)
+BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_fix_notruncv8dfv8di2_mask_round, "__builtin_ia32_cvtpd2qq512_mask", IX86_BUILTIN_CVTPD2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT)
+BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_avx512dq_cvtps2qqv8di_mask_round, "__builtin_ia32_cvtps2qq512_mask", IX86_BUILTIN_CVTPS2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT)
+BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_fixuns_notruncv8dfv8di2_mask_round, "__builtin_ia32_cvtpd2uqq512_mask", IX86_BUILTIN_CVTPD2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT)
+BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_avx512dq_cvtps2uqqv8di_mask_round, "__builtin_ia32_cvtps2uqq512_mask", IX86_BUILTIN_CVTPS2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT)
+BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_floatv8div8sf2_mask_round, "__builtin_ia32_cvtqq2ps512_mask", IX86_BUILTIN_CVTQQ2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DI_V8SF_QI_INT)
+BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_floatunsv8div8sf2_mask_round, "__builtin_ia32_cvtuqq2ps512_mask", IX86_BUILTIN_CVTUQQ2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DI_V8SF_QI_INT)
+BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_floatv8div8df2_mask_round, "__builtin_ia32_cvtqq2pd512_mask", IX86_BUILTIN_CVTQQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_QI_INT)
+BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_floatunsv8div8df2_mask_round, "__builtin_ia32_cvtuqq2pd512_mask", IX86_BUILTIN_CVTUQQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_QI_INT)
+BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_unspec_fix_truncv8sfv8di2_mask_round, "__builtin_ia32_cvttps2qq512_mask", IX86_BUILTIN_CVTTPS2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT)
+BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_unspec_fixuns_truncv8sfv8di2_mask_round, "__builtin_ia32_cvttps2uqq512_mask", IX86_BUILTIN_CVTTPS2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT)
+BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_unspec_fix_truncv8dfv8di2_mask_round, "__builtin_ia32_cvttpd2qq512_mask", IX86_BUILTIN_CVTTPD2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT)
+BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_unspec_fixuns_truncv8dfv8di2_mask_round, "__builtin_ia32_cvttpd2uqq512_mask", IX86_BUILTIN_CVTTPD2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT)
+BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_avx512dq_rangepv16sf_mask_round, "__builtin_ia32_rangeps512_mask", IX86_BUILTIN_RANGEPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI_INT)
+BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_avx512dq_rangepv8df_mask_round, "__builtin_ia32_rangepd512_mask", IX86_BUILTIN_RANGEPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI_INT)
/* AVX512FP16. */
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_addv32hf3_mask_round, "__builtin_ia32_addph512_mask_round", IX86_BUILTIN_ADDPH512_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_subv32hf3_mask_round, "__builtin_ia32_subph512_mask_round", IX86_BUILTIN_SUBPH512_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_mulv32hf3_mask_round, "__builtin_ia32_mulph512_mask_round", IX86_BUILTIN_MULPH512_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512fp16_divv32hf3_mask_round, "__builtin_ia32_divph512_mask_round", IX86_BUILTIN_DIVPH512_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_addv32hf3_mask_round, "__builtin_ia32_addph512_mask_round", IX86_BUILTIN_ADDPH512_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_subv32hf3_mask_round, "__builtin_ia32_subph512_mask_round", IX86_BUILTIN_SUBPH512_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_mulv32hf3_mask_round, "__builtin_ia32_mulph512_mask_round", IX86_BUILTIN_MULPH512_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_divv32hf3_mask_round, "__builtin_ia32_divph512_mask_round", IX86_BUILTIN_DIVPH512_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmaddv8hf3_mask_round, "__builtin_ia32_addsh_mask_round", IX86_BUILTIN_ADDSH_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI_INT)
BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmsubv8hf3_mask_round, "__builtin_ia32_subsh_mask_round", IX86_BUILTIN_SUBSH_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI_INT)
BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmmulv8hf3_mask_round, "__builtin_ia32_mulsh_mask_round", IX86_BUILTIN_MULSH_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI_INT)
BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmdivv8hf3_mask_round, "__builtin_ia32_divsh_mask_round", IX86_BUILTIN_DIVSH_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_smaxv32hf3_mask_round, "__builtin_ia32_maxph512_mask_round", IX86_BUILTIN_MAXPH512_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_sminv32hf3_mask_round, "__builtin_ia32_minph512_mask_round", IX86_BUILTIN_MINPH512_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_smaxv32hf3_mask_round, "__builtin_ia32_maxph512_mask_round", IX86_BUILTIN_MAXPH512_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_sminv32hf3_mask_round, "__builtin_ia32_minph512_mask_round", IX86_BUILTIN_MINPH512_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmsmaxv8hf3_mask_round, "__builtin_ia32_maxsh_mask_round", IX86_BUILTIN_MAXSH_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI_INT)
BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmsminv8hf3_mask_round, "__builtin_ia32_minsh_mask_round", IX86_BUILTIN_MINSH_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_cmpv32hf3_mask_round, "__builtin_ia32_cmpph512_mask_round", IX86_BUILTIN_CMPPH512_MASK_ROUND, UNKNOWN, (int) USI_FTYPE_V32HF_V32HF_INT_USI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_cmpv32hf3_mask_round, "__builtin_ia32_cmpph512_mask_round", IX86_BUILTIN_CMPPH512_MASK_ROUND, UNKNOWN, (int) USI_FTYPE_V32HF_V32HF_INT_USI_INT)
BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512f_vmcmpv8hf3_mask_round, "__builtin_ia32_cmpsh_mask_round", IX86_BUILTIN_CMPSH_MASK_ROUND, UNKNOWN, (int) UQI_FTYPE_V8HF_V8HF_INT_UQI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512fp16_sqrtv32hf2_mask_round, "__builtin_ia32_sqrtph512_mask_round", IX86_BUILTIN_SQRTPH512_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_USI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_sqrtv32hf2_mask_round, "__builtin_ia32_sqrtph512_mask_round", IX86_BUILTIN_SQRTPH512_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_USI_INT)
BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vmsqrtv8hf2_mask_round, "__builtin_ia32_sqrtsh_mask_round", IX86_BUILTIN_SQRTSH_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_scalefv32hf_mask_round, "__builtin_ia32_scalefph512_mask_round", IX86_BUILTIN_SCALEFPH512_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_scalefv32hf_mask_round, "__builtin_ia32_scalefph512_mask_round", IX86_BUILTIN_SCALEFPH512_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512f_vmscalefv8hf_mask_round, "__builtin_ia32_scalefsh_mask_round", IX86_BUILTIN_SCALEFSH_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_reducepv32hf_mask_round, "__builtin_ia32_reduceph512_mask_round", IX86_BUILTIN_REDUCEPH512_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_INT_V32HF_USI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_reducepv32hf_mask_round, "__builtin_ia32_reduceph512_mask_round", IX86_BUILTIN_REDUCEPH512_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_INT_V32HF_USI_INT)
BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_reducesv8hf_mask_round, "__builtin_ia32_reducesh_mask_round", IX86_BUILTIN_REDUCESH_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_INT_V8HF_UQI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_rndscalev32hf_mask_round, "__builtin_ia32_rndscaleph512_mask_round", IX86_BUILTIN_RNDSCALEPH512_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_INT_V32HF_USI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_rndscalev32hf_mask_round, "__builtin_ia32_rndscaleph512_mask_round", IX86_BUILTIN_RNDSCALEPH512_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_INT_V32HF_USI_INT)
BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512f_rndscalev8hf_mask_round, "__builtin_ia32_rndscalesh_mask_round", IX86_BUILTIN_RNDSCALESH_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_INT_V8HF_UQI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_getexpv32hf_mask_round, "__builtin_ia32_getexpph512_mask", IX86_BUILTIN_GETEXPPH512, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_USI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_getexpv32hf_mask_round, "__builtin_ia32_getexpph512_mask", IX86_BUILTIN_GETEXPPH512, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_USI_INT)
BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512f_sgetexpv8hf_mask_round, "__builtin_ia32_getexpsh_mask_round", IX86_BUILTIN_GETEXPSH_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_getmantv32hf_mask_round, "__builtin_ia32_getmantph512_mask", IX86_BUILTIN_GETMANTPH512, UNKNOWN, (int) V32HF_FTYPE_V32HF_INT_V32HF_USI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_getmantv32hf_mask_round, "__builtin_ia32_getmantph512_mask", IX86_BUILTIN_GETMANTPH512, UNKNOWN, (int) V32HF_FTYPE_V32HF_INT_V32HF_USI_INT)
BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512f_vgetmantv8hf_mask_round, "__builtin_ia32_getmantsh_mask_round", IX86_BUILTIN_GETMANTSH_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_INT_V8HF_UQI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512fp16_vcvtph2dq_v16si_mask_round, "__builtin_ia32_vcvtph2dq512_mask_round", IX86_BUILTIN_VCVTPH2DQ512_MASK_ROUND, UNKNOWN, (int) V16SI_FTYPE_V16HF_V16SI_UHI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512fp16_vcvtph2udq_v16si_mask_round, "__builtin_ia32_vcvtph2udq512_mask_round", IX86_BUILTIN_VCVTPH2UDQ512_MASK_ROUND, UNKNOWN, (int) V16SI_FTYPE_V16HF_V16SI_UHI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_unspec_avx512fp16_fix_truncv16si2_mask_round, "__builtin_ia32_vcvttph2dq512_mask_round", IX86_BUILTIN_VCVTTPH2DQ512_MASK_ROUND, UNKNOWN, (int) V16SI_FTYPE_V16HF_V16SI_UHI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_unspec_avx512fp16_fixuns_truncv16si2_mask_round, "__builtin_ia32_vcvttph2udq512_mask_round", IX86_BUILTIN_VCVTTPH2UDQ512_MASK_ROUND, UNKNOWN, (int) V16SI_FTYPE_V16HF_V16SI_UHI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512fp16_vcvtph2qq_v8di_mask_round, "__builtin_ia32_vcvtph2qq512_mask_round", IX86_BUILTIN_VCVTPH2QQ512_MASK_ROUND, UNKNOWN, (int) V8DI_FTYPE_V8HF_V8DI_UQI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512fp16_vcvtph2uqq_v8di_mask_round, "__builtin_ia32_vcvtph2uqq512_mask_round", IX86_BUILTIN_VCVTPH2UQQ512_MASK_ROUND, UNKNOWN, (int) V8DI_FTYPE_V8HF_V8DI_UQI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_unspec_avx512fp16_fix_truncv8di2_mask_round, "__builtin_ia32_vcvttph2qq512_mask_round", IX86_BUILTIN_VCVTTPH2QQ512_MASK_ROUND, UNKNOWN, (int) V8DI_FTYPE_V8HF_V8DI_UQI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_unspec_avx512fp16_fixuns_truncv8di2_mask_round, "__builtin_ia32_vcvttph2uqq512_mask_round", IX86_BUILTIN_VCVTTPH2UQQ512_MASK_ROUND, UNKNOWN, (int) V8DI_FTYPE_V8HF_V8DI_UQI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512fp16_vcvtph2w_v32hi_mask_round, "__builtin_ia32_vcvtph2w512_mask_round", IX86_BUILTIN_VCVTPH2W512_MASK_ROUND, UNKNOWN, (int) V32HI_FTYPE_V32HF_V32HI_USI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512fp16_vcvtph2uw_v32hi_mask_round, "__builtin_ia32_vcvtph2uw512_mask_round", IX86_BUILTIN_VCVTPH2UW512_MASK_ROUND, UNKNOWN, (int) V32HI_FTYPE_V32HF_V32HI_USI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_unspec_avx512fp16_fix_truncv32hi2_mask_round, "__builtin_ia32_vcvttph2w512_mask_round", IX86_BUILTIN_VCVTTPH2W512_MASK_ROUND, UNKNOWN, (int) V32HI_FTYPE_V32HF_V32HI_USI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_unspec_avx512fp16_fixuns_truncv32hi2_mask_round, "__builtin_ia32_vcvttph2uw512_mask_round", IX86_BUILTIN_VCVTTPH2UW512_MASK_ROUND, UNKNOWN, (int) V32HI_FTYPE_V32HF_V32HI_USI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512fp16_vcvtw2ph_v32hi_mask_round, "__builtin_ia32_vcvtw2ph512_mask_round", IX86_BUILTIN_VCVTW2PH512_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HI_V32HF_USI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512fp16_vcvtuw2ph_v32hi_mask_round, "__builtin_ia32_vcvtuw2ph512_mask_round", IX86_BUILTIN_VCVTUW2PH512_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HI_V32HF_USI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512fp16_vcvtdq2ph_v16si_mask_round, "__builtin_ia32_vcvtdq2ph512_mask_round", IX86_BUILTIN_VCVTDQ2PH512_MASK_ROUND, UNKNOWN, (int) V16HF_FTYPE_V16SI_V16HF_UHI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512fp16_vcvtudq2ph_v16si_mask_round, "__builtin_ia32_vcvtudq2ph512_mask_round", IX86_BUILTIN_VCVTUDQ2PH512_MASK_ROUND, UNKNOWN, (int) V16HF_FTYPE_V16SI_V16HF_UHI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512fp16_vcvtqq2ph_v8di_mask_round, "__builtin_ia32_vcvtqq2ph512_mask_round", IX86_BUILTIN_VCVTQQ2PH512_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8DI_V8HF_UQI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512fp16_vcvtuqq2ph_v8di_mask_round, "__builtin_ia32_vcvtuqq2ph512_mask_round", IX86_BUILTIN_VCVTUQQ2PH512_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8DI_V8HF_UQI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtph2dq_v16si_mask_round, "__builtin_ia32_vcvtph2dq512_mask_round", IX86_BUILTIN_VCVTPH2DQ512_MASK_ROUND, UNKNOWN, (int) V16SI_FTYPE_V16HF_V16SI_UHI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtph2udq_v16si_mask_round, "__builtin_ia32_vcvtph2udq512_mask_round", IX86_BUILTIN_VCVTPH2UDQ512_MASK_ROUND, UNKNOWN, (int) V16SI_FTYPE_V16HF_V16SI_UHI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_unspec_avx512fp16_fix_truncv16si2_mask_round, "__builtin_ia32_vcvttph2dq512_mask_round", IX86_BUILTIN_VCVTTPH2DQ512_MASK_ROUND, UNKNOWN, (int) V16SI_FTYPE_V16HF_V16SI_UHI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_unspec_avx512fp16_fixuns_truncv16si2_mask_round, "__builtin_ia32_vcvttph2udq512_mask_round", IX86_BUILTIN_VCVTTPH2UDQ512_MASK_ROUND, UNKNOWN, (int) V16SI_FTYPE_V16HF_V16SI_UHI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtph2qq_v8di_mask_round, "__builtin_ia32_vcvtph2qq512_mask_round", IX86_BUILTIN_VCVTPH2QQ512_MASK_ROUND, UNKNOWN, (int) V8DI_FTYPE_V8HF_V8DI_UQI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtph2uqq_v8di_mask_round, "__builtin_ia32_vcvtph2uqq512_mask_round", IX86_BUILTIN_VCVTPH2UQQ512_MASK_ROUND, UNKNOWN, (int) V8DI_FTYPE_V8HF_V8DI_UQI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_unspec_avx512fp16_fix_truncv8di2_mask_round, "__builtin_ia32_vcvttph2qq512_mask_round", IX86_BUILTIN_VCVTTPH2QQ512_MASK_ROUND, UNKNOWN, (int) V8DI_FTYPE_V8HF_V8DI_UQI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_unspec_avx512fp16_fixuns_truncv8di2_mask_round, "__builtin_ia32_vcvttph2uqq512_mask_round", IX86_BUILTIN_VCVTTPH2UQQ512_MASK_ROUND, UNKNOWN, (int) V8DI_FTYPE_V8HF_V8DI_UQI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtph2w_v32hi_mask_round, "__builtin_ia32_vcvtph2w512_mask_round", IX86_BUILTIN_VCVTPH2W512_MASK_ROUND, UNKNOWN, (int) V32HI_FTYPE_V32HF_V32HI_USI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtph2uw_v32hi_mask_round, "__builtin_ia32_vcvtph2uw512_mask_round", IX86_BUILTIN_VCVTPH2UW512_MASK_ROUND, UNKNOWN, (int) V32HI_FTYPE_V32HF_V32HI_USI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_unspec_avx512fp16_fix_truncv32hi2_mask_round, "__builtin_ia32_vcvttph2w512_mask_round", IX86_BUILTIN_VCVTTPH2W512_MASK_ROUND, UNKNOWN, (int) V32HI_FTYPE_V32HF_V32HI_USI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_unspec_avx512fp16_fixuns_truncv32hi2_mask_round, "__builtin_ia32_vcvttph2uw512_mask_round", IX86_BUILTIN_VCVTTPH2UW512_MASK_ROUND, UNKNOWN, (int) V32HI_FTYPE_V32HF_V32HI_USI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtw2ph_v32hi_mask_round, "__builtin_ia32_vcvtw2ph512_mask_round", IX86_BUILTIN_VCVTW2PH512_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HI_V32HF_USI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtuw2ph_v32hi_mask_round, "__builtin_ia32_vcvtuw2ph512_mask_round", IX86_BUILTIN_VCVTUW2PH512_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HI_V32HF_USI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtdq2ph_v16si_mask_round, "__builtin_ia32_vcvtdq2ph512_mask_round", IX86_BUILTIN_VCVTDQ2PH512_MASK_ROUND, UNKNOWN, (int) V16HF_FTYPE_V16SI_V16HF_UHI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtudq2ph_v16si_mask_round, "__builtin_ia32_vcvtudq2ph512_mask_round", IX86_BUILTIN_VCVTUDQ2PH512_MASK_ROUND, UNKNOWN, (int) V16HF_FTYPE_V16SI_V16HF_UHI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtqq2ph_v8di_mask_round, "__builtin_ia32_vcvtqq2ph512_mask_round", IX86_BUILTIN_VCVTQQ2PH512_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8DI_V8HF_UQI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtuqq2ph_v8di_mask_round, "__builtin_ia32_vcvtuqq2ph512_mask_round", IX86_BUILTIN_VCVTUQQ2PH512_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8DI_V8HF_UQI_INT)
BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtsh2si_round, "__builtin_ia32_vcvtsh2si32_round", IX86_BUILTIN_VCVTSH2SI32_ROUND, UNKNOWN, (int) INT_FTYPE_V8HF_INT)
BDESC (OPTION_MASK_ISA_64BIT, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtsh2siq_round, "__builtin_ia32_vcvtsh2si64_round", IX86_BUILTIN_VCVTSH2SI64_ROUND, UNKNOWN, (int) INT64_FTYPE_V8HF_INT)
BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtsh2usi_round, "__builtin_ia32_vcvtsh2usi32_round", IX86_BUILTIN_VCVTSH2USI32_ROUND, UNKNOWN, (int) UINT_FTYPE_V8HF_INT)
@@ -3601,32 +3601,32 @@ BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtsi2sh_round, "__b
BDESC (OPTION_MASK_ISA_64BIT, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtsi2shq_round, "__builtin_ia32_vcvtsi2sh64_round", IX86_BUILTIN_VCVTSI2SH64_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_INT64_INT)
BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtusi2sh_round, "__builtin_ia32_vcvtusi2sh32_round", IX86_BUILTIN_VCVTUSI2SH32_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_UINT_INT)
BDESC (OPTION_MASK_ISA_64BIT, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtusi2shq_round, "__builtin_ia32_vcvtusi2sh64_round", IX86_BUILTIN_VCVTUSI2SH64_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_UINT64_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512fp16_float_extend_phv8df2_mask_round, "__builtin_ia32_vcvtph2pd512_mask_round", IX86_BUILTIN_VCVTPH2PD512_MASK_ROUND, UNKNOWN, (int) V8DF_FTYPE_V8HF_V8DF_UQI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512fp16_float_extend_phv16sf2_mask_round, "__builtin_ia32_vcvtph2psx512_mask_round", IX86_BUILTIN_VCVTPH2PSX512_MASK_ROUND, UNKNOWN, (int) V16SF_FTYPE_V16HF_V16SF_UHI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512fp16_vcvtpd2ph_v8df_mask_round, "__builtin_ia32_vcvtpd2ph512_mask_round", IX86_BUILTIN_VCVTPD2PH512_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8DF_V8HF_UQI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512fp16_vcvtps2ph_v16sf_mask_round, "__builtin_ia32_vcvtps2phx512_mask_round", IX86_BUILTIN_VCVTPS2PHX512_MASK_ROUND, UNKNOWN, (int) V16HF_FTYPE_V16SF_V16HF_UHI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_float_extend_phv8df2_mask_round, "__builtin_ia32_vcvtph2pd512_mask_round", IX86_BUILTIN_VCVTPH2PD512_MASK_ROUND, UNKNOWN, (int) V8DF_FTYPE_V8HF_V8DF_UQI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_float_extend_phv16sf2_mask_round, "__builtin_ia32_vcvtph2psx512_mask_round", IX86_BUILTIN_VCVTPH2PSX512_MASK_ROUND, UNKNOWN, (int) V16SF_FTYPE_V16HF_V16SF_UHI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtpd2ph_v8df_mask_round, "__builtin_ia32_vcvtpd2ph512_mask_round", IX86_BUILTIN_VCVTPD2PH512_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8DF_V8HF_UQI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtps2ph_v16sf_mask_round, "__builtin_ia32_vcvtps2phx512_mask_round", IX86_BUILTIN_VCVTPS2PHX512_MASK_ROUND, UNKNOWN, (int) V16HF_FTYPE_V16SF_V16HF_UHI_INT)
BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtsh2ss_mask_round, "__builtin_ia32_vcvtsh2ss_mask_round", IX86_BUILTIN_VCVTSH2SS_MASK_ROUND, UNKNOWN, (int) V4SF_FTYPE_V8HF_V4SF_V4SF_UQI_INT)
BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtsh2sd_mask_round, "__builtin_ia32_vcvtsh2sd_mask_round", IX86_BUILTIN_VCVTSH2SD_MASK_ROUND, UNKNOWN, (int) V2DF_FTYPE_V8HF_V2DF_V2DF_UQI_INT)
BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtss2sh_mask_round, "__builtin_ia32_vcvtss2sh_mask_round", IX86_BUILTIN_VCVTSS2SH_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V4SF_V8HF_V8HF_UQI_INT)
BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_vcvtsd2sh_mask_round, "__builtin_ia32_vcvtsd2sh_mask_round", IX86_BUILTIN_VCVTSD2SH_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V2DF_V8HF_V8HF_UQI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_fmaddsub_v32hf_mask_round, "__builtin_ia32_vfmaddsubph512_mask", IX86_BUILTIN_VFMADDSUBPH512_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_fmaddsub_v32hf_mask3_round, "__builtin_ia32_vfmaddsubph512_mask3", IX86_BUILTIN_VFMADDSUBPH512_MASK3, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_fmaddsub_v32hf_maskz_round, "__builtin_ia32_vfmaddsubph512_maskz", IX86_BUILTIN_VFMADDSUBPH512_MASKZ, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_fmsubadd_v32hf_mask_round, "__builtin_ia32_vfmsubaddph512_mask", IX86_BUILTIN_VFMSUBADDPH512_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_fmsubadd_v32hf_mask3_round, "__builtin_ia32_vfmsubaddph512_mask3", IX86_BUILTIN_VFMSUBADDPH512_MASK3, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_fmsubadd_v32hf_maskz_round, "__builtin_ia32_vfmsubaddph512_maskz", IX86_BUILTIN_VFMSUBADDPH512_MASKZ, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_fmadd_v32hf_mask_round, "__builtin_ia32_vfmaddph512_mask", IX86_BUILTIN_VFMADDPH512_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_fmadd_v32hf_mask3_round, "__builtin_ia32_vfmaddph512_mask3", IX86_BUILTIN_VFMADDPH512_MASK3, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_fmadd_v32hf_maskz_round, "__builtin_ia32_vfmaddph512_maskz", IX86_BUILTIN_VFMADDPH512_MASKZ, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_fnmadd_v32hf_mask_round, "__builtin_ia32_vfnmaddph512_mask", IX86_BUILTIN_VFNMADDPH512_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_fnmadd_v32hf_mask3_round, "__builtin_ia32_vfnmaddph512_mask3", IX86_BUILTIN_VFNMADDPH512_MASK3, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_fnmadd_v32hf_maskz_round, "__builtin_ia32_vfnmaddph512_maskz", IX86_BUILTIN_VFNMADDPH512_MASKZ, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_fmsub_v32hf_mask_round, "__builtin_ia32_vfmsubph512_mask", IX86_BUILTIN_VFMSUBPH512_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_fmsub_v32hf_mask3_round, "__builtin_ia32_vfmsubph512_mask3", IX86_BUILTIN_VFMSUBPH512_MASK3, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_fmsub_v32hf_maskz_round, "__builtin_ia32_vfmsubph512_maskz", IX86_BUILTIN_VFMSUBPH512_MASKZ, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_fnmsub_v32hf_mask_round, "__builtin_ia32_vfnmsubph512_mask", IX86_BUILTIN_VFNMSUBPH512_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_fnmsub_v32hf_mask3_round, "__builtin_ia32_vfnmsubph512_mask3", IX86_BUILTIN_VFNMSUBPH512_MASK3, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_fnmsub_v32hf_maskz_round, "__builtin_ia32_vfnmsubph512_maskz", IX86_BUILTIN_VFNMSUBPH512_MASKZ, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_fmaddsub_v32hf_mask_round, "__builtin_ia32_vfmaddsubph512_mask", IX86_BUILTIN_VFMADDSUBPH512_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_fmaddsub_v32hf_mask3_round, "__builtin_ia32_vfmaddsubph512_mask3", IX86_BUILTIN_VFMADDSUBPH512_MASK3, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_fmaddsub_v32hf_maskz_round, "__builtin_ia32_vfmaddsubph512_maskz", IX86_BUILTIN_VFMADDSUBPH512_MASKZ, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_fmsubadd_v32hf_mask_round, "__builtin_ia32_vfmsubaddph512_mask", IX86_BUILTIN_VFMSUBADDPH512_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_fmsubadd_v32hf_mask3_round, "__builtin_ia32_vfmsubaddph512_mask3", IX86_BUILTIN_VFMSUBADDPH512_MASK3, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_fmsubadd_v32hf_maskz_round, "__builtin_ia32_vfmsubaddph512_maskz", IX86_BUILTIN_VFMSUBADDPH512_MASKZ, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_fmadd_v32hf_mask_round, "__builtin_ia32_vfmaddph512_mask", IX86_BUILTIN_VFMADDPH512_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_fmadd_v32hf_mask3_round, "__builtin_ia32_vfmaddph512_mask3", IX86_BUILTIN_VFMADDPH512_MASK3, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_fmadd_v32hf_maskz_round, "__builtin_ia32_vfmaddph512_maskz", IX86_BUILTIN_VFMADDPH512_MASKZ, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_fnmadd_v32hf_mask_round, "__builtin_ia32_vfnmaddph512_mask", IX86_BUILTIN_VFNMADDPH512_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_fnmadd_v32hf_mask3_round, "__builtin_ia32_vfnmaddph512_mask3", IX86_BUILTIN_VFNMADDPH512_MASK3, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_fnmadd_v32hf_maskz_round, "__builtin_ia32_vfnmaddph512_maskz", IX86_BUILTIN_VFNMADDPH512_MASKZ, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_fmsub_v32hf_mask_round, "__builtin_ia32_vfmsubph512_mask", IX86_BUILTIN_VFMSUBPH512_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_fmsub_v32hf_mask3_round, "__builtin_ia32_vfmsubph512_mask3", IX86_BUILTIN_VFMSUBPH512_MASK3, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_fmsub_v32hf_maskz_round, "__builtin_ia32_vfmsubph512_maskz", IX86_BUILTIN_VFMSUBPH512_MASKZ, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_fnmsub_v32hf_mask_round, "__builtin_ia32_vfnmsubph512_mask", IX86_BUILTIN_VFNMSUBPH512_MASK, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_fnmsub_v32hf_mask3_round, "__builtin_ia32_vfnmsubph512_mask3", IX86_BUILTIN_VFNMSUBPH512_MASK3, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_fnmsub_v32hf_maskz_round, "__builtin_ia32_vfnmsubph512_maskz", IX86_BUILTIN_VFNMSUBPH512_MASKZ, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_USI_INT)
BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512f_vmfmadd_v8hf_mask_round, "__builtin_ia32_vfmaddsh3_mask", IX86_BUILTIN_VFMADDSH3_MASK, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI_INT)
BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512f_vmfmadd_v8hf_mask3_round, "__builtin_ia32_vfmaddsh3_mask3", IX86_BUILTIN_VFMADDSH3_MASK3, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI_INT)
BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512f_vmfmadd_v8hf_maskz_round, "__builtin_ia32_vfmaddsh3_maskz", IX86_BUILTIN_VFMADDSH3_MASKZ, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI_INT)
@@ -3634,18 +3634,18 @@ BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512f_vmfnmadd_v8hf_mask_round
BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512f_vmfnmadd_v8hf_mask3_round, "__builtin_ia32_vfnmaddsh3_mask3", IX86_BUILTIN_VFNMADDSH3_MASK3, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI_INT)
BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512f_vmfnmadd_v8hf_maskz_round, "__builtin_ia32_vfnmaddsh3_maskz", IX86_BUILTIN_VFNMADDSH3_MASKZ, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI_INT)
BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512f_vmfmsub_v8hf_mask3_round, "__builtin_ia32_vfmsubsh3_mask3", IX86_BUILTIN_VFMSUBSH3_MASK3, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_fma_fmaddc_v32hf_round, "__builtin_ia32_vfmaddcph512_round", IX86_BUILTIN_VFMADDCPH512_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_fmaddc_v32hf_mask1_round, "__builtin_ia32_vfmaddcph512_mask_round", IX86_BUILTIN_VFMADDCPH512_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_UHI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_fmaddc_v32hf_mask_round, "__builtin_ia32_vfmaddcph512_mask3_round", IX86_BUILTIN_VFMADDCPH512_MASK3_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_UHI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_fmaddc_v32hf_maskz_round, "__builtin_ia32_vfmaddcph512_maskz_round", IX86_BUILTIN_VFMADDCPH512_MASKZ_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_UHI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_fma_fcmaddc_v32hf_round, "__builtin_ia32_vfcmaddcph512_round", IX86_BUILTIN_VFCMADDCPH512_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_fcmaddc_v32hf_mask1_round, "__builtin_ia32_vfcmaddcph512_mask_round", IX86_BUILTIN_VFCMADDCPH512_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_UHI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_fcmaddc_v32hf_mask_round, "__builtin_ia32_vfcmaddcph512_mask3_round", IX86_BUILTIN_VFCMADDCPH512_MASK3_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_UHI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_fcmaddc_v32hf_maskz_round, "__builtin_ia32_vfcmaddcph512_maskz_round", IX86_BUILTIN_VFCMADDCPH512_MASKZ_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_UHI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_fcmulc_v32hf_round, "__builtin_ia32_vfcmulcph512_round", IX86_BUILTIN_VFCMULCPH512_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_fcmulc_v32hf_mask_round, "__builtin_ia32_vfcmulcph512_mask_round", IX86_BUILTIN_VFCMULCPH512_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_UHI_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_fmulc_v32hf_round, "__builtin_ia32_vfmulcph512_round", IX86_BUILTIN_VFMULCPH512_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_INT)
-BDESC (0, OPTION_MASK_ISA2_AVX512FP16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512bw_fmulc_v32hf_mask_round, "__builtin_ia32_vfmulcph512_mask_round", IX86_BUILTIN_VFMULCPH512_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_UHI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_fma_fmaddc_v32hf_round, "__builtin_ia32_vfmaddcph512_round", IX86_BUILTIN_VFMADDCPH512_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_fmaddc_v32hf_mask1_round, "__builtin_ia32_vfmaddcph512_mask_round", IX86_BUILTIN_VFMADDCPH512_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_UHI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_fmaddc_v32hf_mask_round, "__builtin_ia32_vfmaddcph512_mask3_round", IX86_BUILTIN_VFMADDCPH512_MASK3_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_UHI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_fmaddc_v32hf_maskz_round, "__builtin_ia32_vfmaddcph512_maskz_round", IX86_BUILTIN_VFMADDCPH512_MASKZ_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_UHI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_fma_fcmaddc_v32hf_round, "__builtin_ia32_vfcmaddcph512_round", IX86_BUILTIN_VFCMADDCPH512_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_fcmaddc_v32hf_mask1_round, "__builtin_ia32_vfcmaddcph512_mask_round", IX86_BUILTIN_VFCMADDCPH512_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_UHI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_fcmaddc_v32hf_mask_round, "__builtin_ia32_vfcmaddcph512_mask3_round", IX86_BUILTIN_VFCMADDCPH512_MASK3_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_UHI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_fcmaddc_v32hf_maskz_round, "__builtin_ia32_vfcmaddcph512_maskz_round", IX86_BUILTIN_VFCMADDCPH512_MASKZ_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_UHI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_fcmulc_v32hf_round, "__builtin_ia32_vfcmulcph512_round", IX86_BUILTIN_VFCMULCPH512_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_fcmulc_v32hf_mask_round, "__builtin_ia32_vfcmulcph512_mask_round", IX86_BUILTIN_VFCMULCPH512_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_UHI_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_fmulc_v32hf_round, "__builtin_ia32_vfmulcph512_round", IX86_BUILTIN_VFMULCPH512_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_INT)
+BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512bw_fmulc_v32hf_mask_round, "__builtin_ia32_vfmulcph512_mask_round", IX86_BUILTIN_VFMULCPH512_MASK_ROUND, UNKNOWN, (int) V32HF_FTYPE_V32HF_V32HF_V32HF_UHI_INT)
BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_fma_fcmaddcsh_v8hf_round, "__builtin_ia32_vfcmaddcsh_round", IX86_BUILTIN_VFCMADDCSH_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_INT)
BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_fcmaddcsh_v8hf_mask1_round, "__builtin_ia32_vfcmaddcsh_mask_round", IX86_BUILTIN_VFCMADDCSH_MASK_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI_INT)
BDESC (0, OPTION_MASK_ISA2_AVX512FP16, CODE_FOR_avx512fp16_fcmaddcsh_v8hf_mask3_round, "__builtin_ia32_vfcmaddcsh_mask3_round", IX86_BUILTIN_VFCMADDCSH_MASK3_ROUND, UNKNOWN, (int) V8HF_FTYPE_V8HF_V8HF_V8HF_UQI_INT)
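The hunks above change only the second mask column of each BDESC row from OPTION_MASK_ISA2_EVEX512 to 0, leaving the builtin names, insn codes and prototypes untouched. As a rough sketch — not GCC's actual def_builtin logic, whose exact mask semantics are more involved — the two columns gate availability roughly as below, assuming simple all-bits-required semantics:

#include <stdbool.h>
#include <stdint.h>

/* Hypothetical two-column ISA gate: a row is usable only when both
   requirement words are satisfied by the enabled-ISA words.  With
   EVEX512 folded into AVX512F, need_isa2 becomes 0 for these rows.  */
static bool
bdesc_row_enabled (uint64_t isa, uint64_t isa2,
                   uint64_t need_isa, uint64_t need_isa2)
{
  return (need_isa == 0 || (isa & need_isa) == need_isa)
         && (need_isa2 == 0 || (isa2 & need_isa2) == need_isa2);
}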
diff --git a/gcc/config/i386/i386-builtins.cc b/gcc/config/i386/i386-builtins.cc
index 2e7381b..4835b94 100644
--- a/gcc/config/i386/i386-builtins.cc
+++ b/gcc/config/i386/i386-builtins.cc
@@ -801,102 +801,102 @@ ix86_init_mmx_sse_builtins (void)
IX86_BUILTIN_GATHERALTDIV8SI);
/* AVX512F */
- def_builtin_pure (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512,
+ def_builtin_pure (OPTION_MASK_ISA_AVX512F, 0,
"__builtin_ia32_gathersiv16sf",
V16SF_FTYPE_V16SF_PCVOID_V16SI_HI_INT,
IX86_BUILTIN_GATHER3SIV16SF);
- def_builtin_pure (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512,
+ def_builtin_pure (OPTION_MASK_ISA_AVX512F, 0,
"__builtin_ia32_gathersiv8df",
V8DF_FTYPE_V8DF_PCVOID_V8SI_QI_INT,
IX86_BUILTIN_GATHER3SIV8DF);
- def_builtin_pure (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512,
+ def_builtin_pure (OPTION_MASK_ISA_AVX512F, 0,
"__builtin_ia32_gatherdiv16sf",
V8SF_FTYPE_V8SF_PCVOID_V8DI_QI_INT,
IX86_BUILTIN_GATHER3DIV16SF);
- def_builtin_pure (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512,
+ def_builtin_pure (OPTION_MASK_ISA_AVX512F, 0,
"__builtin_ia32_gatherdiv8df",
V8DF_FTYPE_V8DF_PCVOID_V8DI_QI_INT,
IX86_BUILTIN_GATHER3DIV8DF);
- def_builtin_pure (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512,
+ def_builtin_pure (OPTION_MASK_ISA_AVX512F, 0,
"__builtin_ia32_gathersiv16si",
V16SI_FTYPE_V16SI_PCVOID_V16SI_HI_INT,
IX86_BUILTIN_GATHER3SIV16SI);
- def_builtin_pure (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512,
+ def_builtin_pure (OPTION_MASK_ISA_AVX512F, 0,
"__builtin_ia32_gathersiv8di",
V8DI_FTYPE_V8DI_PCVOID_V8SI_QI_INT,
IX86_BUILTIN_GATHER3SIV8DI);
- def_builtin_pure (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512,
+ def_builtin_pure (OPTION_MASK_ISA_AVX512F, 0,
"__builtin_ia32_gatherdiv16si",
V8SI_FTYPE_V8SI_PCVOID_V8DI_QI_INT,
IX86_BUILTIN_GATHER3DIV16SI);
- def_builtin_pure (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512,
+ def_builtin_pure (OPTION_MASK_ISA_AVX512F, 0,
"__builtin_ia32_gatherdiv8di",
V8DI_FTYPE_V8DI_PCVOID_V8DI_QI_INT,
IX86_BUILTIN_GATHER3DIV8DI);
- def_builtin_pure (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512,
+ def_builtin_pure (OPTION_MASK_ISA_AVX512F, 0,
"__builtin_ia32_gather3altsiv8df ",
V8DF_FTYPE_V8DF_PCDOUBLE_V16SI_QI_INT,
IX86_BUILTIN_GATHER3ALTSIV8DF);
- def_builtin_pure (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512,
+ def_builtin_pure (OPTION_MASK_ISA_AVX512F, 0,
"__builtin_ia32_gather3altdiv16sf ",
V16SF_FTYPE_V16SF_PCFLOAT_V8DI_HI_INT,
IX86_BUILTIN_GATHER3ALTDIV16SF);
- def_builtin_pure (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512,
+ def_builtin_pure (OPTION_MASK_ISA_AVX512F, 0,
"__builtin_ia32_gather3altsiv8di ",
V8DI_FTYPE_V8DI_PCINT64_V16SI_QI_INT,
IX86_BUILTIN_GATHER3ALTSIV8DI);
- def_builtin_pure (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512,
+ def_builtin_pure (OPTION_MASK_ISA_AVX512F, 0,
"__builtin_ia32_gather3altdiv16si ",
V16SI_FTYPE_V16SI_PCINT_V8DI_HI_INT,
IX86_BUILTIN_GATHER3ALTDIV16SI);
- def_builtin (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512,
+ def_builtin (OPTION_MASK_ISA_AVX512F, 0,
"__builtin_ia32_scattersiv16sf",
VOID_FTYPE_PVOID_HI_V16SI_V16SF_INT,
IX86_BUILTIN_SCATTERSIV16SF);
- def_builtin (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512,
+ def_builtin (OPTION_MASK_ISA_AVX512F, 0,
"__builtin_ia32_scattersiv8df",
VOID_FTYPE_PVOID_QI_V8SI_V8DF_INT,
IX86_BUILTIN_SCATTERSIV8DF);
- def_builtin (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512,
+ def_builtin (OPTION_MASK_ISA_AVX512F, 0,
"__builtin_ia32_scatterdiv16sf",
VOID_FTYPE_PVOID_QI_V8DI_V8SF_INT,
IX86_BUILTIN_SCATTERDIV16SF);
- def_builtin (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512,
+ def_builtin (OPTION_MASK_ISA_AVX512F, 0,
"__builtin_ia32_scatterdiv8df",
VOID_FTYPE_PVOID_QI_V8DI_V8DF_INT,
IX86_BUILTIN_SCATTERDIV8DF);
- def_builtin (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512,
+ def_builtin (OPTION_MASK_ISA_AVX512F, 0,
"__builtin_ia32_scattersiv16si",
VOID_FTYPE_PVOID_HI_V16SI_V16SI_INT,
IX86_BUILTIN_SCATTERSIV16SI);
- def_builtin (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512,
+ def_builtin (OPTION_MASK_ISA_AVX512F, 0,
"__builtin_ia32_scattersiv8di",
VOID_FTYPE_PVOID_QI_V8SI_V8DI_INT,
IX86_BUILTIN_SCATTERSIV8DI);
- def_builtin (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512,
+ def_builtin (OPTION_MASK_ISA_AVX512F, 0,
"__builtin_ia32_scatterdiv16si",
VOID_FTYPE_PVOID_QI_V8DI_V8SI_INT,
IX86_BUILTIN_SCATTERDIV16SI);
- def_builtin (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512,
+ def_builtin (OPTION_MASK_ISA_AVX512F, 0,
"__builtin_ia32_scatterdiv8di",
VOID_FTYPE_PVOID_QI_V8DI_V8DI_INT,
IX86_BUILTIN_SCATTERDIV8DI);
@@ -1046,22 +1046,22 @@ ix86_init_mmx_sse_builtins (void)
VOID_FTYPE_PVOID_QI_V2DI_V2DI_INT,
IX86_BUILTIN_SCATTERDIV2DI);
- def_builtin (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512,
+ def_builtin (OPTION_MASK_ISA_AVX512F, 0,
"__builtin_ia32_scatteraltsiv8df ",
VOID_FTYPE_PDOUBLE_QI_V16SI_V8DF_INT,
IX86_BUILTIN_SCATTERALTSIV8DF);
- def_builtin (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512,
+ def_builtin (OPTION_MASK_ISA_AVX512F, 0,
"__builtin_ia32_scatteraltdiv16sf ",
VOID_FTYPE_PFLOAT_HI_V8DI_V16SF_INT,
IX86_BUILTIN_SCATTERALTDIV16SF);
- def_builtin (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512,
+ def_builtin (OPTION_MASK_ISA_AVX512F, 0,
"__builtin_ia32_scatteraltsiv8di ",
VOID_FTYPE_PLONGLONG_QI_V16SI_V8DI_INT,
IX86_BUILTIN_SCATTERALTSIV8DI);
- def_builtin (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512,
+ def_builtin (OPTION_MASK_ISA_AVX512F, 0,
"__builtin_ia32_scatteraltdiv16si ",
VOID_FTYPE_PINT_HI_V8DI_V16SI_INT,
IX86_BUILTIN_SCATTERALTDIV16SI);
@@ -1676,7 +1676,7 @@ ix86_vectorize_builtin_gather (const_tree mem_vectype,
enum ix86_builtins code;
const machine_mode mode = TYPE_MODE (TREE_TYPE (mem_vectype));
- if ((!TARGET_AVX512F || !TARGET_EVEX512) && GET_MODE_SIZE (mode) == 64)
+ if (!TARGET_AVX512F && GET_MODE_SIZE (mode) == 64)
return NULL_TREE;
if (! TARGET_AVX2
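After this change, 512-bit gather expansion is rejected on !TARGET_AVX512F alone; the separate TARGET_EVEX512 test is gone. A condensed, illustrative predicate (the real hook goes on to select a builtin code per mode, abbreviated here):

#include <stdbool.h>

/* Sketch of the simplified gate in ix86_vectorize_builtin_gather;
   mode_size is GET_MODE_SIZE of the memory vector mode, in bytes.  */
static bool
gather_mode_ok (unsigned mode_size, bool have_avx512f, bool have_avx2)
{
  if (!have_avx512f && mode_size == 64)
    return false;     /* 64-byte (512-bit) gathers need AVX512F.  */
  return have_avx2;   /* narrower gathers still need at least AVX2.  */
}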
diff --git a/gcc/config/i386/i386-c.cc b/gcc/config/i386/i386-c.cc
index 0a320ca..457aa05 100644
--- a/gcc/config/i386/i386-c.cc
+++ b/gcc/config/i386/i386-c.cc
@@ -729,12 +729,8 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag,
def_or_undef (parse_in, "__SHA512__");
if (isa_flag2 & OPTION_MASK_ISA2_SM4)
def_or_undef (parse_in, "__SM4__");
- if (isa_flag2 & OPTION_MASK_ISA2_EVEX512)
- def_or_undef (parse_in, "__EVEX512__");
if (isa_flag2 & OPTION_MASK_ISA2_USER_MSR)
def_or_undef (parse_in, "__USER_MSR__");
- if (isa_flag2 & OPTION_MASK_ISA2_AVX10_1_256)
- def_or_undef (parse_in, "__AVX10_1_256__");
if (isa_flag2 & OPTION_MASK_ISA2_AVX10_1)
def_or_undef (parse_in, "__AVX10_1__");
if (isa_flag2 & OPTION_MASK_ISA2_APX_F)
diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
index cdfd94d..3278f1f 100644
--- a/gcc/config/i386/i386-expand.cc
+++ b/gcc/config/i386/i386-expand.cc
@@ -387,7 +387,7 @@ ix86_expand_move (machine_mode mode, rtx operands[])
tmp = XEXP (op1, 0);
if (GET_CODE (tmp) != PLUS
- || GET_CODE (XEXP (tmp, 0)) != SYMBOL_REF)
+ || !SYMBOL_REF_P (XEXP (tmp, 0)))
break;
op1 = XEXP (tmp, 0);
@@ -487,7 +487,7 @@ ix86_expand_move (machine_mode mode, rtx operands[])
op1 = machopic_legitimize_pic_address (op1, mode,
tmp == op1 ? 0 : tmp);
}
- if (op0 != op1 && GET_CODE (op0) != MEM)
+ if (op0 != op1 && !MEM_P (op0))
{
rtx insn = gen_rtx_SET (op0, op1);
emit_insn (insn);
@@ -1396,11 +1396,11 @@ ix86_expand_vector_logical_operator (enum rtx_code code, machine_mode mode,
to cast them temporarily to integer vectors. */
if (op1
&& !TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
- && (SUBREG_P (op2) || GET_CODE (op2) == CONST_VECTOR)
+ && (SUBREG_P (op2) || CONST_VECTOR_P (op2))
&& GET_MODE_CLASS (GET_MODE (SUBREG_REG (op1))) == MODE_VECTOR_FLOAT
&& GET_MODE_SIZE (GET_MODE (SUBREG_REG (op1))) == GET_MODE_SIZE (mode)
&& SUBREG_BYTE (op1) == 0
- && (GET_CODE (op2) == CONST_VECTOR
+ && (CONST_VECTOR_P (op2)
|| (GET_MODE (SUBREG_REG (op1)) == GET_MODE (SUBREG_REG (op2))
&& SUBREG_BYTE (op2) == 0))
&& can_create_pseudo_p ())
@@ -1415,7 +1415,7 @@ ix86_expand_vector_logical_operator (enum rtx_code code, machine_mode mode,
case E_V4DFmode:
case E_V8DFmode:
dst = gen_reg_rtx (GET_MODE (SUBREG_REG (op1)));
- if (GET_CODE (op2) == CONST_VECTOR)
+ if (CONST_VECTOR_P (op2))
{
op2 = gen_lowpart (GET_MODE (dst), op2);
op2 = force_reg (GET_MODE (dst), op2);
@@ -3151,7 +3151,7 @@ ix86_expand_setcc (rtx dest, enum rtx_code code, rtx op0, rtx op1)
}
/* Expand floating point op0 <=> op1, i.e.
- dest = op0 == op1 ? 0 : op0 < op1 ? -1 : op0 > op1 ? 1 : 2. */
+ dest = op0 == op1 ? 0 : op0 < op1 ? -1 : op0 > op1 ? 1 : -128. */
void
ix86_expand_fp_spaceship (rtx dest, rtx op0, rtx op1, rtx op2)
@@ -3264,7 +3264,7 @@ ix86_expand_fp_spaceship (rtx dest, rtx op0, rtx op1, rtx op2)
if (l2)
{
emit_label (l2);
- emit_move_insn (dest, op2 == const0_rtx ? const2_rtx : op2);
+ emit_move_insn (dest, op2 == const0_rtx ? GEN_INT (-128) : op2);
}
emit_label (lend);
}
@@ -3396,8 +3396,7 @@ ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
too common scenario. */
start_sequence ();
compare_op = ix86_expand_fp_compare (code, op0, op1);
- compare_seq = get_insns ();
- end_sequence ();
+ compare_seq = end_sequence ();
if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode)
code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
@@ -3561,8 +3560,7 @@ ix86_expand_int_movcc (rtx operands[])
start_sequence ();
compare_op = ix86_expand_compare (code, op0, op1);
- compare_seq = get_insns ();
- end_sequence ();
+ compare_seq = end_sequence ();
compare_code = GET_CODE (compare_op);
@@ -3611,7 +3609,11 @@ ix86_expand_int_movcc (rtx operands[])
negate_cc_compare_p = true;
}
- diff = ct - cf;
+ diff = (unsigned HOST_WIDE_INT) ct - cf;
+ /* Make sure we can represent the difference between the two values. */
+ if ((diff > 0) != ((cf < 0) != (ct < 0) ? cf < 0 : cf < ct))
+ return false;
+
/* Sign bit compares are better done using shifts than we do by using
sbb. */
if (sign_bit_compare_p
@@ -3669,7 +3671,12 @@ ix86_expand_int_movcc (rtx operands[])
PUT_CODE (compare_op,
reverse_condition (GET_CODE (compare_op)));
}
- diff = ct - cf;
+
+ diff = (unsigned HOST_WIDE_INT) ct - cf;
+ /* Make sure we can represent the difference
+ between the two values. */
+ if ((diff > 0) != ((cf < 0) != (ct < 0) ? cf < 0 : cf < ct))
+ return false;
if (reg_overlap_mentioned_p (out, compare_op))
tmp = gen_reg_rtx (mode);
@@ -3687,7 +3694,12 @@ ix86_expand_int_movcc (rtx operands[])
else
{
std::swap (ct, cf);
- diff = ct - cf;
+
+ diff = (unsigned HOST_WIDE_INT) ct - cf;
+ /* Make sure we can represent the difference
+ between the two values. */
+ if ((diff > 0) != ((cf < 0) != (ct < 0) ? cf < 0 : cf < ct))
+ return false;
}
tmp = emit_store_flag (tmp, code, op0, op1, VOIDmode, 0, -1);
}
@@ -3754,9 +3766,15 @@ ix86_expand_int_movcc (rtx operands[])
tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
}
+ HOST_WIDE_INT ival = (unsigned HOST_WIDE_INT) cf - ct;
+ /* Make sure we can represent the difference
+ between the two values. */
+ if ((ival > 0) != ((ct < 0) != (cf < 0) ? ct < 0 : ct < cf))
+ return false;
+
tmp = expand_simple_binop (mode, AND,
copy_rtx (tmp),
- gen_int_mode (cf - ct, mode),
+ gen_int_mode (ival, mode),
copy_rtx (tmp), 1, OPTAB_DIRECT);
if (ct)
tmp = expand_simple_binop (mode, PLUS,
@@ -3793,7 +3811,13 @@ ix86_expand_int_movcc (rtx operands[])
if (new_code != UNKNOWN)
{
std::swap (ct, cf);
- diff = -diff;
+
+ diff = (unsigned HOST_WIDE_INT) ct - cf;
+ /* Make sure we can represent the difference
+ between the two values. */
+ if ((diff > 0) != ((cf < 0) != (ct < 0) ? cf < 0 : cf < ct))
+ return false;
+
code = new_code;
}
}
@@ -3996,8 +4020,14 @@ ix86_expand_int_movcc (rtx operands[])
copy_rtx (out), 1, OPTAB_DIRECT);
}
+ HOST_WIDE_INT ival = (unsigned HOST_WIDE_INT) cf - ct;
+ /* Make sure we can represent the difference
+ between the two values. */
+ if ((ival > 0) != ((ct < 0) != (cf < 0) ? ct < 0 : ct < cf))
+ return false;
+
out = expand_simple_binop (mode, AND, copy_rtx (out),
- gen_int_mode (cf - ct, mode),
+ gen_int_mode (ival, mode),
copy_rtx (out), 1, OPTAB_DIRECT);
if (ct)
out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
@@ -4138,6 +4168,10 @@ ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
return false;
mode = GET_MODE (dest);
+ if (immediate_operand (if_false, mode))
+ if_false = force_reg (mode, if_false);
+ if (immediate_operand (if_true, mode))
+ if_true = force_reg (mode, if_true);
/* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
but MODE may be a vector mode and thus not appropriate. */
@@ -4186,7 +4220,7 @@ ix86_valid_mask_cmp_mode (machine_mode mode)
if ((inner_mode == QImode || inner_mode == HImode) && !TARGET_AVX512BW)
return false;
- return (vector_size == 64 && TARGET_EVEX512) || TARGET_AVX512VL;
+ return vector_size == 64 || TARGET_AVX512VL;
}
/* Return true if integer mask comparison should be used. */
@@ -4687,6 +4721,8 @@ ix86_expand_fp_movcc (rtx operands[])
compare_op = ix86_expand_compare (NE, tmp, const0_rtx);
}
+ operands[2] = force_reg (mode, operands[2]);
+ operands[3] = force_reg (mode, operands[3]);
emit_insn (gen_rtx_SET (operands[0],
gen_rtx_IF_THEN_ELSE (mode, compare_op,
operands[2], operands[3])));
@@ -4882,7 +4918,7 @@ ix86_expand_int_sse_cmp (rtx dest, enum rtx_code code, rtx cop0, rtx cop1,
case LEU:
/* x <= cst can be handled as x < cst + 1 unless there is
wrap around in cst + 1. */
- if (GET_CODE (cop1) == CONST_VECTOR
+ if (CONST_VECTOR_P (cop1)
&& GET_MODE_INNER (mode) != TImode)
{
unsigned int n_elts = GET_MODE_NUNITS (mode), i;
@@ -4926,7 +4962,7 @@ ix86_expand_int_sse_cmp (rtx dest, enum rtx_code code, rtx cop0, rtx cop1,
case GEU:
/* x >= cst can be handled as x > cst - 1 unless there is
wrap around in cst - 1. */
- if (GET_CODE (cop1) == CONST_VECTOR
+ if (CONST_VECTOR_P (cop1)
&& GET_MODE_INNER (mode) != TImode)
{
unsigned int n_elts = GET_MODE_NUNITS (mode), i;
@@ -4997,9 +5033,9 @@ ix86_expand_int_sse_cmp (rtx dest, enum rtx_code code, rtx cop0, rtx cop1,
}
}
- if (GET_CODE (cop0) == CONST_VECTOR)
+ if (CONST_VECTOR_P (cop0))
cop0 = force_reg (mode, cop0);
- else if (GET_CODE (cop1) == CONST_VECTOR)
+ else if (CONST_VECTOR_P (cop1))
cop1 = force_reg (mode, cop1);
rtx optrue = op_true ? op_true : CONSTM1_RTX (data_mode);
@@ -5022,7 +5058,7 @@ ix86_expand_int_sse_cmp (rtx dest, enum rtx_code code, rtx cop0, rtx cop1,
&& GET_MODE_SIZE (GET_MODE_INNER (mode)) >= 4
/* Don't do it if not using integer masks and we'd end up with
the right values in the registers though. */
- && ((GET_MODE_SIZE (mode) == 64 && TARGET_EVEX512)
+ && (GET_MODE_SIZE (mode) == 64
|| !vector_all_ones_operand (optrue, data_mode)
|| opfalse != CONST0_RTX (data_mode))))
{
@@ -5198,7 +5234,7 @@ ix86_expand_int_sse_cmp (rtx dest, enum rtx_code code, rtx cop0, rtx cop1,
if (*negate)
std::swap (op_true, op_false);
- if (GET_CODE (cop1) == CONST_VECTOR)
+ if (CONST_VECTOR_P (cop1))
cop1 = force_reg (mode, cop1);
/* Allow the comparison to be done in one mode, but the movcc to
@@ -6152,7 +6188,7 @@ ix86_extract_perm_from_pool_constant (int* perm, rtx mem)
rtx constant = get_pool_constant (XEXP (mem, 0));
- if (GET_CODE (constant) != CONST_VECTOR)
+ if (!CONST_VECTOR_P (constant))
return false;
/* There could be some rtx like
@@ -6162,7 +6198,7 @@ ix86_extract_perm_from_pool_constant (int* perm, rtx mem)
{
constant = simplify_subreg (mode, constant, GET_MODE (constant), 0);
- if (constant == nullptr || GET_CODE (constant) != CONST_VECTOR)
+ if (constant == nullptr || !CONST_VECTOR_P (constant))
return false;
}
@@ -6208,7 +6244,7 @@ ix86_split_to_parts (rtx operand, rtx *parts, machine_mode mode)
return size;
}
- if (GET_CODE (operand) == CONST_VECTOR)
+ if (CONST_VECTOR_P (operand))
{
scalar_int_mode imode = int_mode_for_mode (mode).require ();
/* Caution: if we looked through a constant pool memory above,
@@ -6342,7 +6378,7 @@ ix86_split_long_move (rtx operands[])
fp moves, that force all constants to memory to allow combining. */
if (MEM_P (operands[1])
- && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
+ && SYMBOL_REF_P (XEXP (operands[1], 0))
&& CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
operands[1] = get_pool_constant (XEXP (operands[1], 0));
if (push_operand (operands[0], VOIDmode))
@@ -7863,7 +7899,8 @@ expand_set_or_cpymem_via_loop (rtx destmem, rtx srcmem,
rtx count, machine_mode mode, int unroll,
int expected_size, bool issetmem)
{
- rtx_code_label *out_label, *top_label;
+ rtx_code_label *out_label = nullptr;
+ rtx_code_label *top_label = nullptr;
rtx iter, tmp;
machine_mode iter_mode = counter_mode (count);
int piece_size_n = GET_MODE_SIZE (mode) * unroll;
@@ -7871,9 +7908,19 @@ expand_set_or_cpymem_via_loop (rtx destmem, rtx srcmem,
rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
rtx size;
int i;
+ int loop_count;
- top_label = gen_label_rtx ();
- out_label = gen_label_rtx ();
+ if (expected_size != -1 && CONST_INT_P (count))
+ loop_count = INTVAL (count) / GET_MODE_SIZE (mode) / unroll;
+ else
+ loop_count = -1;
+
+ /* Don't generate the loop if the loop count is 1. */
+ if (loop_count != 1)
+ {
+ top_label = gen_label_rtx ();
+ out_label = gen_label_rtx ();
+ }
iter = gen_reg_rtx (iter_mode);
size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
@@ -7887,7 +7934,8 @@ expand_set_or_cpymem_via_loop (rtx destmem, rtx srcmem,
}
emit_move_insn (iter, const0_rtx);
- emit_label (top_label);
+ if (loop_count != 1)
+ emit_label (top_label);
tmp = convert_modes (Pmode, iter_mode, iter, true);
@@ -7955,21 +8003,25 @@ expand_set_or_cpymem_via_loop (rtx destmem, rtx srcmem,
if (tmp != iter)
emit_move_insn (iter, tmp);
- emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
- true, top_label);
- if (expected_size != -1)
+ if (loop_count != 1)
{
- expected_size /= GET_MODE_SIZE (mode) * unroll;
- if (expected_size == 0)
- predict_jump (0);
- else if (expected_size > REG_BR_PROB_BASE)
- predict_jump (REG_BR_PROB_BASE - 1);
+ emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
+ true, top_label);
+ if (expected_size != -1)
+ {
+ expected_size /= GET_MODE_SIZE (mode) * unroll;
+ if (expected_size == 0)
+ predict_jump (0);
+ else if (expected_size > REG_BR_PROB_BASE)
+ predict_jump (REG_BR_PROB_BASE - 1);
+ else
+ predict_jump (REG_BR_PROB_BASE
+ - (REG_BR_PROB_BASE + expected_size / 2)
+ / expected_size);
+ }
else
- predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2)
- / expected_size);
+ predict_jump (REG_BR_PROB_BASE * 80 / 100);
}
- else
- predict_jump (REG_BR_PROB_BASE * 80 / 100);
iter = ix86_zero_extend_to_Pmode (iter);
tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
true, OPTAB_LIB_WIDEN);
@@ -7982,7 +8034,8 @@ expand_set_or_cpymem_via_loop (rtx destmem, rtx srcmem,
if (tmp != srcptr)
emit_move_insn (srcptr, tmp);
}
- emit_label (out_label);
+ if (loop_count != 1)
+ emit_label (out_label);
}
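An editorial sketch of the two shapes the expander now emits, depending on whether a constant COUNT works out to exactly one unrolled chunk:

  /* loop_count != 1 (general case):       loop_count == 1:
       top_label:                            copy/set one chunk
         copy/set one chunk                  advance destptr (and srcptr)
         iter += piece_size
         if (iter < size) goto top_label
       out_label:                          */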
/* Divide COUNTREG by SCALE. */
@@ -8185,19 +8238,13 @@ expand_cpymem_epilogue (rtx destmem, rtx srcmem,
rtx src, dest;
if (CONST_INT_P (count))
{
- HOST_WIDE_INT countval = INTVAL (count);
- HOST_WIDE_INT epilogue_size = countval % max_size;
- int i;
-
- /* For now MAX_SIZE should be a power of 2. This assert could be
- relaxed, but it'll require a bit more complicated epilogue
- expanding. */
- gcc_assert ((max_size & (max_size - 1)) == 0);
- for (i = max_size; i >= 1; i >>= 1)
- {
- if (epilogue_size & i)
- destmem = emit_memmov (destmem, &srcmem, destptr, srcptr, i);
- }
+ unsigned HOST_WIDE_INT countval = UINTVAL (count);
+ unsigned HOST_WIDE_INT epilogue_size = countval % max_size;
+ unsigned int destalign = MEM_ALIGN (destmem);
+ cfun->machine->by_pieces_in_use = true;
+ move_by_pieces (destmem, srcmem, epilogue_size, destalign,
+ RETURN_BEGIN);
+ cfun->machine->by_pieces_in_use = false;
return;
}
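A worked example of the new delegation (editorial note): for countval == 23 and max_size == 16, epilogue_size is 7; the removed loop would have emitted separate 4-, 2- and 1-byte moves for the set bits of 7, while move_by_pieces chooses its own sequence and, on targets that permit it, may cover the tail with fewer, overlapping moves.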
if (max_size > 8)
@@ -8358,6 +8405,78 @@ expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
1, max_size / 2, true);
}
+/* Callback routine for store_by_pieces.  Return an RTL register
+   containing GET_MODE_SIZE (MODE) bytes taken from the register OP_P,
+   which is either an integer register or a word-vector register.  If
+   PREV_P isn't nullptr, it holds the RTL generated by the previous
+   iteration.  */
+
+static rtx
+setmem_epilogue_gen_val (void *op_p, void *prev_p, HOST_WIDE_INT,
+ fixed_size_mode mode)
+{
+ rtx target;
+ by_pieces_prev *prev = (by_pieces_prev *) prev_p;
+ if (prev)
+ {
+ rtx prev_op = prev->data;
+ if (prev_op)
+ {
+ machine_mode prev_mode = GET_MODE (prev_op);
+ if (prev_mode == mode)
+ return prev_op;
+ if (VECTOR_MODE_P (prev_mode)
+ && VECTOR_MODE_P (mode)
+ && GET_MODE_INNER (prev_mode) == GET_MODE_INNER (mode))
+ {
+ target = gen_rtx_SUBREG (mode, prev_op, 0);
+ return target;
+ }
+ }
+ }
+
+ rtx op = (rtx) op_p;
+ machine_mode op_mode = GET_MODE (op);
+
+ if (VECTOR_MODE_P (mode))
+ {
+ gcc_assert (GET_MODE_INNER (mode) == QImode);
+
+ unsigned int op_size = GET_MODE_SIZE (op_mode);
+ unsigned int size = GET_MODE_SIZE (mode);
+ unsigned int nunits = op_size / GET_MODE_SIZE (QImode);
+ machine_mode vec_mode
+ = mode_for_vector (QImode, nunits).require ();
+ target = gen_reg_rtx (vec_mode);
+ op = gen_rtx_SUBREG (vec_mode, op, 0);
+ emit_move_insn (target, op);
+ if (op_size == size)
+ return target;
+
+ rtx tmp = gen_reg_rtx (mode);
+ target = gen_rtx_SUBREG (mode, target, 0);
+ emit_move_insn (tmp, target);
+ return tmp;
+ }
+
+ if (VECTOR_MODE_P (op_mode))
+ {
+ gcc_assert (GET_MODE_INNER (op_mode) == word_mode);
+ target = gen_reg_rtx (word_mode);
+ op = gen_rtx_SUBREG (word_mode, op, 0);
+ emit_move_insn (target, op);
+ }
+ else
+ target = op;
+
+ if (mode == GET_MODE (target))
+ return target;
+
+ rtx tmp = gen_reg_rtx (mode);
+ target = gen_rtx_SUBREG (mode, target, 0);
+ emit_move_insn (tmp, target);
+ return tmp;
+}
+
/* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
static void
expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx vec_value,
@@ -8367,24 +8486,14 @@ expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx vec_value,
if (CONST_INT_P (count))
{
- HOST_WIDE_INT countval = INTVAL (count);
- HOST_WIDE_INT epilogue_size = countval % max_size;
- int i;
-
- /* For now MAX_SIZE should be a power of 2. This assert could be
- relaxed, but it'll require a bit more complicated epilogue
- expanding. */
- gcc_assert ((max_size & (max_size - 1)) == 0);
- for (i = max_size; i >= 1; i >>= 1)
- {
- if (epilogue_size & i)
- {
- if (vec_value && i > GET_MODE_SIZE (GET_MODE (value)))
- destmem = emit_memset (destmem, destptr, vec_value, i);
- else
- destmem = emit_memset (destmem, destptr, value, i);
- }
- }
+ unsigned HOST_WIDE_INT countval = UINTVAL (count);
+ unsigned HOST_WIDE_INT epilogue_size = countval % max_size;
+ unsigned int destalign = MEM_ALIGN (destmem);
+ cfun->machine->by_pieces_in_use = true;
+ store_by_pieces (destmem, epilogue_size, setmem_epilogue_gen_val,
+ vec_value ? vec_value : value, destalign, true,
+ RETURN_BEGIN);
+ cfun->machine->by_pieces_in_use = false;
return;
}
if (max_size > 32)
@@ -8516,6 +8625,7 @@ expand_small_cpymem_or_setmem (rtx destmem, rtx srcmem,
rtx_code_label *label = ix86_expand_aligntest (count, size, false);
machine_mode mode = int_mode_for_size (size * BITS_PER_UNIT, 1).else_blk ();
rtx modesize;
+ rtx scalar_value = value;
int n;
/* If we do not have vector value to copy, we must reduce size. */
@@ -8535,11 +8645,57 @@ expand_small_cpymem_or_setmem (rtx destmem, rtx srcmem,
{
/* Choose appropriate vector mode. */
if (size >= 32)
- mode = TARGET_AVX ? V32QImode : TARGET_SSE ? V16QImode : DImode;
+ switch (MOVE_MAX)
+ {
+ case 64:
+ if (size >= 64)
+ {
+ mode = V64QImode;
+ break;
+ }
+ /* FALLTHRU */
+ case 32:
+ mode = V32QImode;
+ break;
+ case 16:
+ mode = V16QImode;
+ break;
+ case 8:
+ mode = DImode;
+ break;
+ default:
+ gcc_unreachable ();
+ }
else if (size >= 16)
mode = TARGET_SSE ? V16QImode : DImode;
srcmem = change_address (srcmem, mode, srcptr);
}
+ if (issetmem && vec_value && GET_MODE_SIZE (mode) > size)
+ {
+      /* For memset with a vector value, when the requested SIZE is
+	 smaller than the vector size, first try a narrower vector;
+	 otherwise fall back to the original scalar value.  */
+ machine_mode inner_mode = GET_MODE_INNER (mode);
+ unsigned int nunits = size / GET_MODE_SIZE (inner_mode);
+ if (nunits > 1)
+ {
+ mode = mode_for_vector (GET_MODE_INNER (mode),
+ nunits).require ();
+ value = gen_rtx_SUBREG (mode, value, 0);
+ }
+ else
+ {
+ scalar_int_mode smode
+ = smallest_int_mode_for_size (size * BITS_PER_UNIT).require ();
+ gcc_assert (GET_MODE_SIZE (GET_MODE (scalar_value))
+ >= GET_MODE_SIZE (smode));
+ mode = smode;
+ if (GET_MODE (scalar_value) == mode)
+ value = scalar_value;
+ else
+ value = gen_rtx_SUBREG (mode, scalar_value, 0);
+ }
+ }
destmem = change_address (destmem, mode, destptr);
modesize = GEN_INT (GET_MODE_SIZE (mode));
gcc_assert (GET_MODE_SIZE (mode) <= size);
@@ -8901,31 +9057,34 @@ expand_set_or_cpymem_constant_prologue (rtx dst, rtx *srcp, rtx destreg,
/* Return true if ALG can be used in current context.
Assume we expand memset if MEMSET is true. */
static bool
-alg_usable_p (enum stringop_alg alg, bool memset, bool have_as)
+alg_usable_p (enum stringop_alg alg, bool memset,
+ addr_space_t dst_as, addr_space_t src_as)
{
if (alg == no_stringop)
return false;
/* It is not possible to use a library call if we have non-default
address space. We can do better than the generic byte-at-a-time
loop, used as a fallback. */
- if (alg == libcall && have_as)
+  if (alg == libcall
+      && !(ADDR_SPACE_GENERIC_P (dst_as) && ADDR_SPACE_GENERIC_P (src_as)))
return false;
if (alg == vector_loop)
return TARGET_SSE || TARGET_AVX;
/* Algorithms using the rep prefix want at least edi and ecx;
additionally, memset wants eax and memcpy wants esi. Don't
consider such algorithms if the user has appropriated those
- registers for their own purposes, or if we have a non-default
- address space, since some string insns cannot override the segment. */
+ registers for their own purposes, or if we have the destination
+ in the non-default address space, since string insns cannot
+ override the destination segment. */
if (alg == rep_prefix_1_byte
|| alg == rep_prefix_4_byte
|| alg == rep_prefix_8_byte)
{
- if (have_as)
- return false;
if (fixed_regs[CX_REG]
|| fixed_regs[DI_REG]
- || (memset ? fixed_regs[AX_REG] : fixed_regs[SI_REG]))
+ || (memset ? fixed_regs[AX_REG] : fixed_regs[SI_REG])
+ || !ADDR_SPACE_GENERIC_P (dst_as)
+ || !(ADDR_SPACE_GENERIC_P (src_as) || Pmode == word_mode))
return false;
}
return true;
@@ -8935,8 +9094,8 @@ alg_usable_p (enum stringop_alg alg, bool memset, bool have_as)
static enum stringop_alg
decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size,
unsigned HOST_WIDE_INT min_size, unsigned HOST_WIDE_INT max_size,
- bool memset, bool zero_memset, bool have_as,
- int *dynamic_check, bool *noalign, bool recur)
+ bool memset, bool zero_memset, addr_space_t dst_as,
+ addr_space_t src_as, int *dynamic_check, bool *noalign, bool recur)
{
const struct stringop_algs *algs;
bool optimize_for_speed;
@@ -8968,7 +9127,7 @@ decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size,
for (i = 0; i < MAX_STRINGOP_ALGS; i++)
{
enum stringop_alg candidate = algs->size[i].alg;
- bool usable = alg_usable_p (candidate, memset, have_as);
+ bool usable = alg_usable_p (candidate, memset, dst_as, src_as);
any_alg_usable_p |= usable;
if (candidate != libcall && candidate && usable)
@@ -8984,17 +9143,17 @@ decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size,
/* If user specified the algorithm, honor it if possible. */
if (ix86_stringop_alg != no_stringop
- && alg_usable_p (ix86_stringop_alg, memset, have_as))
+ && alg_usable_p (ix86_stringop_alg, memset, dst_as, src_as))
return ix86_stringop_alg;
/* rep; movq or rep; movl is the smallest variant. */
else if (!optimize_for_speed)
{
*noalign = true;
if (!count || (count & 3) || (memset && !zero_memset))
- return alg_usable_p (rep_prefix_1_byte, memset, have_as)
+ return alg_usable_p (rep_prefix_1_byte, memset, dst_as, src_as)
? rep_prefix_1_byte : loop_1_byte;
else
- return alg_usable_p (rep_prefix_4_byte, memset, have_as)
+ return alg_usable_p (rep_prefix_4_byte, memset, dst_as, src_as)
? rep_prefix_4_byte : loop;
}
/* Very tiny blocks are best handled via the loop, REP is expensive to
@@ -9018,7 +9177,7 @@ decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size,
enum stringop_alg candidate = algs->size[i].alg;
if (candidate != libcall
- && alg_usable_p (candidate, memset, have_as))
+ && alg_usable_p (candidate, memset, dst_as, src_as))
{
alg = candidate;
alg_noalign = algs->size[i].noalign;
@@ -9038,7 +9197,7 @@ decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size,
else if (!any_alg_usable_p)
break;
}
- else if (alg_usable_p (candidate, memset, have_as)
+ else if (alg_usable_p (candidate, memset, dst_as, src_as)
&& !(TARGET_PREFER_KNOWN_REP_MOVSB_STOSB
&& candidate == rep_prefix_1_byte
/* NB: If min_size != max_size, size is
@@ -9060,7 +9219,7 @@ decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size,
choice in ix86_costs. */
if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
&& (algs->unknown_size == libcall
- || !alg_usable_p (algs->unknown_size, memset, have_as)))
+ || !alg_usable_p (algs->unknown_size, memset, dst_as, src_as)))
{
enum stringop_alg alg;
HOST_WIDE_INT new_expected_size = (max > 0 ? max : 4096) / 2;
@@ -9075,8 +9234,9 @@ decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size,
*dynamic_check = 128;
return loop_1_byte;
}
- alg = decide_alg (count, new_expected_size, min_size, max_size, memset,
- zero_memset, have_as, dynamic_check, noalign, true);
+ alg = decide_alg (count, new_expected_size, min_size, max_size,
+ memset, zero_memset, dst_as, src_as,
+ dynamic_check, noalign, true);
gcc_assert (*dynamic_check == -1);
if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
*dynamic_check = max;
@@ -9088,7 +9248,11 @@ decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size,
/* Try to use some reasonable fallback algorithm. Note that for
non-default address spaces we default to a loop instead of
a libcall. */
- return (alg_usable_p (algs->unknown_size, memset, have_as)
+
+ bool have_as = !(ADDR_SPACE_GENERIC_P (dst_as)
+ && ADDR_SPACE_GENERIC_P (src_as));
+
+ return (alg_usable_p (algs->unknown_size, memset, dst_as, src_as)
? algs->unknown_size : have_as ? loop : libcall);
}
@@ -9135,13 +9299,26 @@ decide_alignment (int align,
static rtx
promote_duplicated_reg (machine_mode mode, rtx val)
{
+ if (val == const0_rtx)
+ return copy_to_mode_reg (mode, CONST0_RTX (mode));
+
machine_mode valmode = GET_MODE (val);
+ if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
+ {
+      /* Duplicate the scalar value for an integer vector.  */
+ gcc_assert ((val == const0_rtx || val == constm1_rtx)
+ || GET_MODE_INNER (mode) == valmode);
+ rtx dup = gen_reg_rtx (mode);
+ bool ok = ix86_expand_vector_init_duplicate (false, mode, dup,
+ val);
+ gcc_assert (ok);
+ return dup;
+ }
+
rtx tmp;
int nops = mode == DImode ? 3 : 2;
- gcc_assert (mode == SImode || mode == DImode || val == const0_rtx);
- if (val == const0_rtx)
- return copy_to_mode_reg (mode, CONST0_RTX (mode));
+ gcc_assert (mode == SImode || mode == DImode);
if (CONST_INT_P (val))
{
HOST_WIDE_INT v = INTVAL (val) & 255;
@@ -9307,14 +9484,13 @@ ix86_expand_set_or_cpymem (rtx dst, rtx src, rtx count_exp, rtx val_exp,
bool need_zero_guard = false;
bool noalign;
machine_mode move_mode = VOIDmode;
- machine_mode wider_mode;
int unroll_factor = 1;
/* TODO: Once value ranges are available, fill in proper data. */
unsigned HOST_WIDE_INT min_size = 0;
unsigned HOST_WIDE_INT max_size = -1;
unsigned HOST_WIDE_INT probable_max_size = -1;
bool misaligned_prologue_used = false;
- bool have_as;
+ addr_space_t dst_as, src_as = ADDR_SPACE_GENERIC;
if (CONST_INT_P (align_exp))
align = INTVAL (align_exp);
@@ -9352,16 +9528,15 @@ ix86_expand_set_or_cpymem (rtx dst, rtx src, rtx count_exp, rtx val_exp,
if (count > (HOST_WIDE_INT_1U << 30))
return false;
- have_as = !ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (dst));
+ dst_as = MEM_ADDR_SPACE (dst);
if (!issetmem)
- have_as |= !ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (src));
+ src_as = MEM_ADDR_SPACE (src);
/* Step 0: Decide on preferred algorithm, desired alignment and
size of chunks to be copied by main loop. */
alg = decide_alg (count, expected_size, min_size, probable_max_size,
- issetmem,
- issetmem && val_exp == const0_rtx, have_as,
- &dynamic_check, &noalign, false);
+ issetmem, issetmem && val_exp == const0_rtx,
+ dst_as, src_as, &dynamic_check, &noalign, false);
if (dump_file)
fprintf (dump_file, "Selected stringop expansion strategy: %s\n",
@@ -9371,11 +9546,6 @@ ix86_expand_set_or_cpymem (rtx dst, rtx src, rtx count_exp, rtx val_exp,
return false;
gcc_assert (alg != no_stringop);
- /* For now vector-version of memset is generated only for memory zeroing, as
- creating of promoted vector value is very cheap in this case. */
- if (issetmem && alg == vector_loop && val_exp != const0_rtx)
- alg = unrolled_loop;
-
if (!count)
count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
destreg = ix86_copy_addr_to_reg (XEXP (dst, 0));
@@ -9384,6 +9554,7 @@ ix86_expand_set_or_cpymem (rtx dst, rtx src, rtx count_exp, rtx val_exp,
unroll_factor = 1;
move_mode = word_mode;
+ int nunits;
switch (alg)
{
case libcall:
@@ -9404,27 +9575,15 @@ ix86_expand_set_or_cpymem (rtx dst, rtx src, rtx count_exp, rtx val_exp,
case vector_loop:
need_zero_guard = true;
unroll_factor = 4;
- /* Find the widest supported mode. */
- move_mode = word_mode;
- while (GET_MODE_WIDER_MODE (move_mode).exists (&wider_mode)
- && optab_handler (mov_optab, wider_mode) != CODE_FOR_nothing)
- move_mode = wider_mode;
-
- if (TARGET_AVX256_SPLIT_REGS && GET_MODE_BITSIZE (move_mode) > 128)
- move_mode = TImode;
- if (TARGET_AVX512_SPLIT_REGS && GET_MODE_BITSIZE (move_mode) > 256)
- move_mode = OImode;
-
- /* Find the corresponding vector mode with the same size as MOVE_MODE.
- MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.). */
- if (GET_MODE_SIZE (move_mode) > GET_MODE_SIZE (word_mode))
+ /* Get the vector mode to move STORE_MAX_PIECES/MOVE_MAX bytes. */
+ nunits = issetmem ? STORE_MAX_PIECES : MOVE_MAX;
+ nunits /= GET_MODE_SIZE (word_mode);
+ if (nunits > 1)
{
- int nunits = GET_MODE_SIZE (move_mode) / GET_MODE_SIZE (word_mode);
- if (!mode_for_vector (word_mode, nunits).exists (&move_mode)
- || optab_handler (mov_optab, move_mode) == CODE_FOR_nothing)
- move_mode = word_mode;
+ move_mode = mode_for_vector (word_mode, nunits).require ();
+ gcc_assert (optab_handler (mov_optab, move_mode)
+ != CODE_FOR_nothing);
}
- gcc_assert (optab_handler (mov_optab, move_mode) != CODE_FOR_nothing);
break;
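A worked example (editorial; assumes a 64-bit target where word_mode is DImode and MOVE_MAX is 32, as on AVX2 with 256-bit vectors enabled): nunits = 32 / 8 = 4, so move_mode becomes V4DImode, and with unroll_factor 4 the main loop processes 128 bytes per iteration; for memset the width is taken from STORE_MAX_PIECES instead.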
case rep_prefix_8_byte:
move_mode = DImode;
@@ -9480,20 +9639,41 @@ ix86_expand_set_or_cpymem (rtx dst, rtx src, rtx count_exp, rtx val_exp,
&& ((desired_align > align && !align_bytes)
|| (!count && epilogue_size_needed > 1)));
+ /* Destination is aligned after the misaligned prologue. */
+ bool aligned_dstmem = misaligned_prologue_used;
+
+ if (noalign && !misaligned_prologue_used)
+ {
+      /* Also use the misaligned prologue if alignment isn't required
+	 and the destination isn't aligned.  Since alignment isn't
+	 required, the destination won't be aligned after the prologue
+	 either.  */
+ aligned_dstmem = (GET_MODE_ALIGNMENT (move_mode)
+ <= MEM_ALIGN (dst));
+ if (!aligned_dstmem)
+ misaligned_prologue_used = true;
+ }
+
/* Do the cheap promotion to allow better CSE across the
main loop and epilogue (ie one load of the big constant in the
     front of all code).
For now the misaligned move sequences do not have fast path
without broadcasting. */
- if (issetmem && ((CONST_INT_P (val_exp) || misaligned_prologue_used)))
+ if (issetmem
+ && (alg == vector_loop
+ || CONST_INT_P (val_exp)
+ || misaligned_prologue_used))
{
if (alg == vector_loop)
{
- gcc_assert (val_exp == const0_rtx);
- vec_promoted_val = promote_duplicated_reg (move_mode, val_exp);
promoted_val = promote_duplicated_reg_to_size (val_exp,
GET_MODE_SIZE (word_mode),
desired_align, align);
+	  /* Duplicate the promoted scalar value if it is neither 0 nor -1.  */
+ vec_promoted_val
+ = promote_duplicated_reg (move_mode,
+ (val_exp == const0_rtx
+ || val_exp == constm1_rtx)
+ ? val_exp : promoted_val);
}
else
{
@@ -9518,7 +9698,8 @@ ix86_expand_set_or_cpymem (rtx dst, rtx src, rtx count_exp, rtx val_exp,
if (!issetmem)
src = change_address (src, BLKmode, srcreg);
dst = change_address (dst, BLKmode, destreg);
- set_mem_align (dst, desired_align * BITS_PER_UNIT);
+ if (aligned_dstmem)
+ set_mem_align (dst, desired_align * BITS_PER_UNIT);
epilogue_size_needed = 0;
if (need_zero_guard
&& min_size < (unsigned HOST_WIDE_INT) size_needed)
@@ -10066,7 +10247,7 @@ construct_plt_address (rtx symbol)
{
rtx tmp, unspec;
- gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
+ gcc_assert (SYMBOL_REF_P (symbol));
gcc_assert (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF);
gcc_assert (Pmode == DImode);
@@ -10100,7 +10281,7 @@ ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
tree fndecl;
bool call_no_callee_saved_registers = false;
- if (GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
+ if (SYMBOL_REF_P (XEXP (fnaddr, 0)))
{
fndecl = SYMBOL_REF_DECL (XEXP (fnaddr, 0));
if (fndecl)
@@ -10108,9 +10289,11 @@ ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
if (lookup_attribute ("interrupt",
TYPE_ATTRIBUTES (TREE_TYPE (fndecl))))
error ("interrupt service routine cannot be called directly");
- else if (lookup_attribute ("no_callee_saved_registers",
- TYPE_ATTRIBUTES (TREE_TYPE (fndecl))))
+ else if (ix86_type_no_callee_saved_registers_p (TREE_TYPE (fndecl)))
call_no_callee_saved_registers = true;
+ if (fndecl == current_function_decl
+ && decl_binds_to_current_def_p (fndecl))
+ cfun->machine->recursive_function = true;
}
}
else
@@ -10120,8 +10303,7 @@ ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
tree mem_expr = MEM_EXPR (fnaddr);
if (mem_expr != nullptr
&& TREE_CODE (mem_expr) == MEM_REF
- && lookup_attribute ("no_callee_saved_registers",
- TYPE_ATTRIBUTES (TREE_TYPE (mem_expr))))
+ && ix86_type_no_callee_saved_registers_p (TREE_TYPE (mem_expr)))
call_no_callee_saved_registers = true;
}
@@ -10136,7 +10318,7 @@ ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
if (TARGET_MACHO && !TARGET_64BIT)
{
#if TARGET_MACHO
- if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
+ if (flag_pic && SYMBOL_REF_P (XEXP (fnaddr, 0)))
fnaddr = machopic_indirect_call_target (fnaddr);
#endif
}
@@ -10146,7 +10328,7 @@ ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
check if PLT was explicitly avoided via no-plt or "noplt" attribute, making
it an indirect call. */
if (flag_pic
- && GET_CODE (addr) == SYMBOL_REF
+ && SYMBOL_REF_P (addr)
&& ix86_call_use_plt_p (addr))
{
if (flag_plt
@@ -10220,7 +10402,7 @@ ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
if (ix86_cmodel == CM_LARGE_PIC
&& !TARGET_PECOFF
&& MEM_P (fnaddr)
- && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
+ && SYMBOL_REF_P (XEXP (fnaddr, 0))
&& !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
/* Since x32 GOT slot is 64 bit with zero upper 32 bits, indirect
@@ -10323,7 +10505,7 @@ ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
}
if (TARGET_MACHO && TARGET_64BIT && !sibcall
- && ((GET_CODE (addr) == SYMBOL_REF && !SYMBOL_REF_LOCAL_P (addr))
+ && ((SYMBOL_REF_P (addr) && !SYMBOL_REF_LOCAL_P (addr))
|| !fndecl || TREE_PUBLIC (fndecl)))
{
/* We allow public functions defined in a TU to bind locally for PIC
@@ -10346,6 +10528,7 @@ ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
char c_mask = CALL_USED_REGISTERS_MASK (is_64bit_ms_abi);
for (int i = 0; i < FIRST_PSEUDO_REGISTER; i++)
if (!fixed_regs[i]
+ && i != HARD_FRAME_POINTER_REGNUM
&& !(ix86_call_used_regs[i] == 1
|| (ix86_call_used_regs[i] & c_mask))
&& !STACK_REGNO_P (i)
@@ -11244,6 +11427,54 @@ fixup_modeless_constant (rtx x, machine_mode mode)
return x;
}
+/* Expand the outgoing argument ARG to extract unsigned char and short
+ integer constants suitable for the predicates and the instruction
+ templates which expect the unsigned expanded value. */
+
+static rtx
+ix86_expand_unsigned_small_int_cst_argument (tree arg)
+{
+ /* When passing 0xff as an unsigned char function argument with the
+ C frontend promotion, expand_normal gets
+
+ <integer_cst 0x7fffe6aa23a8 type <integer_type 0x7fffe98225e8 int> constant 255>
+
+ and returns the rtx value using the sign-extended representation:
+
+ (const_int 255 [0xff])
+
+ Without the C frontend promotion, expand_normal gets
+
+ <integer_cst 0x7fffe9824018 type <integer_type 0x7fffe9822348 unsigned char > constant 255>
+
+ and returns
+
+ (const_int -1 [0xffffffffffffffff])
+
+ which doesn't work with the predicates nor the instruction templates
+ which expect the unsigned expanded value. Extract the unsigned char
+ and short integer constants to return
+
+ (const_int 255 [0xff])
+
+ so that the expanded value is always unsigned, without the C frontend
+ promotion. */
+
+ if (TREE_CODE (arg) == INTEGER_CST)
+ {
+ tree type = TREE_TYPE (arg);
+ if (INTEGRAL_TYPE_P (type)
+ && TYPE_UNSIGNED (type)
+ && TYPE_PRECISION (type) < TYPE_PRECISION (integer_type_node))
+ {
+ HOST_WIDE_INT cst = TREE_INT_CST_LOW (arg);
+ return GEN_INT (cst);
+ }
+ }
+
+ return expand_normal (arg);
+}
+
/* Subroutine of ix86_expand_builtin to take care of insns with
variable number of operands. */
@@ -12142,7 +12373,7 @@ ix86_expand_args_builtin (const struct builtin_description *d,
for (i = 0; i < nargs; i++)
{
tree arg = CALL_EXPR_ARG (exp, i);
- rtx op = expand_normal (arg);
+ rtx op = ix86_expand_unsigned_small_int_cst_argument (arg);
machine_mode mode = insn_p->operand[i + 1].mode;
/* Need to fixup modeless constant before testing predicate. */
op = fixup_modeless_constant (op, mode);
@@ -12383,7 +12614,7 @@ ix86_expand_args_builtin (const struct builtin_description *d,
static rtx
ix86_erase_embedded_rounding (rtx pat)
{
- if (GET_CODE (pat) == INSN)
+ if (NONJUMP_INSN_P (pat))
pat = PATTERN (pat);
gcc_assert (GET_CODE (pat) == SET);
@@ -12837,7 +13068,7 @@ ix86_expand_round_builtin (const struct builtin_description *d,
for (i = 0; i < nargs; i++)
{
tree arg = CALL_EXPR_ARG (exp, i);
- rtx op = expand_normal (arg);
+ rtx op = ix86_expand_unsigned_small_int_cst_argument (arg);
machine_mode mode = insn_p->operand[i + 1].mode;
bool match = insn_p->operand[i + 1].predicate (op, mode);
@@ -13322,7 +13553,7 @@ ix86_expand_special_args_builtin (const struct builtin_description *d,
machine_mode mode = insn_p->operand[i + 1].mode;
arg = CALL_EXPR_ARG (exp, i + arg_adjust);
- op = expand_normal (arg);
+ op = ix86_expand_unsigned_small_int_cst_argument (arg);
if (i == memory)
{
@@ -15466,7 +15697,7 @@ rdseed_step:
op0 = expand_normal (arg0);
op1 = expand_normal (arg1);
op2 = expand_normal (arg2);
- op3 = expand_normal (arg3);
+ op3 = ix86_expand_unsigned_small_int_cst_argument (arg3);
op4 = expand_normal (arg4);
/* Note the arg order is different from the operand order. */
mode0 = insn_data[icode].operand[1].mode;
@@ -15681,7 +15912,7 @@ rdseed_step:
arg3 = CALL_EXPR_ARG (exp, 3);
arg4 = CALL_EXPR_ARG (exp, 4);
op0 = expand_normal (arg0);
- op1 = expand_normal (arg1);
+ op1 = ix86_expand_unsigned_small_int_cst_argument (arg1);
op2 = expand_normal (arg2);
op3 = expand_normal (arg3);
op4 = expand_normal (arg4);
@@ -16130,7 +16361,7 @@ ix86_vector_duplicate_simode_const (machine_mode mode, rtx target,
{
case VEC_BCAST_PXOR:
if ((mode == V8SImode && !TARGET_AVX2)
- || (mode == V16SImode && !(TARGET_AVX512F && TARGET_EVEX512)))
+ || (mode == V16SImode && !TARGET_AVX512F))
return false;
emit_move_insn (target, CONST0_RTX (mode));
return true;
@@ -16138,7 +16369,7 @@ ix86_vector_duplicate_simode_const (machine_mode mode, rtx target,
case VEC_BCAST_PCMPEQ:
if ((mode == V4SImode && !TARGET_SSE2)
|| (mode == V8SImode && !TARGET_AVX2)
- || (mode == V16SImode && !(TARGET_AVX512F && TARGET_EVEX512)))
+ || (mode == V16SImode && !TARGET_AVX512F))
return false;
emit_move_insn (target, CONSTM1_RTX (mode));
return true;
@@ -16158,7 +16389,7 @@ ix86_vector_duplicate_simode_const (machine_mode mode, rtx target,
tmp2 = gen_reg_rtx (V32QImode);
emit_insn (gen_absv32qi2 (tmp2, tmp1));
}
- else if (mode == V16SImode && TARGET_AVX512BW && TARGET_EVEX512)
+ else if (mode == V16SImode && TARGET_AVX512BW)
{
tmp1 = gen_reg_rtx (V64QImode);
emit_move_insn (tmp1, CONSTM1_RTX (V64QImode));
@@ -16184,7 +16415,7 @@ ix86_vector_duplicate_simode_const (machine_mode mode, rtx target,
tmp2 = gen_reg_rtx (V32QImode);
emit_insn (gen_addv32qi3 (tmp2, tmp1, tmp1));
}
- else if (mode == V16SImode && TARGET_AVX512BW && TARGET_EVEX512)
+ else if (mode == V16SImode && TARGET_AVX512BW)
{
tmp1 = gen_reg_rtx (V64QImode);
emit_move_insn (tmp1, CONSTM1_RTX (V64QImode));
@@ -16210,7 +16441,7 @@ ix86_vector_duplicate_simode_const (machine_mode mode, rtx target,
tmp2 = gen_reg_rtx (V16HImode);
emit_insn (gen_lshrv16hi3 (tmp2, tmp1, GEN_INT (entry->arg)));
}
- else if (mode == V16SImode && TARGET_AVX512BW && TARGET_EVEX512)
+ else if (mode == V16SImode && TARGET_AVX512BW)
{
tmp1 = gen_reg_rtx (V32HImode);
emit_move_insn (tmp1, CONSTM1_RTX (V32HImode));
@@ -16236,7 +16467,7 @@ ix86_vector_duplicate_simode_const (machine_mode mode, rtx target,
emit_insn (gen_lshrv8si3 (target, tmp1, GEN_INT (entry->arg)));
return true;
}
- else if (mode == V16SImode && TARGET_AVX512F && TARGET_EVEX512)
+ else if (mode == V16SImode && TARGET_AVX512F)
{
tmp1 = gen_reg_rtx (V16SImode);
emit_move_insn (tmp1, CONSTM1_RTX (V16SImode));
@@ -16262,7 +16493,7 @@ ix86_vector_duplicate_simode_const (machine_mode mode, rtx target,
tmp2 = gen_reg_rtx (V16HImode);
emit_insn (gen_ashlv16hi3 (tmp2, tmp1, GEN_INT (entry->arg)));
}
- else if (mode == V16SImode && TARGET_AVX512BW && TARGET_EVEX512)
+ else if (mode == V16SImode && TARGET_AVX512BW)
{
tmp1 = gen_reg_rtx (V32HImode);
emit_move_insn (tmp1, CONSTM1_RTX (V32HImode));
@@ -16288,7 +16519,7 @@ ix86_vector_duplicate_simode_const (machine_mode mode, rtx target,
emit_insn (gen_ashlv8si3 (target, tmp1, GEN_INT (entry->arg)));
return true;
}
- else if (mode == V16SImode && TARGET_AVX512F && TARGET_EVEX512)
+ else if (mode == V16SImode && TARGET_AVX512F)
{
tmp1 = gen_reg_rtx (V16SImode);
emit_move_insn (tmp1, CONSTM1_RTX (V16SImode));
@@ -16342,8 +16573,7 @@ ix86_vector_duplicate_value (machine_mode mode, rtx target, rtx val)
if (GET_MODE (reg) != innermode)
reg = gen_lowpart (innermode, reg);
SET_SRC (PATTERN (insn)) = gen_vec_duplicate (mode, reg);
- seq = get_insns ();
- end_sequence ();
+ seq = end_sequence ();
if (seq)
emit_insn_before (seq, insn);
@@ -16659,7 +16889,6 @@ ix86_expand_vector_init_duplicate (bool mmx_ok, machine_mode mode,
case E_V32HFmode:
case E_V32BFmode:
- gcc_assert (TARGET_EVEX512);
if (TARGET_AVX512BW)
return ix86_vector_duplicate_value (mode, target, val);
else
@@ -16712,9 +16941,6 @@ ix86_expand_vector_init_one_nonzero (bool mmx_ok, machine_mode mode,
bool use_vector_set = false;
rtx (*gen_vec_set_0) (rtx, rtx, rtx) = NULL;
- if (GET_MODE_SIZE (mode) == 64 && !TARGET_EVEX512)
- return false;
-
switch (mode)
{
case E_V2DImode:
@@ -18670,6 +18896,33 @@ emit_reduc_half (rtx dest, rtx src, int i)
case E_V8HFmode:
case E_V4SImode:
case E_V2DImode:
+ if (TARGET_SSE_REDUCTION_PREFER_PSHUF)
+ {
+ if (i == 128)
+ {
+ d = gen_reg_rtx (V4SImode);
+ tem = gen_sse2_pshufd_1 (
+ d, force_reg (V4SImode, gen_lowpart (V4SImode, src)),
+ GEN_INT (2), GEN_INT (3), GEN_INT (2), GEN_INT (3));
+ break;
+ }
+ else if (i == 64)
+ {
+ d = gen_reg_rtx (V4SImode);
+ tem = gen_sse2_pshufd_1 (
+ d, force_reg (V4SImode, gen_lowpart (V4SImode, src)),
+ GEN_INT (1), GEN_INT (1), GEN_INT (1), GEN_INT (1));
+ break;
+ }
+ else if (i == 32)
+ {
+ d = gen_reg_rtx (V8HImode);
+ tem = gen_sse2_pshuflw_1 (
+ d, force_reg (V8HImode, gen_lowpart (V8HImode, src)),
+ GEN_INT (1), GEN_INT (1), GEN_INT (1), GEN_INT (1));
+ break;
+ }
+ }
d = gen_reg_rtx (V1TImode);
tem = gen_sse2_lshrv1ti3 (d, gen_lowpart (V1TImode, src),
GEN_INT (i / 2));
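A reference for the shuffle selectors used above (editorial note): pshufd {2,3,2,3} produces {src[2], src[3], src[2], src[3]}, i.e. it copies the high 64 bits of SRC into the low half, which is all the reduction needs from a psrldq $8; pshufd {1,1,1,1} broadcasts dword 1 (bits 32..63); pshuflw {1,1,1,1} broadcasts word 1 (bits 16..31) across the low quadword, leaving the high quadword unchanged.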
@@ -19256,8 +19509,6 @@ ix86_emit_swdivsf (rtx res, rtx a, rtx b, machine_mode mode)
e1 = gen_reg_rtx (mode);
x1 = gen_reg_rtx (mode);
- /* a / b = a * ((rcp(b) + rcp(b)) - (b * rcp(b) * rcp (b))) */
-
b = force_reg (mode, b);
/* x0 = rcp(b) estimate */
@@ -19270,20 +19521,42 @@ ix86_emit_swdivsf (rtx res, rtx a, rtx b, machine_mode mode)
emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
UNSPEC_RCP)));
- /* e0 = x0 * b */
- emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, b)));
+ unsigned vector_size = GET_MODE_SIZE (mode);
- /* e0 = x0 * e0 */
- emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, e0)));
+ /* (a - (rcp(b) * a * b)) * rcp(b) + rcp(b) * a
+ N-R step with 2 fma implementation. */
+ if (TARGET_FMA
+ || (TARGET_AVX512F && vector_size == 64)
+ || (TARGET_AVX512VL && (vector_size == 32 || vector_size == 16)))
+ {
+ /* e0 = x0 * a */
+ emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, a)));
+ /* e1 = e0 * b - a */
+ emit_insn (gen_rtx_SET (e1, gen_rtx_FMA (mode, e0, b,
+ gen_rtx_NEG (mode, a))));
+ /* res = - e1 * x0 + e0 */
+ emit_insn (gen_rtx_SET (res, gen_rtx_FMA (mode,
+ gen_rtx_NEG (mode, e1),
+ x0, e0)));
+ }
+ else
+ /* a / b = a * ((rcp(b) + rcp(b)) - (b * rcp(b) * rcp (b))) */
+ {
+ /* e0 = x0 * b */
+ emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, b)));
+
+ /* e1 = x0 + x0 */
+ emit_insn (gen_rtx_SET (e1, gen_rtx_PLUS (mode, x0, x0)));
- /* e1 = x0 + x0 */
- emit_insn (gen_rtx_SET (e1, gen_rtx_PLUS (mode, x0, x0)));
+ /* e0 = x0 * e0 */
+ emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, e0)));
- /* x1 = e1 - e0 */
- emit_insn (gen_rtx_SET (x1, gen_rtx_MINUS (mode, e1, e0)));
+ /* x1 = e1 - e0 */
+ emit_insn (gen_rtx_SET (x1, gen_rtx_MINUS (mode, e1, e0)));
- /* res = a * x1 */
- emit_insn (gen_rtx_SET (res, gen_rtx_MULT (mode, a, x1)));
+ /* res = a * x1 */
+ emit_insn (gen_rtx_SET (res, gen_rtx_MULT (mode, a, x1)));
+ }
}
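A quick editorial check, in the same notation as the comments above, that the FMA sequence computes the same value as the sequence it replaces: with x0 ~= 1/b, e0 = a * x0 and e1 = e0 * b - a,

  e0 - e1 * x0 = a*x0 - (a*b*x0 - a)*x0 = a * (2*x0 - b*x0*x0)
	       = a * ((x0 + x0) - b*x0*x0),

so both forms apply one Newton-Raphson refinement of x0 toward 1/b and scale by a; the FMA variant simply fuses the multiplies with the adds.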
/* Output code to perform a Newton-Raphson approximation of a
@@ -19356,7 +19629,7 @@ ix86_emit_swsqrtsf (rtx res, rtx a, machine_mode mode, bool recip)
unsigned vector_size = GET_MODE_SIZE (mode);
if (TARGET_FMA
- || (TARGET_AVX512F && TARGET_EVEX512 && vector_size == 64)
+ || (TARGET_AVX512F && vector_size == 64)
|| (TARGET_AVX512VL && (vector_size == 32 || vector_size == 16)))
emit_insn (gen_rtx_SET (e2,
gen_rtx_FMA (mode, e0, x0, mthree)));
@@ -22018,8 +22291,7 @@ expand_vec_perm_interleave2 (struct expand_vec_perm_d *d)
V4SImode this *will* succeed. For V8HImode or V16QImode it may not. */
start_sequence ();
ok = expand_vec_perm_1 (&dfinal);
- seq = get_insns ();
- end_sequence ();
+ seq = end_sequence ();
if (!ok)
return false;
@@ -22355,8 +22627,7 @@ expand_vec_perm_vperm2f128_vblend (struct expand_vec_perm_d *d)
start_sequence ();
ok = expand_vec_perm_1 (&dfirst);
- seq = get_insns ();
- end_sequence ();
+ seq = end_sequence ();
if (!ok)
return false;
@@ -22464,8 +22735,7 @@ expand_vec_perm_2perm_interleave (struct expand_vec_perm_d *d, bool two_insn)
{
start_sequence ();
ok = expand_vec_perm_1 (&dfirst);
- seq1 = get_insns ();
- end_sequence ();
+ seq1 = end_sequence ();
if (!ok)
return false;
@@ -22475,8 +22745,7 @@ expand_vec_perm_2perm_interleave (struct expand_vec_perm_d *d, bool two_insn)
{
start_sequence ();
ok = expand_vec_perm_1 (&dsecond);
- seq2 = get_insns ();
- end_sequence ();
+ seq2 = end_sequence ();
if (!ok)
return false;
@@ -22590,8 +22859,7 @@ expand_vec_perm_2perm_pblendv (struct expand_vec_perm_d *d, bool two_insn)
{
start_sequence ();
ok = expand_vec_perm_1 (&dfirst);
- seq1 = get_insns ();
- end_sequence ();
+ seq1 = end_sequence ();
if (!ok)
return false;
@@ -22601,8 +22869,7 @@ expand_vec_perm_2perm_pblendv (struct expand_vec_perm_d *d, bool two_insn)
{
start_sequence ();
ok = expand_vec_perm_1 (&dsecond);
- seq2 = get_insns ();
- end_sequence ();
+ seq2 = end_sequence ();
if (!ok)
return false;
@@ -22796,8 +23063,7 @@ expand_vec_perm2_vperm2f128_vblend (struct expand_vec_perm_d *d)
canonicalize_perm (&dfirst);
start_sequence ();
ok = ix86_expand_vec_perm_const_1 (&dfirst);
- seq1 = get_insns ();
- end_sequence ();
+ seq1 = end_sequence ();
if (!ok)
return false;
@@ -22805,8 +23071,7 @@ expand_vec_perm2_vperm2f128_vblend (struct expand_vec_perm_d *d)
canonicalize_perm (&dsecond);
start_sequence ();
ok = ix86_expand_vec_perm_const_1 (&dsecond);
- seq2 = get_insns ();
- end_sequence ();
+ seq2 = end_sequence ();
if (!ok)
return false;
@@ -24290,9 +24555,6 @@ ix86_vectorize_vec_perm_const (machine_mode vmode, machine_mode op_mode,
unsigned int i, nelt, which;
bool two_args;
- if (GET_MODE_SIZE (vmode) == 64 && !TARGET_EVEX512)
- return false;
-
/* For HF and BF mode vector, convert it to HI using subreg. */
if (GET_MODE_INNER (vmode) == HFmode || GET_MODE_INNER (vmode) == BFmode)
{
@@ -24834,7 +25096,6 @@ ix86_expand_vecop_qihi2 (enum rtx_code code, rtx dest, rtx op1, rtx op2)
ix86_expand_vecop_qihi. */
if (!TARGET_AVX512BW
|| (qimode == V16QImode && !TARGET_AVX512VL)
- || (qimode == V32QImode && !TARGET_EVEX512)
/* There are no V64HImode instructions. */
|| qimode == V64QImode)
return false;
@@ -25068,7 +25329,7 @@ const_vector_equal_evenodd_p (rtx op)
{
machine_mode mode = GET_MODE (op);
int i, nunits = GET_MODE_NUNITS (mode);
- if (GET_CODE (op) != CONST_VECTOR
+ if (!CONST_VECTOR_P (op)
|| nunits != CONST_VECTOR_NUNITS (op))
return false;
for (i = 0; i < nunits; i += 2)
@@ -25303,7 +25564,7 @@ ix86_expand_sse2_mulvxdi3 (rtx op0, rtx op1, rtx op2)
machine_mode mode = GET_MODE (op0);
rtx t1, t2, t3, t4, t5, t6;
- if (TARGET_AVX512DQ && TARGET_EVEX512 && mode == V8DImode)
+ if (TARGET_AVX512DQ && mode == V8DImode)
emit_insn (gen_avx512dq_mulv8di3 (op0, op1, op2));
else if (TARGET_AVX512DQ && TARGET_AVX512VL && mode == V4DImode)
emit_insn (gen_avx512dq_mulv4di3 (op0, op1, op2));
@@ -25411,7 +25672,7 @@ ix86_notrack_prefixed_insn_p (rtx_insn *insn)
/* Do not emit 'notrack' if it's not an indirect call. */
if (MEM_P (addr)
- && GET_CODE (XEXP (addr, 0)) == SYMBOL_REF)
+ && SYMBOL_REF_P (XEXP (addr, 0)))
return false;
else
return find_reg_note (insn, REG_CALL_NOCF_CHECK, 0);
@@ -26033,8 +26294,7 @@ ix86_gen_ccmp_first (rtx_insn **prep_seq, rtx_insn **gen_seq,
}
}
- *prep_seq = get_insns ();
- end_sequence ();
+ *prep_seq = end_sequence ();
start_sequence ();
@@ -26045,8 +26305,7 @@ ix86_gen_ccmp_first (rtx_insn **prep_seq, rtx_insn **gen_seq,
end_sequence ();
return NULL_RTX;
}
- *gen_seq = get_insns ();
- end_sequence ();
+ *gen_seq = end_sequence ();
return res;
}
@@ -26089,8 +26348,7 @@ ix86_gen_ccmp_next (rtx_insn **prep_seq, rtx_insn **gen_seq, rtx prev,
return NULL_RTX;
}
- *prep_seq = get_insns ();
- end_sequence ();
+ *prep_seq = end_sequence ();
target = gen_rtx_REG (cc_mode, FLAGS_REG);
dfv = ix86_get_flags_cc ((rtx_code) cmp_code);
@@ -26121,8 +26379,7 @@ ix86_gen_ccmp_next (rtx_insn **prep_seq, rtx_insn **gen_seq, rtx prev,
return NULL_RTX;
}
- *gen_seq = get_insns ();
- end_sequence ();
+ *gen_seq = end_sequence ();
return gen_rtx_fmt_ee ((rtx_code) cmp_code, VOIDmode, target, const0_rtx);
}
@@ -26136,8 +26393,7 @@ ix86_gen_bcst_mem (machine_mode mode, rtx x)
{
if (!TARGET_AVX512F
|| !CONST_VECTOR_P (x)
- || (!TARGET_AVX512VL
- && (GET_MODE_SIZE (mode) != 64 || !TARGET_EVEX512))
+ || (!TARGET_AVX512VL && GET_MODE_SIZE (mode) != 64)
|| !VALID_BCST_MODE_P (GET_MODE_INNER (mode))
/* Disallow HFmode broadcast. */
|| GET_MODE_SIZE (GET_MODE_INNER (mode)) < 4)
@@ -26233,8 +26489,8 @@ do_mem_operand:
if (rtx_equal_p (op, args[2]))
return 0xaa;
/* Check if CONST_VECTOR is the ones-complement of args[2]. */
- if (GET_CODE (op) == CONST_VECTOR
- && GET_CODE (args[2]) == CONST_VECTOR
+ if (CONST_VECTOR_P (op)
+ && CONST_VECTOR_P (args[2])
&& rtx_equal_p (simplify_const_unary_operation (NOT, GET_MODE (op),
op, GET_MODE (op)),
args[2]))
@@ -26247,8 +26503,8 @@ do_mem_operand:
if (rtx_equal_p (op, args[0]))
return 0xf0;
/* Check if CONST_VECTOR is the ones-complement of args[0]. */
- if (GET_CODE (op) == CONST_VECTOR
- && GET_CODE (args[0]) == CONST_VECTOR
+ if (CONST_VECTOR_P (op)
+ && CONST_VECTOR_P (args[0])
&& rtx_equal_p (simplify_const_unary_operation (NOT, GET_MODE (op),
op, GET_MODE (op)),
args[0]))
@@ -26261,8 +26517,8 @@ do_mem_operand:
if (rtx_equal_p (op, args[1]))
return 0xcc;
/* Check if CONST_VECTOR is the ones-complement of args[1]. */
- if (GET_CODE (op) == CONST_VECTOR
- && GET_CODE (args[1]) == CONST_VECTOR
+ if (CONST_VECTOR_P (op)
+ && CONST_VECTOR_P (args[1])
&& rtx_equal_p (simplify_const_unary_operation (NOT, GET_MODE (op),
op, GET_MODE (op)),
args[1]))
@@ -26492,15 +26748,6 @@ ix86_expand_ternlog (machine_mode mode, rtx op0, rtx op1, rtx op2, int idx,
&& (!op2 || !side_effects_p (op2))
&& op0)
{
- if (GET_MODE (op0) != mode)
- op0 = gen_lowpart (mode, op0);
- if (!TARGET_64BIT && !register_operand (op0, mode))
- {
- /* Avoid force_reg (mode, op0). */
- rtx reg = gen_reg_rtx (mode);
- emit_move_insn (reg, op0);
- op0 = reg;
- }
emit_move_insn (target, gen_rtx_XOR (mode, op0, CONSTM1_RTX (mode)));
return target;
}
@@ -26525,15 +26772,6 @@ ix86_expand_ternlog (machine_mode mode, rtx op0, rtx op1, rtx op2, int idx,
&& (!op2 || !side_effects_p (op2))
&& op1)
{
- if (GET_MODE (op1) != mode)
- op1 = gen_lowpart (mode, op1);
- if (!TARGET_64BIT && !register_operand (op1, mode))
- {
- /* Avoid force_reg (mode, op1). */
- rtx reg = gen_reg_rtx (mode);
- emit_move_insn (reg, op1);
- op1 = reg;
- }
emit_move_insn (target, gen_rtx_XOR (mode, op1, CONSTM1_RTX (mode)));
return target;
}
@@ -26565,15 +26803,6 @@ ix86_expand_ternlog (machine_mode mode, rtx op0, rtx op1, rtx op2, int idx,
&& (!op1 || !side_effects_p (op1))
&& op2)
{
- if (GET_MODE (op2) != mode)
- op2 = gen_lowpart (mode, op2);
- if (!TARGET_64BIT && !register_operand (op2, mode))
- {
- /* Avoid force_reg (mode, op2). */
- rtx reg = gen_reg_rtx (mode);
- emit_move_insn (reg, op2);
- op2 = reg;
- }
emit_move_insn (target, gen_rtx_XOR (mode, op2, CONSTM1_RTX (mode)));
return target;
}
@@ -26806,6 +27035,109 @@ ix86_expand_ternlog (machine_mode mode, rtx op0, rtx op1, rtx op2, int idx,
return target;
}
+/* GF2P8AFFINEQB matrixes to implement shift and rotate. */
+
+static const uint64_t matrix_ashift[8] =
+{
+ 0,
+ 0x0001020408102040, /* 1 l */
+ 0x0000010204081020, /* 2 l */
+ 0x0000000102040810, /* 3 l */
+ 0x0000000001020408, /* 4 l */
+ 0x0000000000010204, /* 5 l */
+ 0x0000000000000102, /* 6 l */
+ 0x0000000000000001 /* 7 l */
+};
+
+static const uint64_t matrix_lshiftrt[8] =
+{
+ 0,
+ 0x0204081020408000, /* 1 r */
+ 0x0408102040800000, /* 2 r */
+ 0x0810204080000000, /* 3 r */
+ 0x1020408000000000, /* 4 r */
+ 0x2040800000000000, /* 5 r */
+ 0x4080000000000000, /* 6 r */
+ 0x8000000000000000 /* 7 r */
+};
+
+static const uint64_t matrix_ashiftrt[8] =
+{
+ 0,
+ 0x0204081020408080, /* 1 r */
+ 0x0408102040808080, /* 2 r */
+ 0x0810204080808080, /* 3 r */
+ 0x1020408080808080, /* 4 r */
+ 0x2040808080808080, /* 5 r */
+ 0x4080808080808080, /* 6 r */
+ 0x8080808080808080 /* 7 r */
+};
+
+static const uint64_t matrix_rotate[8] =
+{
+ 0,
+ 0x8001020408102040, /* 1 rol8 */
+ 0x4080010204081020, /* 2 rol8 */
+ 0x2040800102040810, /* 3 rol8 */
+ 0x1020408001020408, /* 4 rol8 */
+ 0x0810204080010204, /* 5 rol8 */
+ 0x0408102040800102, /* 6 rol8 */
+ 0x0204081020408001 /* 7 rol8 */
+};
+
+static const uint64_t matrix_rotatert[8] =
+{
+ 0,
+ 0x0204081020408001, /* 1 ror8 */
+ 0x0408102040800102, /* 2 ror8 */
+ 0x0810204080010204, /* 3 ror8 */
+ 0x1020408001020408, /* 4 ror8 */
+ 0x2040800102040810, /* 5 ror8 */
+ 0x4080010204081020, /* 6 ror8 */
+ 0x8001020408102040 /* 7 ror8 */
+};
+
+/* Return an rtx loading a 64-bit GF2P8AFFINE GF(2) matrix that implements
+   a shift of kind CODE by COUNT, as a register with the vector mode of
+   SRC.  */
+
+rtx
+ix86_vgf2p8affine_shift_matrix (rtx src, rtx count, enum rtx_code code)
+{
+ machine_mode mode = GET_MODE (src);
+ const uint64_t *matrix;
+ unsigned shift = INTVAL (count) & 7;
+ gcc_assert (shift > 0 && shift < 8);
+
+ switch (code)
+ {
+ case ASHIFT:
+ matrix = matrix_ashift;
+ break;
+ case ASHIFTRT:
+ matrix = matrix_ashiftrt;
+ break;
+ case LSHIFTRT:
+ matrix = matrix_lshiftrt;
+ break;
+ case ROTATE:
+ matrix = matrix_rotate;
+ break;
+ case ROTATERT:
+ matrix = matrix_rotatert;
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+ int nelts = GET_MODE_NUNITS (mode);
+ rtvec vec = rtvec_alloc (nelts);
+ uint64_t ma = matrix[shift];
+ for (int i = 0; i < nelts; i++)
+ RTVEC_ELT (vec, i) = gen_int_mode ((ma >> ((i % 8) * 8)) & 0xff, QImode);
+
+ return force_reg (mode, gen_rtx_CONST_VECTOR (mode, vec));
+}
+
/* Trunc a vector to a narrow vector, like v4di -> v4si. */
void
diff --git a/gcc/config/i386/i386-features.cc b/gcc/config/i386/i386-features.cc
index c35ac24..0608dd2 100644
--- a/gcc/config/i386/i386-features.cc
+++ b/gcc/config/i386/i386-features.cc
@@ -296,9 +296,8 @@ scalar_chain::scalar_chain (enum machine_mode smode_, enum machine_mode vmode_)
insns_conv = BITMAP_ALLOC (NULL);
queue = NULL;
- n_sse_to_integer = 0;
- n_integer_to_sse = 0;
-
+ cost_sse_integer = 0;
+ weighted_cost_sse_integer = 0 ;
max_visits = x86_stv_max_visits;
}
@@ -337,20 +336,52 @@ scalar_chain::mark_dual_mode_def (df_ref def)
/* Record the def/insn pair so we can later efficiently iterate over
the defs to convert on insns not in the chain. */
bool reg_new = bitmap_set_bit (defs_conv, DF_REF_REGNO (def));
+ basic_block bb = BLOCK_FOR_INSN (DF_REF_INSN (def));
+ profile_count entry_count = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count;
+ bool speed_p = optimize_bb_for_speed_p (bb);
+ int cost = 0;
+
if (!bitmap_bit_p (insns, DF_REF_INSN_UID (def)))
{
if (!bitmap_set_bit (insns_conv, DF_REF_INSN_UID (def))
&& !reg_new)
return;
- n_integer_to_sse++;
+
+      /* Cost of an integer to SSE move.  */
+ if (speed_p)
+ cost = COSTS_N_INSNS (ix86_cost->integer_to_sse) / 2;
+ else if (TARGET_64BIT || smode == SImode)
+ cost = COSTS_N_BYTES (4);
+ /* vmovd (4 bytes) + vpinsrd (6 bytes). */
+ else if (TARGET_SSE4_1)
+ cost = COSTS_N_BYTES (10);
+ /* movd (4 bytes) + movd (4 bytes) + unpckldq (4 bytes). */
+ else
+ cost = COSTS_N_BYTES (12);
}
else
{
if (!reg_new)
return;
- n_sse_to_integer++;
+
+      /* Cost of an SSE to integer move.  */
+ if (speed_p)
+ cost = COSTS_N_INSNS (ix86_cost->sse_to_integer) / 2;
+ else if (TARGET_64BIT || smode == SImode)
+ cost = COSTS_N_BYTES (4);
+ /* vmovd (4 bytes) + vpextrd (6 bytes). */
+ else if (TARGET_SSE4_1)
+ cost = COSTS_N_BYTES (10);
+ /* movd (4 bytes) + psrlq (5 bytes) + movd (4 bytes). */
+ else
+ cost = COSTS_N_BYTES (13);
}
+ if (speed_p)
+ weighted_cost_sse_integer += bb->count.to_sreal_scale (entry_count) * cost;
+
+ cost_sse_integer += cost;
+
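A note on the scaling (editorial; assumes rtl.h's definition COSTS_N_INSNS (N) == (N) * 4): the ix86 move-cost tables are normalized so that a reg-reg move costs 2, so a table entry of 6 represents three moves, and COSTS_N_INSNS (6) / 2 == 12 == COSTS_N_INSNS (3) converts it onto the standard scale where one instruction costs 4, commensurable with the COSTS_N_BYTES values used on the size-optimized paths.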
if (dump_file)
fprintf (dump_file,
" Mark r%d def in insn %d as requiring both modes in chain #%d\n",
@@ -518,26 +549,28 @@ scalar_chain::build (bitmap candidates, unsigned insn_uid, bitmap disallowed)
instead of using a scalar one. */
int
-general_scalar_chain::vector_const_cost (rtx exp)
+general_scalar_chain::vector_const_cost (rtx exp, basic_block bb)
{
gcc_assert (CONST_INT_P (exp));
if (standard_sse_constant_p (exp, vmode))
return ix86_cost->sse_op;
+ if (optimize_bb_for_size_p (bb))
+ return COSTS_N_BYTES (8);
/* We have separate costs for SImode and DImode, use SImode costs
for smaller modes. */
- return ix86_cost->sse_load[smode == DImode ? 1 : 0];
+ return COSTS_N_INSNS (ix86_cost->sse_load[smode == DImode ? 1 : 0]) / 2;
}
-/* Compute a gain for chain conversion. */
+/* Return true if converting the chain is cost-profitable.  */
-int
+bool
general_scalar_chain::compute_convert_gain ()
{
bitmap_iterator bi;
unsigned insn_uid;
int gain = 0;
- int cost = 0;
+ sreal weighted_gain = 0;
if (dump_file)
fprintf (dump_file, "Computing gain for chain #%d...\n", chain_id);
@@ -547,7 +580,7 @@ general_scalar_chain::compute_convert_gain ()
smaller modes than SImode the int load/store costs need to be
adjusted as well. */
unsigned sse_cost_idx = smode == DImode ? 1 : 0;
- unsigned m = smode == DImode ? (TARGET_64BIT ? 1 : 2) : 1;
+ int m = smode == DImode ? (TARGET_64BIT ? 1 : 2) : 1;
EXECUTE_IF_SET_IN_BITMAP (insns, 0, insn_uid, bi)
{
@@ -555,26 +588,58 @@ general_scalar_chain::compute_convert_gain ()
rtx def_set = single_set (insn);
rtx src = SET_SRC (def_set);
rtx dst = SET_DEST (def_set);
+ basic_block bb = BLOCK_FOR_INSN (insn);
int igain = 0;
+ profile_count entry_count = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count;
+ bool speed_p = optimize_bb_for_speed_p (bb);
+ sreal bb_freq = bb->count.to_sreal_scale (entry_count);
if (REG_P (src) && REG_P (dst))
- igain += 2 * m - ix86_cost->xmm_move;
+ {
+ if (!speed_p)
+	    /* A reg-reg move is 2 bytes, while an SSE move is 3.  */
+ igain += COSTS_N_BYTES (2 * m - 3);
+ else
+ /* Move costs are normalized to reg-reg move having cost 2. */
+ igain += COSTS_N_INSNS (2 * m - ix86_cost->xmm_move) / 2;
+ }
else if (REG_P (src) && MEM_P (dst))
- igain
- += m * ix86_cost->int_store[2] - ix86_cost->sse_store[sse_cost_idx];
+ {
+ if (!speed_p)
+ /* Integer load/store is 3+ bytes and SSE 4+. */
+ igain += COSTS_N_BYTES (3 * m - 4);
+ else
+ igain
+ += COSTS_N_INSNS (m * ix86_cost->int_store[2]
+ - ix86_cost->sse_store[sse_cost_idx]) / 2;
+ }
else if (MEM_P (src) && REG_P (dst))
- igain += m * ix86_cost->int_load[2] - ix86_cost->sse_load[sse_cost_idx];
+ {
+ if (!speed_p)
+ igain += COSTS_N_BYTES (3 * m - 4);
+ else
+ igain += COSTS_N_INSNS (m * ix86_cost->int_load[2]
+ - ix86_cost->sse_load[sse_cost_idx]) / 2;
+ }
else
{
/* For operations on memory operands, include the overhead
of explicit load and store instructions. */
if (MEM_P (dst))
- igain += optimize_insn_for_size_p ()
- ? -COSTS_N_BYTES (8)
- : (m * (ix86_cost->int_load[2]
- + ix86_cost->int_store[2])
- - (ix86_cost->sse_load[sse_cost_idx] +
- ix86_cost->sse_store[sse_cost_idx]));
+ {
+ if (!speed_p)
+		/* ??? This probably should account for the size
+		   difference of SSE and integer loads rather than
+		   the full SSE load.  */
+ igain -= COSTS_N_BYTES (8);
+ else
+ {
+ int cost = (m * (ix86_cost->int_load[2]
+ + ix86_cost->int_store[2])
+			      - (ix86_cost->sse_load[sse_cost_idx]
+				 + ix86_cost->sse_store[sse_cost_idx]));
+ igain += COSTS_N_INSNS (cost) / 2;
+ }
+ }
switch (GET_CODE (src))
{
@@ -595,7 +660,7 @@ general_scalar_chain::compute_convert_gain ()
igain += ix86_cost->shift_const - ix86_cost->sse_op;
if (CONST_INT_P (XEXP (src, 0)))
- igain -= vector_const_cost (XEXP (src, 0));
+ igain -= vector_const_cost (XEXP (src, 0), bb);
break;
case ROTATE:
@@ -631,16 +696,17 @@ general_scalar_chain::compute_convert_gain ()
igain += m * ix86_cost->add;
if (CONST_INT_P (XEXP (src, 0)))
- igain -= vector_const_cost (XEXP (src, 0));
+ igain -= vector_const_cost (XEXP (src, 0), bb);
if (CONST_INT_P (XEXP (src, 1)))
- igain -= vector_const_cost (XEXP (src, 1));
+ igain -= vector_const_cost (XEXP (src, 1), bb);
if (MEM_P (XEXP (src, 1)))
{
- if (optimize_insn_for_size_p ())
+ if (!speed_p)
igain -= COSTS_N_BYTES (m == 2 ? 3 : 5);
else
- igain += m * ix86_cost->int_load[2]
- - ix86_cost->sse_load[sse_cost_idx];
+ igain += COSTS_N_INSNS
+ (m * ix86_cost->int_load[2]
+ - ix86_cost->sse_load[sse_cost_idx]) / 2;
}
break;
@@ -698,7 +764,7 @@ general_scalar_chain::compute_convert_gain ()
case CONST_INT:
if (REG_P (dst))
{
- if (optimize_insn_for_size_p ())
+ if (!speed_p)
{
/* xor (2 bytes) vs. xorps (3 bytes). */
if (src == const0_rtx)
@@ -722,14 +788,14 @@ general_scalar_chain::compute_convert_gain ()
/* DImode can be immediate for TARGET_64BIT
and SImode always. */
igain += m * COSTS_N_INSNS (1);
- igain -= vector_const_cost (src);
+ igain -= vector_const_cost (src, bb);
}
}
else if (MEM_P (dst))
{
igain += (m * ix86_cost->int_store[2]
- ix86_cost->sse_store[sse_cost_idx]);
- igain -= vector_const_cost (src);
+ igain -= vector_const_cost (src, bb);
}
break;
@@ -737,13 +803,14 @@ general_scalar_chain::compute_convert_gain ()
if (XVECEXP (XEXP (src, 1), 0, 0) == const0_rtx)
{
// movd (4 bytes) replaced with movdqa (4 bytes).
- if (!optimize_insn_for_size_p ())
- igain += ix86_cost->sse_to_integer - ix86_cost->xmm_move;
+	  if (speed_p)
+ igain += COSTS_N_INSNS (ix86_cost->sse_to_integer
+ - ix86_cost->xmm_move) / 2;
}
else
{
// pshufd; movd replaced with pshufd.
- if (optimize_insn_for_size_p ())
+ if (!speed_p)
igain += COSTS_N_BYTES (4);
else
igain += ix86_cost->sse_to_integer;
@@ -755,55 +822,34 @@ general_scalar_chain::compute_convert_gain ()
}
}
+ if (speed_p)
+ weighted_gain += bb_freq * igain;
+ gain += igain;
+
if (igain != 0 && dump_file)
{
- fprintf (dump_file, " Instruction gain %d for ", igain);
+ fprintf (dump_file, " Instruction gain %d with bb_freq %.2f for",
+ igain, bb_freq.to_double ());
dump_insn_slim (dump_file, insn);
}
- gain += igain;
}
if (dump_file)
- fprintf (dump_file, " Instruction conversion gain: %d\n", gain);
-
- /* Cost the integer to sse and sse to integer moves. */
- if (!optimize_function_for_size_p (cfun))
- {
- cost += n_sse_to_integer * ix86_cost->sse_to_integer;
- /* ??? integer_to_sse but we only have that in the RA cost table.
- Assume sse_to_integer/integer_to_sse are the same which they
- are at the moment. */
- cost += n_integer_to_sse * ix86_cost->sse_to_integer;
- }
- else if (TARGET_64BIT || smode == SImode)
{
- cost += n_sse_to_integer * COSTS_N_BYTES (4);
- cost += n_integer_to_sse * COSTS_N_BYTES (4);
+ fprintf (dump_file, " Instruction conversion gain: %d, \n",
+ gain);
+ fprintf (dump_file, " Registers conversion cost: %d\n",
+ cost_sse_integer);
+ fprintf (dump_file, " Weighted instruction conversion gain: %.2f, \n",
+ weighted_gain.to_double ());
+ fprintf (dump_file, " Weighted registers conversion cost: %.2f\n",
+ weighted_cost_sse_integer.to_double ());
}
- else if (TARGET_SSE4_1)
- {
- /* vmovd (4 bytes) + vpextrd (6 bytes). */
- cost += n_sse_to_integer * COSTS_N_BYTES (10);
- /* vmovd (4 bytes) + vpinsrd (6 bytes). */
- cost += n_integer_to_sse * COSTS_N_BYTES (10);
- }
- else
- {
- /* movd (4 bytes) + psrlq (5 bytes) + movd (4 bytes). */
- cost += n_sse_to_integer * COSTS_N_BYTES (13);
- /* movd (4 bytes) + movd (4 bytes) + unpckldq (4 bytes). */
- cost += n_integer_to_sse * COSTS_N_BYTES (12);
- }
-
- if (dump_file)
- fprintf (dump_file, " Registers conversion cost: %d\n", cost);
-
- gain -= cost;
- if (dump_file)
- fprintf (dump_file, " Total gain: %d\n", gain);
-
- return gain;
+ if (weighted_gain != weighted_cost_sse_integer)
+ return weighted_gain > weighted_cost_sse_integer;
+ else
+ return gain > cost_sse_integer;
}
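
A minimal standalone sketch of the cost normalization used above,
assuming GCC's usual COSTS_N_INSNS (N) == (N) * 4 scaling and move
tables normalized so that a reg-reg move costs 2 (both assumptions;
the definitions live outside this hunk):

    #define COSTS_N_INSNS(n) ((n) * 4)

    /* Gain of replacing M scalar reg-reg moves with one SSE move whose
       table cost is XMM_MOVE; dividing by 2 converts table units
       (reg-reg move == 2) into instruction counts.  */
    static int
    reg_move_gain (int m, int xmm_move)
    {
      return COSTS_N_INSNS (2 * m - xmm_move) / 2;
    }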
/* Insert generated conversion instruction sequence INSNS
@@ -902,8 +948,7 @@ scalar_chain::make_vector_copies (rtx_insn *insn, rtx reg)
else
emit_insn (gen_rtx_SET (gen_rtx_SUBREG (vmode, vreg, 0),
gen_gpr_to_xmm_move_src (vmode, reg)));
- rtx_insn *seq = get_insns ();
- end_sequence ();
+ rtx_insn *seq = end_sequence ();
emit_conversion_insns (seq, insn);
if (dump_file)
@@ -970,8 +1015,7 @@ scalar_chain::convert_reg (rtx_insn *insn, rtx dst, rtx src)
else
emit_move_insn (dst, src);
- rtx_insn *seq = get_insns ();
- end_sequence ();
+ rtx_insn *seq = end_sequence ();
emit_conversion_insns (seq, insn);
if (dump_file)
@@ -1066,8 +1110,7 @@ scalar_chain::convert_op (rtx *op, rtx_insn *insn)
{
start_sequence ();
vec_cst = validize_mem (force_const_mem (vmode, vec_cst));
- rtx_insn *seq = get_insns ();
- end_sequence ();
+ rtx_insn *seq = end_sequence ();
emit_insn_before (seq, insn);
}
@@ -1508,33 +1551,34 @@ general_scalar_chain::convert_insn (rtx_insn *insn)
with numerous special cases. */
static int
-timode_immed_const_gain (rtx cst)
+timode_immed_const_gain (rtx cst, basic_block bb)
{
/* movabsq vs. movabsq+vmovq+vunpacklqdq. */
if (CONST_WIDE_INT_P (cst)
&& CONST_WIDE_INT_NUNITS (cst) == 2
&& CONST_WIDE_INT_ELT (cst, 0) == CONST_WIDE_INT_ELT (cst, 1))
- return optimize_insn_for_size_p () ? -COSTS_N_BYTES (9)
+ return optimize_bb_for_size_p (bb) ? -COSTS_N_BYTES (9)
: -COSTS_N_INSNS (2);
/* 2x movabsq ~ vmovdqa. */
return 0;
}
-/* Compute a gain for chain conversion. */
+/* Return true if it's cost profitable to convert the chain. */
-int
+bool
timode_scalar_chain::compute_convert_gain ()
{
/* Assume that if we have to move TImode values between units,
then transforming this chain isn't worth it. */
- if (n_sse_to_integer || n_integer_to_sse)
- return -1;
+ if (cost_sse_integer)
+ return false;
bitmap_iterator bi;
unsigned insn_uid;
/* Split ties to prefer V1TImode when not optimizing for size. */
int gain = optimize_size ? 0 : 1;
+ sreal weighted_gain = 0;
if (dump_file)
fprintf (dump_file, "Computing gain for chain #%d...\n", chain_id);
@@ -1546,34 +1590,36 @@ timode_scalar_chain::compute_convert_gain ()
rtx src = SET_SRC (def_set);
rtx dst = SET_DEST (def_set);
HOST_WIDE_INT op1val;
+ basic_block bb = BLOCK_FOR_INSN (insn);
int scost, vcost;
int igain = 0;
+ profile_count entry_count = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count;
+ bool speed_p = optimize_bb_for_speed_p (bb);
+ sreal bb_freq = bb->count.to_sreal_scale (entry_count);
switch (GET_CODE (src))
{
case REG:
- if (optimize_insn_for_size_p ())
+ if (!speed_p)
igain = MEM_P (dst) ? COSTS_N_BYTES (6) : COSTS_N_BYTES (3);
else
igain = COSTS_N_INSNS (1);
break;
case MEM:
- igain = optimize_insn_for_size_p () ? COSTS_N_BYTES (7)
- : COSTS_N_INSNS (1);
+ igain = !speed_p ? COSTS_N_BYTES (7) : COSTS_N_INSNS (1);
break;
case CONST_INT:
if (MEM_P (dst)
&& standard_sse_constant_p (src, V1TImode))
- igain = optimize_insn_for_size_p () ? COSTS_N_BYTES (11) : 1;
+ igain = !speed_p ? COSTS_N_BYTES (11) : 1;
break;
case CONST_WIDE_INT:
/* 2 x mov vs. vmovdqa. */
if (MEM_P (dst))
- igain = optimize_insn_for_size_p () ? COSTS_N_BYTES (3)
- : COSTS_N_INSNS (1);
+ igain = !speed_p ? COSTS_N_BYTES (3) : COSTS_N_INSNS (1);
break;
case NOT:
@@ -1587,14 +1633,14 @@ timode_scalar_chain::compute_convert_gain ()
if (!MEM_P (dst))
igain = COSTS_N_INSNS (1);
if (CONST_SCALAR_INT_P (XEXP (src, 1)))
- igain += timode_immed_const_gain (XEXP (src, 1));
+ igain += timode_immed_const_gain (XEXP (src, 1), bb);
break;
case ASHIFT:
case LSHIFTRT:
/* See ix86_expand_v1ti_shift. */
op1val = INTVAL (XEXP (src, 1));
- if (optimize_insn_for_size_p ())
+ if (!speed_p)
{
if (op1val == 64 || op1val == 65)
scost = COSTS_N_BYTES (5);
@@ -1628,7 +1674,7 @@ timode_scalar_chain::compute_convert_gain ()
case ASHIFTRT:
/* See ix86_expand_v1ti_ashiftrt. */
op1val = INTVAL (XEXP (src, 1));
- if (optimize_insn_for_size_p ())
+ if (!speed_p)
{
if (op1val == 64 || op1val == 127)
scost = COSTS_N_BYTES (7);
@@ -1706,7 +1752,7 @@ timode_scalar_chain::compute_convert_gain ()
case ROTATERT:
/* See ix86_expand_v1ti_rotate. */
op1val = INTVAL (XEXP (src, 1));
- if (optimize_insn_for_size_p ())
+ if (!speed_p)
{
scost = COSTS_N_BYTES (13);
if ((op1val & 31) == 0)
@@ -1738,34 +1784,40 @@ timode_scalar_chain::compute_convert_gain ()
{
if (GET_CODE (XEXP (src, 0)) == AND)
/* and;and;or (9 bytes) vs. ptest (5 bytes). */
- igain = optimize_insn_for_size_p() ? COSTS_N_BYTES (4)
- : COSTS_N_INSNS (2);
+ igain = !speed_p ? COSTS_N_BYTES (4) : COSTS_N_INSNS (2);
/* or (3 bytes) vs. ptest (5 bytes). */
- else if (optimize_insn_for_size_p ())
+ else if (!speed_p)
igain = -COSTS_N_BYTES (2);
}
else if (XEXP (src, 1) == const1_rtx)
/* and;cmp -1 (7 bytes) vs. pcmpeqd;pxor;ptest (13 bytes). */
- igain = optimize_insn_for_size_p() ? -COSTS_N_BYTES (6)
- : -COSTS_N_INSNS (1);
+ igain = !speed_p ? -COSTS_N_BYTES (6) : -COSTS_N_INSNS (1);
break;
default:
break;
}
+ gain += igain;
+ if (speed_p)
+ weighted_gain += bb_freq * igain;
+
if (igain != 0 && dump_file)
{
- fprintf (dump_file, " Instruction gain %d for ", igain);
+ fprintf (dump_file, " Instruction gain %d with bb_freq %.2f for ",
+ igain, bb_freq.to_double ());
dump_insn_slim (dump_file, insn);
}
- gain += igain;
}
if (dump_file)
- fprintf (dump_file, " Total gain: %d\n", gain);
+ fprintf (dump_file, " Total gain: %d, weighted gain %.2f\n",
+ gain, weighted_gain.to_double ());
- return gain;
+ if (weighted_gain > (sreal) 0)
+ return true;
+ else
+ return gain > 0;
}
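
A sketch of the decision rule introduced here: per-insn gains are
weighted by each block's execution frequency relative to the function
entry, and the raw sum is only a fallback. A plain double stands in
for GCC's sreal purely for illustration:

    #include <stdbool.h>

    /* Illustration only: decide profitability from per-insn gains
       IGAIN[] and block frequencies BB_FREQ[].  */
    static bool
    chain_profitable_p (const int *igain, const double *bb_freq, int n)
    {
      int gain = 0;
      double weighted = 0.0;
      for (int i = 0; i < n; i++)
        {
          gain += igain[i];
          weighted += bb_freq[i] * igain[i];
        }
      /* Prefer the frequency-weighted gain; fall back to the raw
         sum when the weighted gain is not positive.  */
      return weighted > 0.0 ? true : gain > 0;
    }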
/* Fix uses of converted REG in debug insns. */
@@ -1874,8 +1926,7 @@ timode_scalar_chain::convert_insn (rtx_insn *insn)
src = validize_mem (force_const_mem (V1TImode, src));
use_move = MEM_P (dst);
}
- rtx_insn *seq = get_insns ();
- end_sequence ();
+ rtx_insn *seq = end_sequence ();
if (seq)
emit_insn_before (seq, insn);
if (use_move)
@@ -2090,7 +2141,7 @@ convertible_comparison_p (rtx_insn *insn, enum machine_mode mode)
gcc_assert (GET_CODE (src) == COMPARE);
- if (GET_CODE (dst) != REG
+ if (!REG_P (dst)
|| REGNO (dst) != FLAGS_REG
|| GET_MODE (dst) != CCZmode)
return false;
@@ -2561,7 +2612,7 @@ convert_scalars_to_vector (bool timode_p)
conversions. */
if (chain->build (&candidates[i], uid, disallowed))
{
- if (chain->compute_convert_gain () > 0)
+ if (chain->compute_convert_gain ())
converted_insns += chain->convert ();
else if (dump_file)
fprintf (dump_file, "Chain #%d conversion is not profitable\n",
@@ -2902,7 +2953,7 @@ rest_of_insert_endbr_and_patchable_area (bool need_endbr,
/* Also generate ENDBRANCH for non-tail call which
may return via indirect branch. */
- if (GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
+ if (SYMBOL_REF_P (XEXP (fnaddr, 0)))
fndecl = SYMBOL_REF_DECL (XEXP (fnaddr, 0));
if (fndecl == NULL_TREE)
fndecl = MEM_EXPR (fnaddr);
@@ -3034,6 +3085,135 @@ ix86_rpad_gate ()
&& optimize_function_for_speed_p (cfun));
}
+enum x86_cse_kind
+{
+ X86_CSE_CONST0_VECTOR,
+ X86_CSE_CONSTM1_VECTOR,
+ X86_CSE_VEC_DUP,
+ X86_CSE_TLS_GD,
+ X86_CSE_TLS_LD_BASE,
+ X86_CSE_TLSDESC
+};
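
As an illustration (not part of the patch), a source like the
following produces the redundant vector patterns above when compiled
with, say, -O2 -mavx2; the TLS kinds arise from -fPIC TLS accesses
instead:

    #include <immintrin.h>

    __m256i a, b;

    void
    kinds (int c, int x)
    {
      if (c)
        a = _mm256_set1_epi32 (x);   /* X86_CSE_VEC_DUP */
      else
        b = _mm256_set1_epi32 (x);   /* same broadcast, other block */
    }

with _mm256_setzero_si256 () and _mm256_set1_epi32 (-1) playing the
same role for X86_CSE_CONST0_VECTOR and X86_CSE_CONSTM1_VECTOR.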
+
+struct redundant_pattern
+{
+ /* Bitmap of basic blocks with broadcast instructions. */
+ auto_bitmap bbs;
+ /* Bitmap of broadcast instructions. */
+ auto_bitmap insns;
+ /* The broadcast inner scalar. */
+ rtx val;
+ /* The actual redundant source value for UNSPEC_TLSDESC. */
+ rtx tlsdesc_val;
+ /* The inner scalar mode. */
+ machine_mode mode;
+ /* The instruction which sets the inner scalar. Nullptr if the
+ inner scalar applies to the whole function instead of being
+ defined within a single block. */
+ rtx_insn *def_insn;
+ /* The widest broadcast source. */
+ rtx broadcast_source;
+ /* The widest broadcast register. */
+ rtx broadcast_reg;
+ /* The basic block of the broadcast instruction. */
+ basic_block bb;
+ /* The number of broadcast instructions with the same inner scalar. */
+ unsigned HOST_WIDE_INT count;
+ /* The threshold of broadcast instructions with the same inner
+ scalar. */
+ unsigned int threshold;
+ /* The widest broadcast size in bytes. */
+ unsigned int size;
+ /* Load kind. */
+ x86_cse_kind kind;
+};
+
+/* Generate a vector set, DEST = SRC, at entry of the nearest dominator
+ for basic block map BBS, which is in the fake loop that contains the
+ whole function, so that there is only a single vector set in the
+ whole function. If not nullptr, LOAD is a pointer to the load. */
+
+static void
+ix86_place_single_vector_set (rtx dest, rtx src, bitmap bbs,
+ redundant_pattern *load = nullptr)
+{
+ basic_block bb = nearest_common_dominator_for_set (CDI_DOMINATORS, bbs);
+ /* For X86_CSE_VEC_DUP, don't place the vector set outside of the loop
+ to avoid extra spills. */
+ if (!load || load->kind != X86_CSE_VEC_DUP)
+ {
+ while (bb->loop_father->latch
+ != EXIT_BLOCK_PTR_FOR_FN (cfun))
+ bb = get_immediate_dominator (CDI_DOMINATORS,
+ bb->loop_father->header);
+ }
+
+ rtx set = gen_rtx_SET (dest, src);
+
+ rtx_insn *insn = BB_HEAD (bb);
+ while (insn && !NONDEBUG_INSN_P (insn))
+ {
+ if (insn == BB_END (bb))
+ {
+ insn = NULL;
+ break;
+ }
+ insn = NEXT_INSN (insn);
+ }
+
+ rtx_insn *set_insn;
+ if (insn == BB_HEAD (bb))
+ {
+ set_insn = emit_insn_before (set, insn);
+ if (dump_file)
+ {
+ fprintf (dump_file, "\nPlace:\n\n");
+ print_rtl_single (dump_file, set_insn);
+ fprintf (dump_file, "\nbefore:\n\n");
+ print_rtl_single (dump_file, insn);
+ fprintf (dump_file, "\n");
+ }
+ }
+ else
+ {
+ rtx_insn *after = insn ? PREV_INSN (insn) : BB_END (bb);
+ set_insn = emit_insn_after (set, after);
+ if (dump_file)
+ {
+ fprintf (dump_file, "\nPlace:\n\n");
+ print_rtl_single (dump_file, set_insn);
+ fprintf (dump_file, "\nafter:\n\n");
+ print_rtl_single (dump_file, after);
+ fprintf (dump_file, "\n");
+ }
+ }
+
+ if (load && load->kind == X86_CSE_VEC_DUP)
+ {
+ /* Get the source from LOAD as (reg:SI 99) in
+
+ (vec_duplicate:V4SI (reg:SI 99))
+
+ */
+ rtx inner_scalar = load->val;
+ /* Set the source in (vec_duplicate:V4SI (reg:SI 99)). */
+ rtx reg = XEXP (src, 0);
+ if ((REG_P (inner_scalar) || MEM_P (inner_scalar))
+ && GET_MODE (reg) != GET_MODE (inner_scalar))
+ inner_scalar = gen_rtx_SUBREG (GET_MODE (reg), inner_scalar, 0);
+ rtx set = gen_rtx_SET (reg, inner_scalar);
+ insn = emit_insn_before (set, set_insn);
+ if (dump_file)
+ {
+ fprintf (dump_file, "\nAdd:\n\n");
+ print_rtl_single (dump_file, insn);
+ fprintf (dump_file, "\nbefore:\n\n");
+ print_rtl_single (dump_file, set_insn);
+ fprintf (dump_file, "\n");
+ }
+ }
+}
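
For example (a hedged illustration): with two arms both materializing
a zero vector,

    #include <immintrin.h>

    __m128 a, b;

    void
    f (int c)
    {
      if (c)
        a = _mm_setzero_ps ();
      else
        b = _mm_setzero_ps ();
    }

the two zero sets share the entry block as their nearest common
dominator, so a single vxorps is emitted there and both uses read the
broadcast register.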
+
/* At entry of the nearest common dominator for basic blocks with
conversions/rcp/sqrt/rsqrt/round, generate a single
vxorps %xmmN, %xmmN, %xmmN
@@ -3099,7 +3279,7 @@ remove_partial_avx_dependency (void)
break;
}
- /* Only hanlde conversion here. */
+ /* Only handle conversion here. */
machine_mode src_mode
= convert_p ? GET_MODE (XEXP (src, 0)) : VOIDmode;
switch (src_mode)
@@ -3155,7 +3335,6 @@ remove_partial_avx_dependency (void)
/* Generate an XMM vector SET. */
set = gen_rtx_SET (vec, src);
set_insn = emit_insn_before (set, insn);
- df_insn_rescan (set_insn);
if (cfun->can_throw_non_call_exceptions)
{
@@ -3188,35 +3367,10 @@ remove_partial_avx_dependency (void)
calculate_dominance_info (CDI_DOMINATORS);
loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
- /* Generate a vxorps at entry of the nearest dominator for basic
- blocks with conversions, which is in the fake loop that
- contains the whole function, so that there is only a single
- vxorps in the whole function. */
- bb = nearest_common_dominator_for_set (CDI_DOMINATORS,
- convert_bbs);
- while (bb->loop_father->latch
- != EXIT_BLOCK_PTR_FOR_FN (cfun))
- bb = get_immediate_dominator (CDI_DOMINATORS,
- bb->loop_father->header);
-
- set = gen_rtx_SET (v4sf_const0, CONST0_RTX (V4SFmode));
+ ix86_place_single_vector_set (v4sf_const0,
+ CONST0_RTX (V4SFmode),
+ convert_bbs);
- insn = BB_HEAD (bb);
- while (insn && !NONDEBUG_INSN_P (insn))
- {
- if (insn == BB_END (bb))
- {
- insn = NULL;
- break;
- }
- insn = NEXT_INSN (insn);
- }
- if (insn == BB_HEAD (bb))
- set_insn = emit_insn_before (set, insn);
- else
- set_insn = emit_insn_after (set,
- insn ? PREV_INSN (insn) : BB_END (bb));
- df_insn_rescan (set_insn);
loop_optimizer_finalize ();
if (!control_flow_insns.is_empty ())
@@ -3288,6 +3442,1255 @@ make_pass_remove_partial_avx_dependency (gcc::context *ctxt)
return new pass_remove_partial_avx_dependency (ctxt);
}
+/* Return a machine mode suitable for vector SIZE with SMODE inner
+ mode. */
+
+static machine_mode
+ix86_get_vector_cse_mode (unsigned int size, machine_mode smode)
+{
+ /* Use the inner scalar mode of vector broadcast source in:
+
+ (set (reg:V8DF 394)
+ (vec_duplicate:V8DF (reg:V2DF 190 [ alpha ])))
+
+ to compute the vector mode for broadcast from vector source.
+ */
+ if (VECTOR_MODE_P (smode))
+ smode = GET_MODE_INNER (smode);
+ scalar_mode s_mode = as_a <scalar_mode> (smode);
+ poly_uint64 nunits = size / GET_MODE_SIZE (smode);
+ machine_mode mode = mode_for_vector (s_mode, nunits).require ();
+ return mode;
+}
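
A worked example (values assumed for illustration): a widest broadcast
of 64 bytes with an SImode (4-byte) inner scalar gives nunits
= 64 / 4 = 16, so the function returns V16SImode; for a V2DF broadcast
source the inner mode is first reduced to DFmode, giving 64 / 8 = 8
and V8DFmode.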
+
+/* Replace the source operand of instructions in VECTOR_INSNS with
+ VECTOR_CONST in VECTOR_MODE. */
+
+static void
+replace_vector_const (machine_mode vector_mode, rtx vector_const,
+ auto_bitmap &vector_insns,
+ machine_mode scalar_mode)
+{
+ bitmap_iterator bi;
+ unsigned int id;
+
+ EXECUTE_IF_SET_IN_BITMAP (vector_insns, 0, id, bi)
+ {
+ rtx_insn *insn = DF_INSN_UID_GET (id)->insn;
+
+ /* Get the single SET instruction. */
+ rtx set = single_set (insn);
+ rtx src = SET_SRC (set);
+ rtx dest = SET_DEST (set);
+ machine_mode mode = GET_MODE (dest);
+
+ rtx replace;
+ /* Replace the source operand with VECTOR_CONST. */
+ if (SUBREG_P (src) || mode == vector_mode)
+ replace = vector_const;
+ else
+ {
+ unsigned int size = GET_MODE_SIZE (mode);
+ if (size < ix86_regmode_natural_size (mode))
+ {
+ /* If the mode size is smaller than its natural size,
+ first insert an extra move with a QI vector SUBREG
+ of the same size to avoid validate_subreg failure. */
+ machine_mode vmode
+ = ix86_get_vector_cse_mode (size, scalar_mode);
+ rtx vreg;
+ if (mode == vmode)
+ vreg = vector_const;
+ else
+ {
+ vreg = gen_reg_rtx (vmode);
+ rtx vsubreg = gen_rtx_SUBREG (vmode, vector_const, 0);
+ rtx pat = gen_rtx_SET (vreg, vsubreg);
+ rtx_insn *vinsn = emit_insn_before (pat, insn);
+ if (dump_file)
+ {
+ fprintf (dump_file, "\nInsert an extra move:\n\n");
+ print_rtl_single (dump_file, vinsn);
+ fprintf (dump_file, "\nbefore:\n\n");
+ print_rtl_single (dump_file, insn);
+ fprintf (dump_file, "\n");
+ }
+ }
+ replace = gen_rtx_SUBREG (mode, vreg, 0);
+ }
+ else
+ replace = gen_rtx_SUBREG (mode, vector_const, 0);
+ }
+
+ if (dump_file)
+ {
+ fprintf (dump_file, "\nReplace:\n\n");
+ print_rtl_single (dump_file, insn);
+ }
+ SET_SRC (set) = replace;
+ /* Drop possible dead definitions. */
+ PATTERN (insn) = set;
+ INSN_CODE (insn) = -1;
+ recog_memoized (insn);
+ if (dump_file)
+ {
+ fprintf (dump_file, "\nwith:\n\n");
+ print_rtl_single (dump_file, insn);
+ fprintf (dump_file, "\n");
+ }
+ df_insn_rescan (insn);
+ }
+}
+
+/* Return the inner scalar if OP is a broadcast, else return nullptr. */
+
+static rtx
+ix86_broadcast_inner (rtx op, machine_mode mode,
+ machine_mode *scalar_mode_p,
+ x86_cse_kind *kind_p, rtx_insn **insn_p)
+{
+ switch (standard_sse_constant_p (op, mode))
+ {
+ case 1:
+ *scalar_mode_p = QImode;
+ *kind_p = X86_CSE_CONST0_VECTOR;
+ *insn_p = nullptr;
+ return const0_rtx;
+ case 2:
+ *scalar_mode_p = QImode;
+ *kind_p = X86_CSE_CONSTM1_VECTOR;
+ *insn_p = nullptr;
+ return constm1_rtx;
+ default:
+ break;
+ }
+
+ mode = GET_MODE (op);
+ int nunits = GET_MODE_NUNITS (mode);
+ if (nunits < 2)
+ return nullptr;
+
+ *kind_p = X86_CSE_VEC_DUP;
+
+ rtx reg;
+ if (GET_CODE (op) == VEC_DUPLICATE)
+ {
+ /* Only
+ (vec_duplicate:V4SI (reg:SI 99))
+ (vec_duplicate:V2DF (mem/u/c:DF (symbol_ref/u:DI ("*.LC1") [flags 0x2]) [0 S8 A64]))
+ are supported. Set OP to the broadcast source by default. */
+ op = XEXP (op, 0);
+ reg = op;
+ if (SUBREG_P (op)
+ && SUBREG_BYTE (op) == 0
+ && !paradoxical_subreg_p (op))
+ reg = SUBREG_REG (op);
+ if (!REG_P (reg))
+ {
+ if (MEM_P (op)
+ && SYMBOL_REF_P (XEXP (op, 0))
+ && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
+ {
+ /* Handle constant broadcast from memory. */
+ *scalar_mode_p = GET_MODE_INNER (mode);
+ *insn_p = nullptr;
+ return op;
+ }
+ return nullptr;
+ }
+ }
+ else if (CONST_VECTOR_P (op))
+ {
+ rtx first = XVECEXP (op, 0, 0);
+ for (int i = 1; i < nunits; ++i)
+ {
+ rtx tmp = XVECEXP (op, 0, i);
+ /* Vector duplicate value. */
+ if (!rtx_equal_p (tmp, first))
+ return nullptr;
+ }
+ *scalar_mode_p = GET_MODE (first);
+ *insn_p = nullptr;
+ return first;
+ }
+ else
+ return nullptr;
+
+ mode = GET_MODE (op);
+
+ /* Only a single def chain is supported. */
+ df_ref ref = DF_REG_DEF_CHAIN (REGNO (reg));
+ if (!ref
+ || DF_REF_IS_ARTIFICIAL (ref)
+ || DF_REF_NEXT_REG (ref) != nullptr)
+ return nullptr;
+
+ rtx_insn *insn = DF_REF_INSN (ref);
+ rtx set = single_set (insn);
+ if (!set)
+ return nullptr;
+
+ rtx src = SET_SRC (set);
+
+ if (CONST_INT_P (src))
+ {
+ /* Handle sequences like
+
+ (set (reg:SI 99)
+ (const_int 34 [0x22]))
+ (set (reg:V4SI 98)
+ (vec_duplicate:V4SI (reg:SI 99)))
+
+ Set *INSN_P to nullptr and return SET_SRC if SET_SRC is an
+ integer constant. */
+ op = src;
+ if (mode != GET_MODE (reg))
+ op = gen_int_mode (INTVAL (src), mode);
+ *insn_p = nullptr;
+ }
+ else
+ {
+ /* Handle sequences like
+
+ (set (reg:QI 105 [ c ])
+ (reg:QI 5 di [ c ]))
+ (set (reg:V64QI 102 [ _1 ])
+ (vec_duplicate:V64QI (reg:QI 105 [ c ])))
+
+ (set (reg/v:SI 116 [ argc ])
+ (mem/c:SI (reg:SI 135) [2 argc+0 S4 A32]))
+ (set (reg:V4SI 119 [ _45 ])
+ (vec_duplicate:V4SI (reg/v:SI 116 [ argc ])))
+
+ (set (reg:SI 98 [ _1 ])
+ (sign_extend:SI (reg:QI 106 [ c ])))
+ (set (reg:V16SI 103 [ _2 ])
+ (vec_duplicate:V16SI (reg:SI 98 [ _1 ])))
+
+ (set (reg:SI 102 [ cost ])
+ (mem/c:SI (symbol_ref:DI ("cost") [flags 0x40])))
+ (set (reg:V4HI 103 [ _16 ])
+ (vec_duplicate:V4HI (subreg:HI (reg:SI 102 [ cost ]) 0)))
+
+ (set (subreg:SI (reg/v:HI 107 [ cr_val ]) 0)
+ (ashift:SI (reg:SI 158)
+ (subreg:QI (reg:SI 156 [ _2 ]) 0)))
+ (set (reg:V16HI 183 [ _61 ])
+ (vec_duplicate:V16HI (reg/v:HI 107 [ cr_val ])))
+
+ Set *INSN_P to INSN and return the broadcast source otherwise. */
+ *insn_p = insn;
+ }
+
+ *scalar_mode_p = mode;
+ return op;
+}
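
An illustrative source for the single-def-chain case (assuming
-O2 -mavx512bw), mirroring the first RTL sequence quoted above:

    #include <immintrin.h>

    __m512i
    bcast (char c)
    {
      /* (set (reg:QI 105 [ c ]) (reg:QI 5 di [ c ]))
         (set (reg:V64QI 102) (vec_duplicate:V64QI (reg:QI 105)))  */
      return _mm512_set1_epi8 (c);
    }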
+
+/* Replace CALL instruction in TLS_CALL_INSNS with SET from SRC and
+ put the updated instruction in UPDATED_TLS_INSNS. */
+
+static void
+replace_tls_call (rtx src, auto_bitmap &tls_call_insns,
+ auto_bitmap &updated_tls_insns)
+{
+ bitmap_iterator bi;
+ unsigned int id;
+
+ EXECUTE_IF_SET_IN_BITMAP (tls_call_insns, 0, id, bi)
+ {
+ rtx_insn *insn = DF_INSN_UID_GET (id)->insn;
+
+ /* If this isn't a CALL, only GNU2 TLS implicit CALL patterns are
+ allowed. */
+ if (!CALL_P (insn))
+ {
+ attr_tls64 tls64 = get_attr_tls64 (insn);
+ if (tls64 != TLS64_CALL && tls64 != TLS64_COMBINE)
+ gcc_unreachable ();
+ }
+
+ rtx pat = PATTERN (insn);
+ gcc_assert (GET_CODE (pat) == PARALLEL);
+ rtx set = XVECEXP (pat, 0, 0);
+ gcc_assert (GET_CODE (set) == SET);
+ rtx dest = SET_DEST (set);
+
+ set = gen_rtx_SET (dest, src);
+ rtx_insn *set_insn = emit_insn_after (set, insn);
+ if (recog_memoized (set_insn) < 0)
+ gcc_unreachable ();
+
+ /* Put SET_INSN in UPDATED_TLS_INSNS. */
+ bitmap_set_bit (updated_tls_insns, INSN_UID (set_insn));
+
+ if (dump_file)
+ {
+ fprintf (dump_file, "\nReplace:\n\n");
+ print_rtl_single (dump_file, insn);
+ fprintf (dump_file, "\nwith:\n\n");
+ print_rtl_single (dump_file, set_insn);
+ fprintf (dump_file, "\n");
+ }
+
+ /* Delete the CALL insn. */
+ delete_insn (insn);
+
+ df_insn_rescan (set_insn);
+ }
+}
+
+/* Return the basic block which dominates all basic blocks which set
+ hard register REGNO used in basic block BB. */
+
+static basic_block
+ix86_get_dominator_for_reg (unsigned int regno, basic_block bb)
+{
+ basic_block set_bb;
+ auto_bitmap set_bbs;
+
+ /* Get all BBs which set REGNO and dominate the current BB from all
+ DEFs of REGNO. */
+ for (df_ref def = DF_REG_DEF_CHAIN (regno);
+ def;
+ def = DF_REF_NEXT_REG (def))
+ if (!DF_REF_IS_ARTIFICIAL (def)
+ && !DF_REF_FLAGS_IS_SET (def, DF_REF_MAY_CLOBBER)
+ && !DF_REF_FLAGS_IS_SET (def, DF_REF_MUST_CLOBBER))
+ {
+ set_bb = DF_REF_BB (def);
+ if (dominated_by_p (CDI_DOMINATORS, bb, set_bb))
+ bitmap_set_bit (set_bbs, set_bb->index);
+ }
+
+ bb = nearest_common_dominator_for_set (CDI_DOMINATORS, set_bbs);
+ return bb;
+}
+
+/* Mark FLAGS register as live in DATA, a bitmap of live caller-saved
+ registers, if DEST is FLAGS register. */
+
+static void
+ix86_check_flags_reg (rtx dest, const_rtx, void *data)
+{
+ auto_bitmap *live_caller_saved_regs = (auto_bitmap *) data;
+ if (REG_P (dest) && REGNO (dest) == FLAGS_REG)
+ bitmap_set_bit (*live_caller_saved_regs, FLAGS_REG);
+}
+
+/* Emit a TLS_SET instruction of KIND in basic block BB. Store the
+ insertion point in *BEFORE_P for emit_insn_before or in *AFTER_P
+ for emit_insn_after. UPDATED_GNU_TLS_INSNS contains instructions
+ which replace the GNU TLS instructions. UPDATED_GNU2_TLS_INSNS
+ contains instructions which replace the GNU2 TLS instructions. */
+
+static rtx_insn *
+ix86_emit_tls_call (rtx tls_set, x86_cse_kind kind, basic_block bb,
+ rtx_insn **before_p, rtx_insn **after_p,
+ auto_bitmap &updated_gnu_tls_insns,
+ auto_bitmap &updated_gnu2_tls_insns)
+{
+ rtx_insn *tls_insn;
+
+ do
+ {
+ rtx_insn *insn = BB_HEAD (bb);
+ while (insn && !NONDEBUG_INSN_P (insn))
+ {
+ if (insn == BB_END (bb))
+ {
+ /* This must be the beginning basic block:
+
+ (note 4 0 2 2 [bb 2] NOTE_INSN_BASIC_BLOCK)
+ (note 2 4 26 2 NOTE_INSN_FUNCTION_BEG)
+
+ or a basic block with only a label:
+
+ (code_label 78 11 77 3 14 (nil) [1 uses])
+ (note 77 78 54 3 [bb 3] NOTE_INSN_BASIC_BLOCK)
+
+ or a basic block with only a debug marker:
+
+ (note 3 0 2 2 [bb 2] NOTE_INSN_BASIC_BLOCK)
+ (note 2 3 5 2 NOTE_INSN_FUNCTION_BEG)
+ (debug_insn 5 2 16 2 (debug_marker) "x.c":6:3 -1 (nil))
+
+ */
+ gcc_assert (DEBUG_INSN_P (insn)
+ || (NOTE_P (insn)
+ && ((NOTE_KIND (insn)
+ == NOTE_INSN_FUNCTION_BEG)
+ || (NOTE_KIND (insn)
+ == NOTE_INSN_BASIC_BLOCK))));
+ insn = NULL;
+ break;
+ }
+ insn = NEXT_INSN (insn);
+ }
+
+ /* TLS_GD and TLS_LD_BASE instructions are calls to normal
+ functions which clobber caller-saved registers. TLSDESC
+ instructions only clobber FLAGS. If any registers clobbered
+ by TLS instructions are live in this basic block, we must
+ insert the TLS instructions after all clobbered live registers
+ are dead. */
+
+ auto_bitmap live_caller_saved_regs;
+ bitmap in = df_live ? DF_LIVE_IN (bb) : DF_LR_IN (bb);
+
+ if (bitmap_bit_p (in, FLAGS_REG))
+ bitmap_set_bit (live_caller_saved_regs, FLAGS_REG);
+
+ unsigned int i;
+
+ /* Get all live caller-saved registers for TLS_GD and TLS_LD_BASE
+ instructions. */
+ if (kind != X86_CSE_TLSDESC)
+ for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
+ if (call_used_regs[i]
+ && !fixed_regs[i]
+ && bitmap_bit_p (in, i))
+ bitmap_set_bit (live_caller_saved_regs, i);
+
+ if (bitmap_empty_p (live_caller_saved_regs))
+ {
+ if (insn == BB_HEAD (bb))
+ {
+ *before_p = insn;
+ tls_insn = emit_insn_before (tls_set, insn);
+ }
+ else
+ {
+ /* Emit the TLS call after NOTE_INSN_FUNCTION_BEG in the
+ beginning basic block:
+
+ (note 4 0 2 2 [bb 2] NOTE_INSN_BASIC_BLOCK)
+ (note 2 4 26 2 NOTE_INSN_FUNCTION_BEG)
+
+ or after NOTE_INSN_BASIC_BLOCK in a basic block with
+ only a label:
+
+ (code_label 78 11 77 3 14 (nil) [1 uses])
+ (note 77 78 54 3 [bb 3] NOTE_INSN_BASIC_BLOCK)
+
+ or after debug marker in a basic block with only a
+ debug marker:
+
+ (note 3 0 2 2 [bb 2] NOTE_INSN_BASIC_BLOCK)
+ (note 2 3 5 2 NOTE_INSN_FUNCTION_BEG)
+ (debug_insn 5 2 16 2 (debug_marker) "x.c":6:3 -1 (nil))
+
+ */
+ insn = insn ? PREV_INSN (insn) : BB_END (bb);
+ *after_p = insn;
+ tls_insn = emit_insn_after (tls_set, insn);
+ }
+ return tls_insn;
+ }
+
+ bool repeat = false;
+
+ /* Search for REG_DEAD notes in this basic block. */
+ FOR_BB_INSNS (bb, insn)
+ {
+ if (!NONDEBUG_INSN_P (insn))
+ continue;
+
+ /* NB: A conditional jump is the only instruction which reads
+ the flags register and changes control flow. We can never
+ place the TLS call after an unconditional jump. */
+ if (JUMP_P (insn))
+ {
+ /* This must be a conditional jump. */
+ rtx label = JUMP_LABEL (insn);
+ if (label == nullptr
+ || ANY_RETURN_P (label)
+ || !(LABEL_P (label) || SYMBOL_REF_P (label)))
+ gcc_unreachable ();
+
+ /* Place the call before all FLAGS_REG setting BBs since
+ we can place a call neither before nor after a conditional
+ jump. */
+ bb = ix86_get_dominator_for_reg (FLAGS_REG, bb);
+
+ /* Start over again. */
+ repeat = true;
+ break;
+ }
+
+ if (bitmap_bit_p (updated_gnu_tls_insns, INSN_UID (insn)))
+ {
+ /* Insert the __tls_get_addr call before INSN which
+ replaces a __tls_get_addr call. */
+ *before_p = insn;
+ tls_insn = emit_insn_before (tls_set, insn);
+ return tls_insn;
+ }
+
+ if (bitmap_bit_p (updated_gnu2_tls_insns, INSN_UID (insn)))
+ {
+ /* Mark FLAGS register as dead since FLAGS register
+ would be clobbered by the GNU2 TLS instruction. */
+ bitmap_clear_bit (live_caller_saved_regs, FLAGS_REG);
+ continue;
+ }
+
+ /* Check if FLAGS register is live. */
+ note_stores (insn, ix86_check_flags_reg,
+ &live_caller_saved_regs);
+
+ rtx link;
+ for (link = REG_NOTES (insn); link; link = XEXP (link, 1))
+ if (REG_NOTE_KIND (link) == REG_DEAD
+ && REG_P (XEXP (link, 0)))
+ {
+ /* Mark the live caller-saved register as dead. */
+ for (i = REGNO (XEXP (link, 0));
+ i < END_REGNO (XEXP (link, 0));
+ i++)
+ if (i < FIRST_PSEUDO_REGISTER)
+ bitmap_clear_bit (live_caller_saved_regs, i);
+
+ if (bitmap_empty_p (live_caller_saved_regs))
+ {
+ *after_p = insn;
+ tls_insn = emit_insn_after (tls_set, insn);
+ return tls_insn;
+ }
+ }
+ }
+
+ /* NB: Start over again for conditional jump. */
+ if (repeat)
+ continue;
+
+ gcc_assert (!bitmap_empty_p (live_caller_saved_regs));
+
+ /* If any live caller-saved registers aren't dead at the end of
+ this basic block, get the basic block which dominates all
+ basic blocks which set the remaining live registers. */
+ auto_bitmap set_bbs;
+ bitmap_iterator bi;
+ unsigned int id;
+ EXECUTE_IF_SET_IN_BITMAP (live_caller_saved_regs, 0, id, bi)
+ {
+ basic_block set_bb = ix86_get_dominator_for_reg (id, bb);
+ bitmap_set_bit (set_bbs, set_bb->index);
+ }
+ bb = nearest_common_dominator_for_set (CDI_DOMINATORS, set_bbs);
+ }
+ while (true);
+}
+
+/* Generate a TLS call of KIND with VAL and copy the call result to DEST,
+ at entry of the nearest dominator for basic block map BBS, which is in
+ the fake loop that contains the whole function, so that there is only
+ a single TLS CALL of KIND with VAL in the whole function.
+ UPDATED_GNU_TLS_INSNS contains instructions which replace the GNU TLS
+ instructions. UPDATED_GNU2_TLS_INSNS contains instructions which
+ replace the GNU2 TLS instructions. If TLSDESC_SET isn't nullptr,
+ insert it before the TLS call. */
+
+static void
+ix86_place_single_tls_call (rtx dest, rtx val, x86_cse_kind kind,
+ auto_bitmap &bbs,
+ auto_bitmap &updated_gnu_tls_insns,
+ auto_bitmap &updated_gnu2_tls_insns,
+ rtx tlsdesc_set = nullptr)
+{
+ basic_block bb = nearest_common_dominator_for_set (CDI_DOMINATORS, bbs);
+ while (bb->loop_father->latch
+ != EXIT_BLOCK_PTR_FOR_FN (cfun))
+ bb = get_immediate_dominator (CDI_DOMINATORS,
+ bb->loop_father->header);
+
+ rtx rax = nullptr, rdi;
+ rtx eqv = nullptr;
+ rtx caddr;
+ rtx set;
+ rtx clob;
+ rtx symbol;
+ rtx tls;
+
+ switch (kind)
+ {
+ case X86_CSE_TLS_GD:
+ rax = gen_rtx_REG (Pmode, AX_REG);
+ rdi = gen_rtx_REG (Pmode, DI_REG);
+ caddr = ix86_tls_get_addr ();
+
+ symbol = XVECEXP (val, 0, 0);
+ tls = gen_tls_global_dynamic_64 (Pmode, rax, symbol, caddr, rdi);
+
+ if (GET_MODE (symbol) != Pmode)
+ symbol = gen_rtx_ZERO_EXTEND (Pmode, symbol);
+ eqv = symbol;
+ break;
+
+ case X86_CSE_TLS_LD_BASE:
+ rax = gen_rtx_REG (Pmode, AX_REG);
+ rdi = gen_rtx_REG (Pmode, DI_REG);
+ caddr = ix86_tls_get_addr ();
+
+ tls = gen_tls_local_dynamic_base_64 (Pmode, rax, caddr, rdi);
+
+ /* Attach a unique REG_EQUAL to DEST, to allow the RTL optimizers
+ to share the LD_BASE result with other LD model accesses. */
+ eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
+ UNSPEC_TLS_LD_BASE);
+
+ break;
+
+ case X86_CSE_TLSDESC:
+ set = gen_rtx_SET (dest, val);
+ clob = gen_rtx_CLOBBER (VOIDmode,
+ gen_rtx_REG (CCmode, FLAGS_REG));
+ tls = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clob));
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ /* Emit the TLS CALL insn. */
+ rtx_insn *before = nullptr;
+ rtx_insn *after = nullptr;
+ rtx_insn *tls_insn = ix86_emit_tls_call (tls, kind, bb, &before,
+ &after,
+ updated_gnu_tls_insns,
+ updated_gnu2_tls_insns);
+
+ rtx_insn *tlsdesc_insn = nullptr;
+ if (tlsdesc_set)
+ {
+ rtx dest = copy_rtx (SET_DEST (tlsdesc_set));
+ rtx src = copy_rtx (SET_SRC (tlsdesc_set));
+ tlsdesc_set = gen_rtx_SET (dest, src);
+ tlsdesc_insn = emit_insn_before (tlsdesc_set, tls_insn);
+ }
+
+ if (kind != X86_CSE_TLSDESC)
+ {
+ RTL_CONST_CALL_P (tls_insn) = 1;
+
+ /* Indicate that this function can't jump to non-local gotos. */
+ make_reg_eh_region_note_nothrow_nononlocal (tls_insn);
+ }
+
+ if (recog_memoized (tls_insn) < 0)
+ gcc_unreachable ();
+
+ if (dump_file)
+ {
+ if (after)
+ {
+ fprintf (dump_file, "\nPlace:\n\n");
+ if (tlsdesc_insn)
+ print_rtl_single (dump_file, tlsdesc_insn);
+ print_rtl_single (dump_file, tls_insn);
+ fprintf (dump_file, "\nafter:\n\n");
+ print_rtl_single (dump_file, after);
+ fprintf (dump_file, "\n");
+ }
+ else
+ {
+ fprintf (dump_file, "\nPlace:\n\n");
+ if (tlsdesc_insn)
+ print_rtl_single (dump_file, tlsdesc_insn);
+ print_rtl_single (dump_file, tls_insn);
+ fprintf (dump_file, "\nbefore:\n\n");
+ print_rtl_single (dump_file, before);
+ fprintf (dump_file, "\n");
+ }
+ }
+
+ if (kind != X86_CSE_TLSDESC)
+ {
+ /* Copy RAX to DEST. */
+ set = gen_rtx_SET (dest, rax);
+ rtx_insn *set_insn = emit_insn_after (set, tls_insn);
+ set_dst_reg_note (set_insn, REG_EQUAL, copy_rtx (eqv), dest);
+ if (dump_file)
+ {
+ fprintf (dump_file, "\nPlace:\n\n");
+ print_rtl_single (dump_file, set_insn);
+ fprintf (dump_file, "\nafter:\n\n");
+ print_rtl_single (dump_file, tls_insn);
+ fprintf (dump_file, "\n");
+ }
+ }
+}
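
An illustrative TLS input (assuming -O2 -fPIC with the default
general-dynamic model): both arms need the address of T, so each block
carries a __tls_get_addr call; this function hoists a single call into
their dominating block and copies RAX into the shared register.

    __thread int t;

    int
    f (int c)
    {
      if (c)
        return t + 1;
      return t - 1;
    }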
+
+namespace {
+
+const pass_data pass_data_x86_cse =
+{
+ RTL_PASS, /* type */
+ "x86_cse", /* name */
+ OPTGROUP_NONE, /* optinfo_flags */
+ TV_MACH_DEP, /* tv_id */
+ 0, /* properties_required */
+ 0, /* properties_provided */
+ 0, /* properties_destroyed */
+ 0, /* todo_flags_start */
+ 0, /* todo_flags_finish */
+};
+
+class pass_x86_cse : public rtl_opt_pass
+{
+public:
+ pass_x86_cse (gcc::context *ctxt)
+ : rtl_opt_pass (pass_data_x86_cse, ctxt)
+ {}
+
+ /* opt_pass methods: */
+ bool gate (function *fun) final override
+ {
+ return (TARGET_SSE2
+ && optimize
+ && optimize_function_for_speed_p (fun));
+ }
+
+ unsigned int execute (function *) final override
+ {
+ return x86_cse ();
+ }
+
+private:
+ /* The redundant source value. */
+ rtx val;
+ /* The actual redundant source value for UNSPEC_TLSDESC. */
+ rtx tlsdesc_val;
+ /* The instruction which defines the redundant value. */
+ rtx_insn *def_insn;
+ /* Mode of the destination of the candidate redundant instruction. */
+ machine_mode mode;
+ /* Mode of the source of the candidate redundant instruction. */
+ machine_mode scalar_mode;
+ /* The classification of the candidate redundant instruction. */
+ x86_cse_kind kind;
+
+ unsigned int x86_cse (void);
+ bool candidate_gnu_tls_p (rtx_insn *, attr_tls64);
+ bool candidate_gnu2_tls_p (rtx, attr_tls64);
+ bool candidate_vector_p (rtx);
+ rtx_insn *tls_set_insn_from_symbol (const_rtx, const_rtx);
+}; // class pass_x86_cse
+
+/* Return the instruction which sets REG from TLS_SYMBOL. */
+
+rtx_insn *
+pass_x86_cse::tls_set_insn_from_symbol (const_rtx reg,
+ const_rtx tls_symbol)
+{
+ rtx_insn *set_insn = nullptr;
+ for (df_ref ref = DF_REG_DEF_CHAIN (REGNO (reg));
+ ref;
+ ref = DF_REF_NEXT_REG (ref))
+ {
+ if (DF_REF_IS_ARTIFICIAL (ref))
+ return nullptr;
+
+ set_insn = DF_REF_INSN (ref);
+ if (get_attr_tls64 (set_insn) != TLS64_LEA)
+ return nullptr;
+
+ rtx tls_set = PATTERN (set_insn);
+ rtx tls_src = XVECEXP (SET_SRC (tls_set), 0, 0);
+ if (!rtx_equal_p (tls_symbol, tls_src))
+ return nullptr;
+ }
+
+ return set_insn;
+}
+
+/* Return true and output def_insn, val, mode, scalar_mode and kind if
+ INSN is UNSPEC_TLS_GD or UNSPEC_TLS_LD_BASE. */
+
+bool
+pass_x86_cse::candidate_gnu_tls_p (rtx_insn *insn, attr_tls64 tls64)
+{
+ if (!TARGET_64BIT || !cfun->machine->tls_descriptor_call_multiple_p)
+ return false;
+
+ /* Record the redundant TLS CALLs for 64-bit:
+
+ (parallel [
+ (set (reg:DI 0 ax)
+ (call:DI (mem:QI (symbol_ref:DI ("__tls_get_addr")))
+ (const_int 0 [0])))
+ (unspec:DI [(symbol_ref:DI ("foo") [flags 0x50])
+ (reg/f:DI 7 sp)] UNSPEC_TLS_GD)
+ (clobber (reg:DI 5 di))])
+
+
+ and
+
+ (parallel [
+ (set (reg:DI 0 ax)
+ (call:DI (mem:QI (symbol_ref:DI ("__tls_get_addr")))
+ (const_int 0 [0])))
+ (unspec:DI [(reg/f:DI 7 sp)] UNSPEC_TLS_LD_BASE)])
+
+ */
+
+ rtx pat = PATTERN (insn);
+ rtx set = XVECEXP (pat, 0, 0);
+ gcc_assert (GET_CODE (set) == SET);
+ rtx dest = SET_DEST (set);
+ scalar_mode = mode = GET_MODE (dest);
+ val = XVECEXP (pat, 0, 1);
+ gcc_assert (GET_CODE (val) == UNSPEC);
+
+ if (tls64 == TLS64_GD)
+ kind = X86_CSE_TLS_GD;
+ else
+ kind = X86_CSE_TLS_LD_BASE;
+
+ def_insn = nullptr;
+ return true;
+}
+
+/* Return true and output def_insn, val, mode, scalar_mode and kind if
+ SET is UNSPEC_TLSDESC. */
+
+bool
+pass_x86_cse::candidate_gnu2_tls_p (rtx set, attr_tls64 tls64)
+{
+ if (!TARGET_64BIT || !cfun->machine->tls_descriptor_call_multiple_p)
+ return false;
+
+ rtx tls_symbol;
+ rtx_insn *set_insn;
+ rtx src = SET_SRC (set);
+ val = src;
+ tlsdesc_val = src;
+ kind = X86_CSE_TLSDESC;
+
+ if (tls64 == TLS64_COMBINE)
+ {
+ /* Record 64-bit TLS64_COMBINE:
+
+ (set (reg/f:DI 104)
+ (plus:DI (unspec:DI [
+ (symbol_ref:DI ("_TLS_MODULE_BASE_") [flags 0x10])
+ (reg:DI 114)
+ (reg/f:DI 7 sp)] UNSPEC_TLSDESC)
+ (const:DI (unspec:DI [
+ (symbol_ref:DI ("e") [flags 0x1a])
+ ] UNSPEC_DTPOFF))))
+
+ (set (reg/f:DI 104)
+ (plus:DI (unspec:DI [
+ (symbol_ref:DI ("_TLS_MODULE_BASE_") [flags 0x10])
+ (unspec:DI [
+ (symbol_ref:DI ("_TLS_MODULE_BASE_") [flags 0x10])
+ ] UNSPEC_TLSDESC)
+ (reg/f:DI 7 sp)] UNSPEC_TLSDESC)
+ (const:DI (unspec:DI [
+ (symbol_ref:DI ("e") [flags 0x1a])
+ ] UNSPEC_DTPOFF))))
+ */
+
+ scalar_mode = mode = GET_MODE (src);
+
+ /* Since the first operand of PLUS in the source TLS_COMBINE
+ pattern is unused, use the second operand of PLUS:
+
+ (const:DI (unspec:DI [
+ (symbol_ref:DI ("e") [flags 0x1a])
+ ] UNSPEC_DTPOFF))
+
+ as VAL to check if 2 TLS_COMBINE patterns have the same
+ source. */
+ val = XEXP (src, 1);
+ gcc_assert (GET_CODE (val) == CONST
+ && GET_CODE (XEXP (val, 0)) == UNSPEC
+ && XINT (XEXP (val, 0), 1) == UNSPEC_DTPOFF
+ && SYMBOL_REF_P (XVECEXP (XEXP (val, 0), 0, 0)));
+ def_insn = nullptr;
+ return true;
+ }
+
+ /* Record 64-bit TLS_CALL:
+
+ (set (reg:DI 101)
+ (unspec:DI [(symbol_ref:DI ("foo") [flags 0x50])
+ (reg:DI 112)
+ (reg/f:DI 7 sp)] UNSPEC_TLSDESC))
+
+ */
+
+ gcc_assert (GET_CODE (src) == UNSPEC);
+ tls_symbol = XVECEXP (src, 0, 0);
+ src = XVECEXP (src, 0, 1);
+ scalar_mode = mode = GET_MODE (src);
+ gcc_assert (REG_P (src));
+
+ /* All definitions of reg:DI 129 in
+
+ (set (reg:DI 110)
+ (unspec:DI [(symbol_ref:DI ("foo"))
+ (reg:DI 129)
+ (reg/f:DI 7 sp)] UNSPEC_TLSDESC))
+
+ should have the same source as in
+
+ (set (reg:DI 129)
+ (unspec:DI [(symbol_ref:DI ("foo"))] UNSPEC_TLSDESC))
+
+ */
+
+ set_insn = tls_set_insn_from_symbol (src, tls_symbol);
+ if (!set_insn)
+ return false;
+
+ /* Use TLS_SYMBOL as VAL to check if 2 patterns have the same source. */
+ val = tls_symbol;
+ def_insn = set_insn;
+ return true;
+}
+
+/* Return true and output def_insn, val, mode, scalar_mode and kind if
+ SET is a vector broadcast pattern. */
+
+bool
+pass_x86_cse::candidate_vector_p (rtx set)
+{
+ rtx src = SET_SRC (set);
+ rtx dest = SET_DEST (set);
+ mode = GET_MODE (dest);
+ /* Skip non-vector instruction. */
+ if (!VECTOR_MODE_P (mode))
+ return false;
+
+ /* Skip non-vector load instruction. */
+ if (!REG_P (dest) && !SUBREG_P (dest))
+ return false;
+
+ val = ix86_broadcast_inner (src, mode, &scalar_mode, &kind,
+ &def_insn);
+ return val != nullptr;
+}
+
+/* At entry of the nearest common dominator for basic blocks with
+
+ 1. Vector CONST0_RTX patterns.
+ 2. Vector CONSTM1_RTX patterns.
+ 3. Vector broadcast patterns.
+ 4. UNSPEC_TLS_GD patterns.
+ 5. UNSPEC_TLS_LD_BASE patterns.
+ 6. UNSPEC_TLSDESC patterns.
+
+ generate a single pattern whose destination is used to replace the
+ source in all identical patterns.
+
+ NB: We want to generate a single pattern, executed only once, that
+ covers the whole function. The LCM algorithm isn't appropriate here
+ since it may place a pattern inside a loop. */
+
+unsigned int
+pass_x86_cse::x86_cse (void)
+{
+ timevar_push (TV_MACH_DEP);
+
+ auto_vec<redundant_pattern *> loads;
+ redundant_pattern *load;
+ basic_block bb;
+ rtx_insn *insn;
+ unsigned int i;
+ auto_bitmap updated_gnu_tls_insns;
+ auto_bitmap updated_gnu2_tls_insns;
+
+ df_set_flags (DF_DEFER_INSN_RESCAN);
+
+ bool recursive_call_p = cfun->machine->recursive_function;
+
+ FOR_EACH_BB_FN (bb, cfun)
+ {
+ FOR_BB_INSNS (bb, insn)
+ {
+ if (!NONDEBUG_INSN_P (insn))
+ continue;
+
+ bool matched = false;
+ /* Remove redundant pattens if there are more than 2 of
+ them. */
+ unsigned int threshold = 2;
+
+ rtx set = single_set (insn);
+ if (!set && !CALL_P (insn))
+ continue;
+
+ tlsdesc_val = nullptr;
+
+ attr_tls64 tls64 = get_attr_tls64 (insn);
+ switch (tls64)
+ {
+ case TLS64_GD:
+ case TLS64_LD_BASE:
+ /* Verify UNSPEC_TLS_GD and UNSPEC_TLS_LD_BASE. */
+ if (candidate_gnu_tls_p (insn, tls64))
+ break;
+ continue;
+
+ case TLS64_CALL:
+ case TLS64_COMBINE:
+ /* Verify UNSPEC_TLSDESC. */
+ if (candidate_gnu2_tls_p (set, tls64))
+ break;
+ continue;
+
+ case TLS64_LEA:
+ /* Skip TLS64_LEA. */
+ continue;
+
+ case TLS64_NONE:
+ if (!set)
+ continue;
+
+ /* Check for vector broadcast. */
+ if (candidate_vector_p (set))
+ break;
+ continue;
+ }
+
+ /* Check if there is a matching redundant load. */
+ FOR_EACH_VEC_ELT (loads, i, load)
+ if (load->val
+ && load->kind == kind
+ && load->mode == scalar_mode
+ && (load->bb == bb
+ || kind != X86_CSE_VEC_DUP
+ /* A vector load other than all 0s/1s must be in the
+ same basic block if the function is recursive. */
+ || !recursive_call_p)
+ && rtx_equal_p (load->val, val))
+ {
+ /* Record instruction. */
+ bitmap_set_bit (load->insns, INSN_UID (insn));
+
+ /* Record the maximum vector size. */
+ if (kind <= X86_CSE_VEC_DUP
+ && load->size < GET_MODE_SIZE (mode))
+ load->size = GET_MODE_SIZE (mode);
+
+ /* Record the basic block. */
+ bitmap_set_bit (load->bbs, bb->index);
+
+ /* Increment the count. */
+ load->count++;
+
+ matched = true;
+ break;
+ }
+
+ if (matched)
+ continue;
+
+ /* We see this instruction for the first time. Record the
+ redundant source value, its mode, the destination size,
+ the instruction which defines the redundant source value,
+ the instruction's basic block and the instruction kind. */
+ load = new redundant_pattern;
+
+ load->val = copy_rtx (val);
+ if (tlsdesc_val)
+ load->tlsdesc_val = copy_rtx (tlsdesc_val);
+ else
+ load->tlsdesc_val = nullptr;
+ load->mode = scalar_mode;
+ load->size = GET_MODE_SIZE (mode);
+ load->def_insn = def_insn;
+ load->count = 1;
+ load->threshold = threshold;
+ load->bb = BLOCK_FOR_INSN (insn);
+ load->kind = kind;
+
+ bitmap_set_bit (load->insns, INSN_UID (insn));
+ bitmap_set_bit (load->bbs, bb->index);
+
+ loads.safe_push (load);
+ }
+ }
+
+ bool replaced = false;
+ FOR_EACH_VEC_ELT (loads, i, load)
+ if (load->count >= load->threshold)
+ {
+ machine_mode mode;
+ rtx reg, broadcast_source, broadcast_reg;
+ replaced = true;
+ switch (load->kind)
+ {
+ case X86_CSE_TLS_GD:
+ case X86_CSE_TLS_LD_BASE:
+ case X86_CSE_TLSDESC:
+ broadcast_reg = gen_reg_rtx (load->mode);
+ replace_tls_call (broadcast_reg, load->insns,
+ (load->kind == X86_CSE_TLSDESC
+ ? updated_gnu2_tls_insns
+ : updated_gnu_tls_insns));
+ load->broadcast_reg = broadcast_reg;
+ break;
+
+ case X86_CSE_CONST0_VECTOR:
+ case X86_CSE_CONSTM1_VECTOR:
+ case X86_CSE_VEC_DUP:
+ mode = ix86_get_vector_cse_mode (load->size, load->mode);
+ broadcast_reg = gen_reg_rtx (mode);
+ if (load->def_insn)
+ {
+ /* Replace redundant vector loads with a single vector
+ load in the same basic block. */
+ reg = load->val;
+ if (load->mode != GET_MODE (reg))
+ reg = gen_rtx_SUBREG (load->mode, reg, 0);
+ broadcast_source = gen_rtx_VEC_DUPLICATE (mode, reg);
+ }
+ else
+ /* This is a constant integer/double vector. If the
+ inner scalar is 0 or -1, set vector to CONST0_RTX
+ or CONSTM1_RTX directly. */
+ switch (load->kind)
+ {
+ case X86_CSE_CONST0_VECTOR:
+ broadcast_source = CONST0_RTX (mode);
+ break;
+ case X86_CSE_CONSTM1_VECTOR:
+ broadcast_source = CONSTM1_RTX (mode);
+ break;
+ case X86_CSE_VEC_DUP:
+ reg = gen_reg_rtx (load->mode);
+ broadcast_source = gen_rtx_VEC_DUPLICATE (mode, reg);
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ replace_vector_const (mode, broadcast_reg, load->insns,
+ load->mode);
+ load->broadcast_source = broadcast_source;
+ load->broadcast_reg = broadcast_reg;
+ break;
+ }
+ }
+
+ if (replaced)
+ {
+ auto_vec<rtx_insn *> control_flow_insns;
+
+ /* (Re-)discover loops so that bb->loop_father can be used in the
+ analysis below. */
+ calculate_dominance_info (CDI_DOMINATORS);
+ loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
+
+ FOR_EACH_VEC_ELT (loads, i, load)
+ if (load->count >= load->threshold)
+ {
+ rtx set;
+ if (load->def_insn)
+ switch (load->kind)
+ {
+ case X86_CSE_TLSDESC:
+ ix86_place_single_tls_call (load->broadcast_reg,
+ load->tlsdesc_val,
+ load->kind,
+ load->bbs,
+ updated_gnu_tls_insns,
+ updated_gnu2_tls_insns,
+ PATTERN (load->def_insn));
+ break;
+ case X86_CSE_VEC_DUP:
+ /* Insert a broadcast after the original scalar
+ definition. */
+ set = gen_rtx_SET (load->broadcast_reg,
+ load->broadcast_source);
+ insn = emit_insn_after (set, load->def_insn);
+
+ if (cfun->can_throw_non_call_exceptions)
+ {
+ /* Handle REG_EH_REGION note in DEF_INSN. */
+ rtx note = find_reg_note (load->def_insn,
+ REG_EH_REGION, nullptr);
+ if (note)
+ {
+ control_flow_insns.safe_push (load->def_insn);
+ add_reg_note (insn, REG_EH_REGION,
+ XEXP (note, 0));
+ }
+ }
+
+ if (dump_file)
+ {
+ fprintf (dump_file, "\nAdd:\n\n");
+ print_rtl_single (dump_file, insn);
+ fprintf (dump_file, "\nafter:\n\n");
+ print_rtl_single (dump_file, load->def_insn);
+ fprintf (dump_file, "\n");
+ }
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ else
+ switch (load->kind)
+ {
+ case X86_CSE_TLS_GD:
+ case X86_CSE_TLS_LD_BASE:
+ case X86_CSE_TLSDESC:
+ ix86_place_single_tls_call (load->broadcast_reg,
+ (load->kind == X86_CSE_TLSDESC
+ ? load->tlsdesc_val
+ : load->val),
+ load->kind,
+ load->bbs,
+ updated_gnu_tls_insns,
+ updated_gnu2_tls_insns);
+ break;
+ case X86_CSE_CONST0_VECTOR:
+ case X86_CSE_CONSTM1_VECTOR:
+ case X86_CSE_VEC_DUP:
+ ix86_place_single_vector_set (load->broadcast_reg,
+ load->broadcast_source,
+ load->bbs,
+ load);
+ break;
+ }
+ }
+
+ loop_optimizer_finalize ();
+
+ if (!control_flow_insns.is_empty ())
+ {
+ free_dominance_info (CDI_DOMINATORS);
+
+ FOR_EACH_VEC_ELT (control_flow_insns, i, insn)
+ if (control_flow_insn_p (insn))
+ {
+ /* Split the block after insn. There will be a fallthru
+ edge, which is OK so we keep it. We have to create
+ the exception edges ourselves. */
+ bb = BLOCK_FOR_INSN (insn);
+ split_block (bb, insn);
+ rtl_make_eh_edge (NULL, bb, BB_END (bb));
+ }
+ }
+
+ df_process_deferred_rescans ();
+ }
+
+ df_clear_flags (DF_DEFER_INSN_RESCAN);
+
+ timevar_pop (TV_MACH_DEP);
+ return 0;
+}
+
+} // anon namespace
+
+rtl_opt_pass *
+make_pass_x86_cse (gcc::context *ctxt)
+{
+ return new pass_x86_cse (ctxt);
+}
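
Following GCC's pass-name dump convention, the "Replace:", "Place:"
and "Add:" traces emitted above should land in the dump requested with
-fdump-rtl-x86_cse (an assumption from the registered pass name, not
stated in this hunk).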
+
/* Convert legacy instructions that clobbers EFLAGS to APX_NF
instructions when there are no flag set between a flag
producer and user. */
@@ -3962,7 +5365,6 @@ ix86_get_function_versions_dispatcher (void *decl)
struct cgraph_node *node = NULL;
struct cgraph_node *default_node = NULL;
struct cgraph_function_version_info *node_v = NULL;
- struct cgraph_function_version_info *first_v = NULL;
tree dispatch_decl = NULL;
@@ -3979,37 +5381,16 @@ ix86_get_function_versions_dispatcher (void *decl)
if (node_v->dispatcher_resolver != NULL)
return node_v->dispatcher_resolver;
- /* Find the default version and make it the first node. */
- first_v = node_v;
- /* Go to the beginning of the chain. */
- while (first_v->prev != NULL)
- first_v = first_v->prev;
- default_version_info = first_v;
- while (default_version_info != NULL)
- {
- if (is_function_default_version
- (default_version_info->this_node->decl))
- break;
- default_version_info = default_version_info->next;
- }
+ /* The default node is always at the beginning of the chain. */
+ default_version_info = node_v;
+ while (default_version_info->prev != NULL)
+ default_version_info = default_version_info->prev;
+ default_node = default_version_info->this_node;
/* If there is no default node, just return NULL. */
- if (default_version_info == NULL)
+ if (!is_function_default_version (default_node->decl))
return NULL;
- /* Make default info the first node. */
- if (first_v != default_version_info)
- {
- default_version_info->prev->next = default_version_info->next;
- if (default_version_info->next)
- default_version_info->next->prev = default_version_info->prev;
- first_v->prev = default_version_info;
- default_version_info->next = first_v;
- default_version_info->prev = NULL;
- }
-
- default_node = default_version_info->this_node;
-
#if defined (ASM_OUTPUT_TYPE_DIRECTIVE)
if (targetm.has_ifunc_p ())
{
diff --git a/gcc/config/i386/i386-features.h b/gcc/config/i386/i386-features.h
index 24b0c4e..e3719b3 100644
--- a/gcc/config/i386/i386-features.h
+++ b/gcc/config/i386/i386-features.h
@@ -153,12 +153,13 @@ class scalar_chain
bitmap insns_conv;
hash_map<rtx, rtx> defs_map;
- unsigned n_sse_to_integer;
- unsigned n_integer_to_sse;
+ /* Cost of inserted conversions between integer and SSE registers. */
+ int cost_sse_integer;
+ sreal weighted_cost_sse_integer;
auto_vec<rtx_insn *> control_flow_insns;
bool build (bitmap candidates, unsigned insn_uid, bitmap disallowed);
- virtual int compute_convert_gain () = 0;
+ virtual bool compute_convert_gain () = 0;
int convert ();
protected:
@@ -184,11 +185,11 @@ class general_scalar_chain : public scalar_chain
public:
general_scalar_chain (enum machine_mode smode_, enum machine_mode vmode_)
: scalar_chain (smode_, vmode_) {}
- int compute_convert_gain () final override;
+ bool compute_convert_gain () final override;
private:
void convert_insn (rtx_insn *insn) final override;
- int vector_const_cost (rtx exp);
+ int vector_const_cost (rtx exp, basic_block bb);
rtx convert_rotate (enum rtx_code, rtx op0, rtx op1, rtx_insn *insn);
};
@@ -196,7 +197,7 @@ class timode_scalar_chain : public scalar_chain
{
public:
timode_scalar_chain () : scalar_chain (TImode, V1TImode) {}
- int compute_convert_gain () final override;
+ bool compute_convert_gain () final override;
private:
void fix_debug_reg_uses (rtx reg);
diff --git a/gcc/config/i386/i386-isa.def b/gcc/config/i386/i386-isa.def
index 19d78d7..6fa601d 100644
--- a/gcc/config/i386/i386-isa.def
+++ b/gcc/config/i386/i386-isa.def
@@ -118,8 +118,6 @@ DEF_PTA(SHA512)
DEF_PTA(SM4)
DEF_PTA(APX_F)
DEF_PTA(USER_MSR)
-DEF_PTA(EVEX512)
-DEF_PTA(AVX10_1_256)
DEF_PTA(AVX10_1)
DEF_PTA(AVX10_2)
DEF_PTA(AMX_AVX512)
diff --git a/gcc/config/i386/i386-modes.def b/gcc/config/i386/i386-modes.def
index 2fedbeb..c2db305 100644
--- a/gcc/config/i386/i386-modes.def
+++ b/gcc/config/i386/i386-modes.def
@@ -91,7 +91,6 @@ VECTOR_MODES (FLOAT, 16); /* V8HF V4SF V2DF */
VECTOR_MODES (FLOAT, 32); /* V16HF V8SF V4DF V2TF */
VECTOR_MODES (FLOAT, 64); /* V32HF V16SF V8DF V4TF */
VECTOR_MODES (FLOAT, 128); /* V64HF V32SF V16DF V8TF */
-VECTOR_MODES (FLOAT, 256); /* V128HF V64SF V32DF V16TF */
VECTOR_MODE (FLOAT, HF, 2); /* V2HF */
VECTOR_MODE (FLOAT, BF, 2); /* V2BF */
VECTOR_MODE (FLOAT, HF, 6); /* V6HF */
@@ -102,7 +101,6 @@ VECTOR_MODE (INT, QI, 2); /* V2QI */
VECTOR_MODE (INT, QI, 12); /* V12QI */
VECTOR_MODE (INT, QI, 14); /* V14QI */
VECTOR_MODE (INT, HI, 6); /* V6HI */
-VECTOR_MODE (INT, SI, 64); /* V64SI */
INT_MODE (OI, 32);
INT_MODE (XI, 64);
diff --git a/gcc/config/i386/i386-options.cc b/gcc/config/i386/i386-options.cc
index 964449f..abb5dd7 100644
--- a/gcc/config/i386/i386-options.cc
+++ b/gcc/config/i386/i386-options.cc
@@ -259,9 +259,7 @@ static struct ix86_target_opts isa2_opts[] =
{ "-msm3", OPTION_MASK_ISA2_SM3 },
{ "-msha512", OPTION_MASK_ISA2_SHA512 },
{ "-msm4", OPTION_MASK_ISA2_SM4 },
- { "-mevex512", OPTION_MASK_ISA2_EVEX512 },
{ "-musermsr", OPTION_MASK_ISA2_USER_MSR },
- { "-mavx10.1-256", OPTION_MASK_ISA2_AVX10_1_256 },
{ "-mavx10.1", OPTION_MASK_ISA2_AVX10_1 },
{ "-mavx10.2", OPTION_MASK_ISA2_AVX10_2 },
{ "-mamx-avx512", OPTION_MASK_ISA2_AMX_AVX512 },
@@ -713,8 +711,6 @@ ix86_function_specific_save (struct cl_target_option *ptr,
ptr->x_ix86_apx_features = opts->x_ix86_apx_features;
ptr->x_ix86_isa_flags_explicit = opts->x_ix86_isa_flags_explicit;
ptr->x_ix86_isa_flags2_explicit = opts->x_ix86_isa_flags2_explicit;
- ptr->x_ix86_no_avx512_explicit = opts->x_ix86_no_avx512_explicit;
- ptr->x_ix86_no_avx10_1_explicit = opts->x_ix86_no_avx10_1_explicit;
ptr->x_recip_mask_explicit = opts->x_recip_mask_explicit;
ptr->x_ix86_arch_string = opts->x_ix86_arch_string;
ptr->x_ix86_tune_string = opts->x_ix86_tune_string;
@@ -764,63 +760,63 @@ static unsigned HOST_WIDE_INT initial_ix86_arch_features[X86_ARCH_LAST] = {
/* This table must be in sync with enum processor_type in i386.h. */
static const struct processor_costs *processor_cost_table[] =
{
- &generic_cost,
- &i386_cost,
- &i486_cost,
- &pentium_cost,
- &lakemont_cost,
- &pentiumpro_cost,
- &pentium4_cost,
- &nocona_cost,
- &core_cost,
- &core_cost,
- &core_cost,
- &core_cost,
- &atom_cost,
- &slm_cost,
- &slm_cost,
- &slm_cost,
- &tremont_cost,
- &alderlake_cost,
- &alderlake_cost,
- &alderlake_cost,
- &skylake_cost,
- &skylake_cost,
- &icelake_cost,
- &icelake_cost,
- &icelake_cost,
- &skylake_cost,
- &icelake_cost,
- &skylake_cost,
- &icelake_cost,
- &alderlake_cost,
- &icelake_cost,
- &icelake_cost,
- &icelake_cost,
- &alderlake_cost,
- &alderlake_cost,
- &alderlake_cost,
- &icelake_cost,
- &intel_cost,
- &lujiazui_cost,
- &yongfeng_cost,
- &shijidadao_cost,
- &geode_cost,
- &k6_cost,
- &athlon_cost,
- &k8_cost,
- &amdfam10_cost,
- &bdver_cost,
- &bdver_cost,
- &bdver_cost,
- &bdver_cost,
- &btver1_cost,
- &btver2_cost,
- &znver1_cost,
- &znver2_cost,
- &znver3_cost,
- &znver4_cost,
- &znver5_cost
+ &generic_cost, /* PROCESSOR_GENERIC. */
+ &i386_cost, /* PROCESSOR_I386. */
+ &i486_cost, /* PROCESSOR_I486. */
+ &pentium_cost, /* PROCESSOR_PENTIUM. */
+ &lakemont_cost, /* PROCESSOR_LAKEMONT. */
+ &pentiumpro_cost, /* PROCESSOR_PENTIUMPRO. */
+ &pentium4_cost, /* PROCESSOR_PENTIUM4. */
+ &nocona_cost, /* PROCESSOR_NOCONA. */
+ &core_cost, /* PROCESSOR_CORE2. */
+ &core_cost, /* PROCESSOR_NEHALEM. */
+ &core_cost, /* PROCESSOR_SANDYBRIDGE. */
+ &core_cost, /* PROCESSOR_HASWELL. */
+ &atom_cost, /* PROCESSOR_BONNELL. */
+ &slm_cost, /* PROCESSOR_SILVERMONT. */
+ &slm_cost, /* PROCESSOR_GOLDMONT. */
+ &slm_cost, /* PROCESSOR_GOLDMONT_PLUS. */
+ &tremont_cost, /* PROCESSOR_TREMONT. */
+ &alderlake_cost, /* PROCESSOR_SIERRAFOREST. */
+ &alderlake_cost, /* PROCESSOR_GRANDRIDGE. */
+ &alderlake_cost, /* PROCESSOR_CLEARWATERFOREST. */
+ &skylake_cost, /* PROCESSOR_SKYLAKE. */
+ &skylake_cost, /* PROCESSOR_SKYLAKE_AVX512. */
+ &icelake_cost, /* PROCESSOR_CANNONLAKE. */
+ &icelake_cost, /* PROCESSOR_ICELAKE_CLIENT. */
+ &icelake_cost, /* PROCESSOR_ICELAKE_SERVER. */
+ &skylake_cost, /* PROCESSOR_CASCADELAKE. */
+ &icelake_cost, /* PROCESSOR_TIGERLAKE. */
+ &skylake_cost, /* PROCESSOR_COOPERLAKE. */
+ &icelake_cost, /* PROCESSOR_SAPPHIRERAPIDS. */
+ &alderlake_cost, /* PROCESSOR_ALDERLAKE. */
+ &icelake_cost, /* PROCESSOR_ROCKETLAKE. */
+ &icelake_cost, /* PROCESSOR_GRANITERAPIDS. */
+ &icelake_cost, /* PROCESSOR_GRANITERAPIDS_D. */
+ &alderlake_cost, /* PROCESSOR_ARROWLAKE. */
+ &alderlake_cost, /* PROCESSOR_ARROWLAKE_S. */
+ &alderlake_cost, /* PROCESSOR_PANTHERLAKE. */
+ &icelake_cost, /* PROCESSOR_DIAMONDRAPIDS. */
+ &alderlake_cost, /* PROCESSOR_INTEL. */
+ &lujiazui_cost, /* PROCESSOR_LUJIAZUI. */
+ &yongfeng_cost, /* PROCESSOR_YONGFENG. */
+ &shijidadao_cost, /* PROCESSOR_SHIJIDADAO. */
+ &geode_cost, /* PROCESSOR_GEODE. */
+ &k6_cost, /* PROCESSOR_K6. */
+ &athlon_cost, /* PROCESSOR_ATHLON. */
+ &k8_cost, /* PROCESSOR_K8. */
+ &amdfam10_cost, /* PROCESSOR_AMDFAM10. */
+ &bdver_cost, /* PROCESSOR_BDVER1. */
+ &bdver_cost, /* PROCESSOR_BDVER2. */
+ &bdver_cost, /* PROCESSOR_BDVER3. */
+ &bdver_cost, /* PROCESSOR_BDVER4. */
+ &btver1_cost, /* PROCESSOR_BTVER1. */
+ &btver2_cost, /* PROCESSOR_BTVER2. */
+ &znver1_cost, /* PROCESSOR_ZNVER1. */
+ &znver2_cost, /* PROCESSOR_ZNVER2. */
+ &znver3_cost, /* PROCESSOR_ZNVER3. */
+ &znver4_cost, /* PROCESSOR_ZNVER4. */
+ &znver5_cost /* PROCESSOR_ZNVER5. */
};
/* Guarantee that the array is aligned with enum processor_type. */
@@ -858,8 +854,6 @@ ix86_function_specific_restore (struct gcc_options *opts,
opts->x_ix86_apx_features = ptr->x_ix86_apx_features;
opts->x_ix86_isa_flags_explicit = ptr->x_ix86_isa_flags_explicit;
opts->x_ix86_isa_flags2_explicit = ptr->x_ix86_isa_flags2_explicit;
- opts->x_ix86_no_avx512_explicit = ptr->x_ix86_no_avx512_explicit;
- opts->x_ix86_no_avx10_1_explicit = ptr->x_ix86_no_avx10_1_explicit;
opts->x_recip_mask_explicit = ptr->x_recip_mask_explicit;
opts->x_ix86_arch_string = ptr->x_ix86_arch_string;
opts->x_ix86_tune_string = ptr->x_ix86_tune_string;
@@ -1131,11 +1125,8 @@ ix86_valid_target_attribute_inner_p (tree fndecl, tree args, char *p_strings[],
IX86_ATTR_ISA ("sha512", OPT_msha512),
IX86_ATTR_ISA ("sm4", OPT_msm4),
IX86_ATTR_ISA ("apxf", OPT_mapxf),
- IX86_ATTR_ISA ("evex512", OPT_mevex512),
IX86_ATTR_ISA ("usermsr", OPT_musermsr),
- IX86_ATTR_ISA ("avx10.1-256", OPT_mavx10_1_256),
IX86_ATTR_ISA ("avx10.1", OPT_mavx10_1),
- IX86_ATTR_ISA ("avx10.1-512", OPT_mavx10_1),
IX86_ATTR_ISA ("avx10.2", OPT_mavx10_2),
IX86_ATTR_ISA ("amx-avx512", OPT_mamx_avx512),
IX86_ATTR_ISA ("amx-tf32", OPT_mamx_tf32),
@@ -1181,6 +1172,10 @@ ix86_valid_target_attribute_inner_p (tree fndecl, tree args, char *p_strings[],
OPT_mrecip,
MASK_RECIP),
+ IX86_ATTR_YES ("80387",
+ OPT_m80387,
+ MASK_80387),
+
IX86_ATTR_IX86_YES ("general-regs-only",
OPT_mgeneral_regs_only,
OPTION_MASK_GENERAL_REGS_ONLY),
@@ -1271,13 +1266,6 @@ ix86_valid_target_attribute_inner_p (tree fndecl, tree args, char *p_strings[],
}
}
- /* Fixup -msse4 which is RejectNegative to -mno-sse4 when negated. */
- if (opt == OPT_msse4 && !opt_set_p)
- {
- opt = OPT_mno_sse4;
- opt_set_p = true;
- }
-
/* Process the option. */
if (opt == N_OPTS)
{
@@ -1297,6 +1285,8 @@ ix86_valid_target_attribute_inner_p (tree fndecl, tree args, char *p_strings[],
else if (type == ix86_opt_yes || type == ix86_opt_no)
{
+ opts_set->x_target_flags |= mask;
+
if (type == ix86_opt_no)
opt_set_p = !opt_set_p;
@@ -1378,7 +1368,9 @@ ix86_valid_target_attribute_inner_p (tree fndecl, tree args, char *p_strings[],
arg_ok = opt_enum_arg_to_value (opt, p + opt_len, &value, CL_TARGET);
if (arg_ok)
set_option (opts, enum_opts_set, opt, value,
- p + opt_len, DK_UNSPECIFIED, input_location,
+ p + opt_len,
+ static_cast<int> (diagnostics::kind::unspecified),
+ input_location,
global_dc);
else
{
@@ -1436,18 +1428,6 @@ ix86_valid_target_attribute_tree (tree fndecl, tree args,
target_clone_attr))
return error_mark_node;
- /* AVX10.1-256 will enable only 256 bit AVX512F features by setting all
- AVX512 related ISA flags and not setting EVEX512. When it is used
- with avx512 related function attribute, we need to enable 512 bit to
- align with the command line behavior. Manually set EVEX512 for this
- scenario. */
- if ((def->x_ix86_isa_flags2 & OPTION_MASK_ISA2_AVX10_1_256)
- && (opts->x_ix86_isa_flags & OPTION_MASK_ISA_AVX512F)
- && (opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512F)
- && !(def->x_ix86_isa_flags2_explicit & OPTION_MASK_ISA2_EVEX512)
- && !(opts->x_ix86_isa_flags2_explicit & OPTION_MASK_ISA2_EVEX512))
- opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA2_EVEX512;
-
/* If the changed options are different from the default, rerun
ix86_option_override_internal, and then save the options away.
The string options are attribute options, and will be undone
@@ -1458,10 +1438,7 @@ ix86_valid_target_attribute_tree (tree fndecl, tree args,
|| option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
|| option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
|| enum_opts_set.x_ix86_fpmath
- || enum_opts_set.x_prefer_vector_width_type
- || (!(def->x_ix86_isa_flags2_explicit & OPTION_MASK_ISA2_AVX10_1_256)
- && (opts->x_ix86_isa_flags2_explicit
- & OPTION_MASK_ISA2_AVX10_1_256)))
+ || enum_opts_set.x_prefer_vector_width_type)
{
/* If we are using the default tune= or arch=, undo the string assigned,
and use the default. */
@@ -2025,7 +2002,7 @@ ix86_option_override_internal (bool main_args_p,
struct gcc_options *opts_set)
{
unsigned int i;
- unsigned HOST_WIDE_INT ix86_arch_mask, avx512_isa_flags, avx512_isa_flags2;
+ unsigned HOST_WIDE_INT ix86_arch_mask;
const bool ix86_tune_specified = (opts->x_ix86_tune_string != NULL);
/* -mrecip options. */
@@ -2044,15 +2021,6 @@ ix86_option_override_internal (bool main_args_p,
{ "vec-sqrt", RECIP_MASK_VEC_SQRT },
};
- avx512_isa_flags = OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_AVX512CD
- | OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512BW
- | OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512IFMA
- | OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VBMI2
- | OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VPOPCNTDQ
- | OPTION_MASK_ISA_AVX512BITALG;
- avx512_isa_flags2 = OPTION_MASK_ISA2_AVX512FP16
- | OPTION_MASK_ISA2_AVX512BF16;
-
/* Turn off both OPTION_MASK_ABI_64 and OPTION_MASK_ABI_X32 if
TARGET_64BIT_DEFAULT is true and TARGET_64BIT is false. */
if (TARGET_64BIT_DEFAULT && !TARGET_64BIT_P (opts->x_ix86_isa_flags))
@@ -2674,107 +2642,6 @@ ix86_option_override_internal (bool main_args_p,
&= ~((OPTION_MASK_ISA_BMI | OPTION_MASK_ISA_BMI2 | OPTION_MASK_ISA_TBM)
& ~opts->x_ix86_isa_flags_explicit);
- /* Emit a warning if AVX10.1 options is used with AVX512/EVEX512 options except
- for the following option combinations:
- 1. Both AVX10.1-512 and AVX512 with 512 bit vector width are enabled with no
- explicit disable on other AVX512 features.
- 2. Both AVX10.1-256 and AVX512 w/o 512 bit vector width are enabled with no
- explicit disable on other AVX512 features.
- 3. Both AVX10.1 and AVX512 are disabled. */
- if (TARGET_AVX10_1_P (opts->x_ix86_isa_flags2))
- {
- if (opts->x_ix86_no_avx512_explicit
- && (((~(avx512_isa_flags & opts->x_ix86_isa_flags)
- & (avx512_isa_flags & opts->x_ix86_isa_flags_explicit)))
- || ((~((avx512_isa_flags2 | OPTION_MASK_ISA2_EVEX512)
- & opts->x_ix86_isa_flags2)
- & ((avx512_isa_flags2 | OPTION_MASK_ISA2_EVEX512)
- & opts->x_ix86_isa_flags2_explicit)))))
- warning (0, "%<-mno-evex512%> or %<-mno-avx512XXX%> cannot disable "
- "AVX10 instructions when AVX10.1-512 is available in GCC 15, "
- "behavior will change to it will disable that part of "
- "AVX512 instructions since GCC 16");
- }
- else if (TARGET_AVX10_1_256_P (opts->x_ix86_isa_flags2))
- {
- if (TARGET_EVEX512_P (opts->x_ix86_isa_flags2)
- && (OPTION_MASK_ISA2_EVEX512 & opts->x_ix86_isa_flags2_explicit))
- {
- if (!TARGET_AVX512F_P (opts->x_ix86_isa_flags)
- || !(OPTION_MASK_ISA_AVX512F & opts->x_ix86_isa_flags_explicit))
- {
- /* We should not emit 512 bit instructions under AVX10.1-256
- when EVEX512 is enabled w/o any AVX512 features enabled.
- Disable EVEX512 bit for this. */
- warning (0, "Using %<-mevex512%> without any AVX512 features "
- "enabled together with AVX10.1 only will not enable "
- "any AVX512 or AVX10.1-512 features, using 256 as "
- "max vector size");
- opts->x_ix86_isa_flags2 &= ~OPTION_MASK_ISA2_EVEX512;
- }
- else
- warning (0, "Vector size conflicts between AVX10.1 and AVX512, "
- "using 512 as max vector size");
- }
- else if (TARGET_AVX512F_P (opts->x_ix86_isa_flags)
- && (opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512F)
- && !(OPTION_MASK_ISA2_EVEX512
- & opts->x_ix86_isa_flags2_explicit))
- warning (0, "Vector size conflicts between AVX10.1 and AVX512, using "
- "512 as max vector size");
- else if (opts->x_ix86_no_avx512_explicit
- && (((~(avx512_isa_flags & opts->x_ix86_isa_flags)
- & (avx512_isa_flags & opts->x_ix86_isa_flags_explicit)))
- || ((~(avx512_isa_flags2 & opts->x_ix86_isa_flags2)
- & (avx512_isa_flags2
- & opts->x_ix86_isa_flags2_explicit)))))
- warning (0, "%<-mno-avx512XXX%> cannot disable AVX10 instructions "
- "when AVX10 is available in GCC 15, behavior will change "
- "to it will disable that part of AVX512 instructions since "
- "GCC 16");
- }
- else if (TARGET_AVX512F_P (opts->x_ix86_isa_flags)
- && (OPTION_MASK_ISA_AVX512F & opts->x_ix86_isa_flags_explicit))
- {
- if (opts->x_ix86_no_avx10_1_explicit
- && ((OPTION_MASK_ISA2_AVX10_1_256 | OPTION_MASK_ISA2_AVX10_1)
- & opts->x_ix86_isa_flags2_explicit))
- {
- warning (0, "%<-mno-avx10.1-256, -mno-avx10.1-512%> cannot disable "
- "AVX512 instructions when %<-mavx512XXX%> in GCC 15, "
- "behavior will change to it will disable all the "
- "instructions in GCC 16");
- /* Reset those unset AVX512 flags set by AVX10 options when AVX10 is
- disabled. */
- if (OPTION_MASK_ISA2_AVX10_1_256 & opts->x_ix86_isa_flags2_explicit)
- {
- opts->x_ix86_isa_flags = (~avx512_isa_flags
- & opts->x_ix86_isa_flags)
- | (avx512_isa_flags & opts->x_ix86_isa_flags
- & opts->x_ix86_isa_flags_explicit);
- opts->x_ix86_isa_flags2 = (~avx512_isa_flags2
- & opts->x_ix86_isa_flags2)
- | (avx512_isa_flags2 & opts->x_ix86_isa_flags2
- & opts->x_ix86_isa_flags2_explicit);
- }
- }
- }
-
- /* Set EVEX512 if one of the following conditions meets:
- 1. AVX512 is enabled while EVEX512 is not explicitly set/unset.
- 2. AVX10.1-512 is enabled. */
- if (TARGET_AVX10_1_P (opts->x_ix86_isa_flags2)
- || (TARGET_AVX512F_P (opts->x_ix86_isa_flags)
- && !(opts->x_ix86_isa_flags2_explicit & OPTION_MASK_ISA2_EVEX512)))
- opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA2_EVEX512;
-
- /* Enable all AVX512 related ISAs when AVX10.1 is enabled. */
- if (TARGET_AVX10_1_256_P (opts->x_ix86_isa_flags2))
- {
- opts->x_ix86_isa_flags |= avx512_isa_flags;
- opts->x_ix86_isa_flags2 |= avx512_isa_flags2;
- }
-
/* Validate -mpreferred-stack-boundary= value or default it to
PREFERRED_STACK_BOUNDARY_DEFAULT. */
ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
@@ -2980,7 +2847,9 @@ ix86_option_override_internal (bool main_args_p,
/* Set the default value for -mfentry. */
if (!opts_set->x_flag_fentry)
- opts->x_flag_fentry = TARGET_SEH;
+ opts->x_flag_fentry = (TARGET_SEH
+ || (TARGET_64BIT_P (opts->x_ix86_isa_flags)
+ && ENABLE_X86_64_MFENTRY));
else
{
if (!TARGET_64BIT_P (opts->x_ix86_isa_flags) && opts->x_flag_pic
@@ -2991,6 +2860,17 @@ ix86_option_override_internal (bool main_args_p,
sorry ("%<-mno-fentry%> isn%'t compatible with SEH");
}
+#ifdef OPTION_GLIBC_P
+ /* -mfentry is supported only on glibc targets. */
+ if (!opts->x_flag_fentry
+ && OPTION_GLIBC_P (opts)
+ && (TARGET_64BIT_P (opts->x_ix86_isa_flags) || !opts->x_flag_pic)
+ && opts->x_flag_shrink_wrap
+ && opts->x_profile_flag)
+ warning (0, "%<-pg%> without %<-mfentry%> may be unreliable with "
+ "shrink wrapping");
+#endif
+
if (TARGET_SEH && TARGET_CALL_MS2SYSV_XLOGUES)
sorry ("%<-mcall-ms2sysv-xlogues%> isn%'t currently supported with SEH");
@@ -3049,8 +2929,7 @@ ix86_option_override_internal (bool main_args_p,
opts->x_ix86_move_max = opts->x_prefer_vector_width_type;
if (opts_set->x_ix86_move_max == PVW_NONE)
{
- if (TARGET_AVX512F_P (opts->x_ix86_isa_flags)
- && TARGET_EVEX512_P (opts->x_ix86_isa_flags2))
+ if (TARGET_AVX512F_P (opts->x_ix86_isa_flags))
opts->x_ix86_move_max = PVW_AVX512;
/* Align with vectorizer to avoid potential STLF issue. */
else if (TARGET_AVX_P (opts->x_ix86_isa_flags))
@@ -3076,8 +2955,7 @@ ix86_option_override_internal (bool main_args_p,
opts->x_ix86_store_max = opts->x_prefer_vector_width_type;
if (opts_set->x_ix86_store_max == PVW_NONE)
{
- if (TARGET_AVX512F_P (opts->x_ix86_isa_flags)
- && TARGET_EVEX512_P (opts->x_ix86_isa_flags2))
+ if (TARGET_AVX512F_P (opts->x_ix86_isa_flags))
opts->x_ix86_store_max = PVW_AVX512;
/* Align with vectorizer to avoid potential STLF issue. */
else if (TARGET_AVX_P (opts->x_ix86_isa_flags))
@@ -3374,13 +3252,13 @@ ix86_simd_clone_adjust (struct cgraph_node *node)
case 'e':
if (TARGET_PREFER_AVX256)
{
- if (!TARGET_AVX512F || !TARGET_EVEX512)
- str = "avx512f,evex512,prefer-vector-width=512";
+ if (!TARGET_AVX512F)
+ str = "avx512f,prefer-vector-width=512";
else
str = "prefer-vector-width=512";
}
- else if (!TARGET_AVX512F || !TARGET_EVEX512)
- str = "avx512f,evex512";
+ else if (!TARGET_AVX512F)
+ str = "avx512f";
break;
default:
gcc_unreachable ();
@@ -3420,19 +3298,21 @@ ix86_set_func_type (tree fndecl)
interrupt function in this case. */
enum call_saved_registers_type no_callee_saved_registers
= TYPE_DEFAULT_CALL_SAVED_REGISTERS;
- if (lookup_attribute ("no_callee_saved_registers",
- TYPE_ATTRIBUTES (TREE_TYPE (fndecl))))
+ if (lookup_attribute ("preserve_none",
+ TYPE_ATTRIBUTES (TREE_TYPE (fndecl))))
+ no_callee_saved_registers = TYPE_PRESERVE_NONE;
+ else if ((lookup_attribute ("no_callee_saved_registers",
+ TYPE_ATTRIBUTES (TREE_TYPE (fndecl))))
+ || (ix86_noreturn_no_callee_saved_registers
+ && TREE_THIS_VOLATILE (fndecl)
+ && optimize
+ && !optimize_debug
+ && (TREE_NOTHROW (fndecl) || !flag_exceptions)
+ && !lookup_attribute ("interrupt",
+ TYPE_ATTRIBUTES (TREE_TYPE (fndecl)))
+ && !lookup_attribute ("no_caller_saved_registers",
+ TYPE_ATTRIBUTES (TREE_TYPE (fndecl)))))
no_callee_saved_registers = TYPE_NO_CALLEE_SAVED_REGISTERS;
- else if (ix86_noreturn_no_callee_saved_registers
- && TREE_THIS_VOLATILE (fndecl)
- && optimize
- && !optimize_debug
- && (TREE_NOTHROW (fndecl) || !flag_exceptions)
- && !lookup_attribute ("interrupt",
- TYPE_ATTRIBUTES (TREE_TYPE (fndecl)))
- && !lookup_attribute ("no_caller_saved_registers",
- TYPE_ATTRIBUTES (TREE_TYPE (fndecl))))
- no_callee_saved_registers = TYPE_NO_CALLEE_SAVED_REGISTERS_EXCEPT_BP;
if (cfun->machine->func_type == TYPE_UNKNOWN)
{
@@ -3444,9 +3324,16 @@ ix86_set_func_type (tree fndecl)
"interrupt and naked attributes are not compatible");
if (no_callee_saved_registers)
- error_at (DECL_SOURCE_LOCATION (fndecl),
- "%qs and %qs attributes are not compatible",
- "interrupt", "no_callee_saved_registers");
+ {
+ const char *attr;
+ if (no_callee_saved_registers == TYPE_PRESERVE_NONE)
+ attr = "preserve_none";
+ else
+ attr = "no_callee_saved_registers";
+ error_at (DECL_SOURCE_LOCATION (fndecl),
+ "%qs and %qs attributes are not compatible",
+ "interrupt", attr);
+ }
int nargs = 0;
for (tree arg = DECL_ARGUMENTS (fndecl);
@@ -3468,21 +3355,13 @@ ix86_set_func_type (tree fndecl)
else
{
cfun->machine->func_type = TYPE_NORMAL;
- if (lookup_attribute ("no_caller_saved_registers",
- TYPE_ATTRIBUTES (TREE_TYPE (fndecl))))
+ if (no_callee_saved_registers)
+ cfun->machine->call_saved_registers
+ = no_callee_saved_registers;
+ else if (lookup_attribute ("no_caller_saved_registers",
+ TYPE_ATTRIBUTES (TREE_TYPE (fndecl))))
cfun->machine->call_saved_registers
= TYPE_NO_CALLER_SAVED_REGISTERS;
- if (no_callee_saved_registers)
- {
- if (cfun->machine->call_saved_registers
- == TYPE_NO_CALLER_SAVED_REGISTERS)
- error_at (DECL_SOURCE_LOCATION (fndecl),
- "%qs and %qs attributes are not compatible",
- "no_caller_saved_registers",
- "no_callee_saved_registers");
- cfun->machine->call_saved_registers
- = no_callee_saved_registers;
- }
}
}
}
@@ -3671,11 +3550,25 @@ ix86_set_current_function (tree fndecl)
|| (cfun->machine->call_saved_registers
== TYPE_NO_CALLER_SAVED_REGISTERS))
{
- /* Don't allow SSE, MMX nor x87 instructions since they
- may change processor state. */
+ /* Don't allow AVX, AVX512, MMX nor x87 instructions since they
+ may change processor state. Don't allow SSE instructions in
+ exception/interrupt service routines. */
const char *isa;
if (TARGET_SSE)
- isa = "SSE";
+ {
+ if (TARGET_AVX512F)
+ isa = "AVX512";
+ else if (TARGET_AVX)
+ isa = "AVX";
+ else if (cfun->machine->func_type != TYPE_NORMAL)
+ isa = "SSE";
+ else if (TARGET_MMX)
+ isa = "MMX/3Dnow";
+ else if (TARGET_80387)
+ isa = "80387";
+ else
+ isa = NULL;
+ }
else if (TARGET_MMX)
isa = "MMX/3Dnow";
else if (TARGET_80387)
@@ -3732,6 +3625,18 @@ ix86_handle_cconv_attribute (tree *node, tree name, tree args, int,
return NULL_TREE;
}
+ if (TARGET_64BIT)
+ {
+ /* Do not warn when emulating the MS ABI. */
+ if ((TREE_CODE (*node) != FUNCTION_TYPE
+ && TREE_CODE (*node) != METHOD_TYPE)
+ || ix86_function_type_abi (*node) != MS_ABI)
+ warning (OPT_Wattributes, "%qE attribute ignored",
+ name);
+ *no_add_attrs = true;
+ return NULL_TREE;
+ }
+
/* Can combine regparm with all attributes but fastcall, and thiscall. */
if (is_attribute_p ("regparm", name))
{
@@ -3744,7 +3649,7 @@ ix86_handle_cconv_attribute (tree *node, tree name, tree args, int,
if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
{
- error ("regparam and thiscall attributes are not compatible");
+ error ("regparm and thiscall attributes are not compatible");
}
cst = TREE_VALUE (args);
@@ -3765,19 +3670,7 @@ ix86_handle_cconv_attribute (tree *node, tree name, tree args, int,
return NULL_TREE;
}
- if (TARGET_64BIT)
- {
- /* Do not warn when emulating the MS ABI. */
- if ((TREE_CODE (*node) != FUNCTION_TYPE
- && TREE_CODE (*node) != METHOD_TYPE)
- || ix86_function_type_abi (*node) != MS_ABI)
- warning (OPT_Wattributes, "%qE attribute ignored",
- name);
- *no_add_attrs = true;
- return NULL_TREE;
- }
-
- /* Can combine fastcall with stdcall (redundant) and sseregparm. */
+ /* Can combine fastcall with sseregparm. */
if (is_attribute_p ("fastcall", name))
{
if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
@@ -3798,8 +3691,7 @@ ix86_handle_cconv_attribute (tree *node, tree name, tree args, int,
}
}
- /* Can combine stdcall with fastcall (redundant), regparm and
- sseregparm. */
+ /* Can combine stdcall with regparm and sseregparm. */
else if (is_attribute_p ("stdcall", name))
{
if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
@@ -3849,6 +3741,10 @@ ix86_handle_cconv_attribute (tree *node, tree name, tree args, int,
{
error ("cdecl and thiscall attributes are not compatible");
}
+ if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
+ {
+ error ("regparm and thiscall attributes are not compatible");
+ }
}
/* Can combine sseregparm with all attributes. */
@@ -4100,9 +3996,50 @@ ix86_handle_fndecl_attribute (tree *node, tree name, tree args, int,
}
static tree
-ix86_handle_call_saved_registers_attribute (tree *, tree, tree,
+ix86_handle_call_saved_registers_attribute (tree *node, tree name, tree,
int, bool *)
{
+ const char *attr1 = nullptr;
+ const char *attr2 = nullptr;
+
+ if (is_attribute_p ("no_callee_saved_registers", name))
+ {
+ /* Disallow preserve_none and no_caller_saved_registers
+ attributes. */
+ attr1 = "no_callee_saved_registers";
+ if (lookup_attribute ("preserve_none", TYPE_ATTRIBUTES (*node)))
+ attr2 = "preserve_none";
+ else if (lookup_attribute ("no_caller_saved_registers",
+ TYPE_ATTRIBUTES (*node)))
+ attr2 = "no_caller_saved_registers";
+ }
+ else if (is_attribute_p ("no_caller_saved_registers", name))
+ {
+ /* Disallow preserve_none and no_callee_saved_registers
+ attributes. */
+ attr1 = "no_caller_saved_registers";
+ if (lookup_attribute ("preserve_none", TYPE_ATTRIBUTES (*node)))
+ attr2 = "preserve_none";
+ else if (lookup_attribute ("no_callee_saved_registers",
+ TYPE_ATTRIBUTES (*node)))
+ attr2 = "no_callee_saved_registers";
+ }
+ else if (is_attribute_p ("preserve_none", name))
+ {
+ /* Disallow no_callee_saved_registers and no_caller_saved_registers
+ attributes. */
+ attr1 = "preserve_none";
+ if (lookup_attribute ("no_callee_saved_registers",
+ TYPE_ATTRIBUTES (*node)))
+ attr2 = "no_caller_saved_registers";
+ else if (lookup_attribute ("no_callee_saved_registers",
+ TYPE_ATTRIBUTES (*node)))
+ attr2 = "no_callee_saved_registers";
+ }
+
+ if (attr2)
+ error ("%qs and %qs attributes are not compatible", attr1, attr2);
+
return NULL_TREE;
}
@@ -4264,6 +4201,8 @@ static const attribute_spec ix86_gnu_attributes[] =
ix86_handle_interrupt_attribute, NULL },
{ "no_caller_saved_registers", 0, 0, false, true, true, false,
ix86_handle_call_saved_registers_attribute, NULL },
+ { "preserve_none", 0, 0, false, true, true, true,
+ ix86_handle_call_saved_registers_attribute, NULL },
{ "no_callee_saved_registers", 0, 0, false, true, true, true,
ix86_handle_call_saved_registers_attribute, NULL },
{ "naked", 0, 0, true, false, false, false,
diff --git a/gcc/config/i386/i386-passes.def b/gcc/config/i386/i386-passes.def
index 39f8bc6..553b46d 100644
--- a/gcc/config/i386/i386-passes.def
+++ b/gcc/config/i386/i386-passes.def
@@ -35,5 +35,6 @@ along with GCC; see the file COPYING3. If not see
PR116174. */
INSERT_PASS_BEFORE (pass_shorten_branches, 1, pass_align_tight_loops);
+ INSERT_PASS_AFTER (pass_late_combine, 1, pass_x86_cse);
INSERT_PASS_AFTER (pass_late_combine, 1, pass_remove_partial_avx_dependency);
INSERT_PASS_AFTER (pass_rtl_ifcvt, 1, pass_apx_nf_convert);
diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h
index bea3fd4..bdb8bb9 100644
--- a/gcc/config/i386/i386-protos.h
+++ b/gcc/config/i386/i386-protos.h
@@ -186,6 +186,7 @@ extern void ix86_expand_v2di_ashiftrt (rtx[]);
extern rtx ix86_replace_reg_with_reg (rtx, rtx, rtx);
extern rtx ix86_find_base_term (rtx);
extern bool ix86_check_movabs (rtx, int);
+extern bool ix86_check_movs (rtx, int);
extern bool ix86_check_no_addr_space (rtx);
extern void ix86_split_idivmod (machine_mode, rtx[], bool);
extern bool ix86_hardreg_mov_ok (rtx, rtx);
@@ -198,6 +199,7 @@ extern int ix86_attr_length_vex_default (rtx_insn *, bool, bool);
extern rtx ix86_libcall_value (machine_mode);
extern bool ix86_function_arg_regno_p (int);
extern void ix86_asm_output_function_label (FILE *, const char *, tree);
+extern void ix86_asm_output_labelref (FILE *, const char *, const char *);
extern void ix86_call_abi_override (const_tree);
extern int ix86_reg_parm_stack_space (const_tree);
@@ -280,6 +282,7 @@ extern tree ix86_valid_target_attribute_tree (tree, tree,
struct gcc_options *,
struct gcc_options *, bool);
extern unsigned int ix86_get_callcvt (const_tree);
+extern bool ix86_type_no_callee_saved_registers_p (const_tree);
#endif
@@ -287,6 +290,7 @@ extern rtx ix86_tls_module_base (void);
extern bool ix86_gpr_tls_address_pattern_p (rtx);
extern bool ix86_tls_address_pattern_p (rtx);
extern rtx ix86_rewrite_tls_address (rtx);
+extern rtx ix86_tls_get_addr (void);
extern void ix86_expand_vector_init (bool, rtx, rtx);
extern void ix86_expand_vector_set (bool, rtx, rtx, int);
@@ -427,12 +431,21 @@ extern rtl_opt_pass *make_pass_insert_endbr_and_patchable_area
(gcc::context *);
extern rtl_opt_pass *make_pass_remove_partial_avx_dependency
(gcc::context *);
+extern rtl_opt_pass *make_pass_x86_cse (gcc::context *);
extern rtl_opt_pass *make_pass_apx_nf_convert (gcc::context *);
extern rtl_opt_pass *make_pass_align_tight_loops (gcc::context *);
extern bool ix86_has_no_direct_extern_access;
extern bool ix86_rpad_gate ();
+extern sbitmap ix86_get_separate_components (void);
+extern sbitmap ix86_components_for_bb (basic_block);
+extern void ix86_disqualify_components (sbitmap, edge, sbitmap, bool);
+extern void ix86_emit_prologue_components (sbitmap);
+extern void ix86_emit_epilogue_components (sbitmap);
+extern void ix86_set_handled_components (sbitmap);
+
/* In i386-expand.cc. */
bool ix86_check_builtin_isa_match (unsigned int, HOST_WIDE_INT*,
HOST_WIDE_INT*);
+rtx ix86_vgf2p8affine_shift_matrix (rtx, rtx, enum rtx_code);
diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index 38df84f..471be3e 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -100,6 +100,7 @@ along with GCC; see the file COPYING3. If not see
#include "i386-features.h"
#include "function-abi.h"
#include "rtl-error.h"
+#include "gimple-pretty-print.h"
/* This file should be included last. */
#include "target-def.h"
@@ -334,6 +335,14 @@ static int const x86_64_ms_abi_int_parameter_registers[4] =
CX_REG, DX_REG, R8_REG, R9_REG
};
+/* Similar to Clang's preserve_none function parameter passing.
+ NB: Use DI_REG and SI_REG, see ix86_function_value_regno_p. */
+
+static int const x86_64_preserve_none_int_parameter_registers[6] =
+{
+ R12_REG, R13_REG, R14_REG, R15_REG, DI_REG, SI_REG
+};
+
static int const x86_64_int_return_registers[4] =
{
AX_REG, DX_REG, DI_REG, SI_REG
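A sketch of the convention the new register array implements (illustrative
only, assuming the 64-bit SysV ABI; the function is hypothetical):

__attribute__((preserve_none)) long
sum3 (long a, long b, long c)
{
  /* Arguments arrive in %r12, %r13 and %r14 instead of %rdi, %rsi and
     %rdx; the return value stays in %rax as usual.  */
  return a + b + c;
}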
@@ -459,7 +468,8 @@ int ix86_arch_specified;
red-zone.
NB: Don't use red-zone for functions with no_caller_saved_registers
- and 32 GPRs since 128-byte red-zone is too small for 31 GPRs.
   and 32 GPRs or 16 XMM registers since the 128-byte red zone is too small
+ for 31 GPRs or 15 GPRs + 16 XMM registers.
TODO: If we can reserve the first 2 WORDs, for PUSH and, another
for CALL, in red-zone, we can allow local indirect jumps with
@@ -470,7 +480,7 @@ ix86_using_red_zone (void)
{
return (TARGET_RED_ZONE
&& !TARGET_64BIT_MS_ABI
- && (!TARGET_APX_EGPR
+ && ((!TARGET_APX_EGPR && !TARGET_SSE)
|| (cfun->machine->call_saved_registers
!= TYPE_NO_CALLER_SAVED_REGISTERS))
&& (!cfun->machine->has_local_indirect_jump
@@ -897,6 +907,18 @@ x86_64_elf_unique_section (tree decl, int reloc)
default_unique_section (decl, reloc);
}
+/* Return true if TYPE has the no_callee_saved_registers or preserve_none
+ attribute. */
+
+bool
+ix86_type_no_callee_saved_registers_p (const_tree type)
+{
+ return (lookup_attribute ("no_callee_saved_registers",
+ TYPE_ATTRIBUTES (type)) != NULL
+ || lookup_attribute ("preserve_none",
+ TYPE_ATTRIBUTES (type)) != NULL);
+}
+
#ifdef COMMON_ASM_OP
#ifndef LARGECOMM_SECTION_ASM_OP
@@ -1018,11 +1040,10 @@ ix86_function_ok_for_sibcall (tree decl, tree exp)
/* Sibling call isn't OK if callee has no callee-saved registers
and the calling function has callee-saved registers. */
- if (cfun->machine->call_saved_registers != TYPE_NO_CALLEE_SAVED_REGISTERS
- && (cfun->machine->call_saved_registers
- != TYPE_NO_CALLEE_SAVED_REGISTERS_EXCEPT_BP)
- && lookup_attribute ("no_callee_saved_registers",
- TYPE_ATTRIBUTES (type)))
+ if ((cfun->machine->call_saved_registers
+ != TYPE_NO_CALLEE_SAVED_REGISTERS)
+ && cfun->machine->call_saved_registers != TYPE_PRESERVE_NONE
+ && ix86_type_no_callee_saved_registers_p (type))
return false;
/* If outgoing reg parm stack space changes, we cannot do sibcall. */
@@ -1187,10 +1208,16 @@ ix86_comp_type_attributes (const_tree type1, const_tree type2)
!= ix86_function_regparm (type2, NULL))
return 0;
- if (lookup_attribute ("no_callee_saved_registers",
- TYPE_ATTRIBUTES (type1))
- != lookup_attribute ("no_callee_saved_registers",
- TYPE_ATTRIBUTES (type2)))
+ if (ix86_type_no_callee_saved_registers_p (type1)
+ != ix86_type_no_callee_saved_registers_p (type2))
+ return 0;
+
+  /* The preserve_none attribute uses a different calling convention and
+     is only for 64-bit.  */
+ if (TARGET_64BIT
+ && (lookup_attribute ("preserve_none", TYPE_ATTRIBUTES (type1))
+ != lookup_attribute ("preserve_none",
+ TYPE_ATTRIBUTES (type2))))
return 0;
return 1;
@@ -1552,7 +1579,10 @@ ix86_function_arg_regno_p (int regno)
if (call_abi == SYSV_ABI && regno == AX_REG)
return true;
- if (call_abi == MS_ABI)
+ if (cfun
+ && cfun->machine->call_saved_registers == TYPE_PRESERVE_NONE)
+ parm_regs = x86_64_preserve_none_int_parameter_registers;
+ else if (call_abi == MS_ABI)
parm_regs = x86_64_ms_abi_int_parameter_registers;
else
parm_regs = x86_64_int_parameter_registers;
@@ -1715,6 +1745,19 @@ ix86_asm_output_function_label (FILE *out_file, const char *fname,
}
}
+/* Output a user-defined label. In AT&T syntax, registers are prefixed
+ with %, so labels require no punctuation. In Intel syntax, registers
+ are unprefixed, so labels may clash with registers or other operators,
+ and require quoting. */
+void
+ix86_asm_output_labelref (FILE *file, const char *prefix, const char *label)
+{
+ if (ASSEMBLER_DIALECT == ASM_ATT)
+ fprintf (file, "%s%s", prefix, label);
+ else
+ fprintf (file, "\"%s%s\"", prefix, label);
+}
+
/* Implementation of call abi switching target hook. Specific to FNDECL
the specific call register sets are set. See also
ix86_conditional_register_usage for more details. */
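An illustration of the quoting rule (hypothetical source, not from the patch):

int eax;	/* a global whose name collides with a register */
/* AT&T:   movl  eax(%rip), %edx            -- '%' disambiguates registers
   Intel:  mov   edx, DWORD PTR "eax"[rip]  -- the label must be quoted  */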
@@ -1794,8 +1837,7 @@ ix86_init_pic_reg (void)
add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
}
- seq = get_insns ();
- end_sequence ();
+ seq = end_sequence ();
entry_edge = single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun));
insert_insn_on_edge (seq, entry_edge);
@@ -1822,6 +1864,7 @@ init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
memset (cum, 0, sizeof (*cum));
+ tree preserve_none_type;
if (fndecl)
{
target = cgraph_node::get (fndecl);
@@ -1830,12 +1873,24 @@ init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
target = target->function_symbol ();
local_info_node = cgraph_node::local_info_node (target->decl);
cum->call_abi = ix86_function_abi (target->decl);
+ preserve_none_type = TREE_TYPE (target->decl);
}
else
- cum->call_abi = ix86_function_abi (fndecl);
+ {
+ cum->call_abi = ix86_function_abi (fndecl);
+ preserve_none_type = TREE_TYPE (fndecl);
+ }
}
else
- cum->call_abi = ix86_function_type_abi (fntype);
+ {
+ cum->call_abi = ix86_function_type_abi (fntype);
+ preserve_none_type = fntype;
+ }
+ cum->preserve_none_abi
+ = (preserve_none_type
+ && (lookup_attribute ("preserve_none",
+ TYPE_ATTRIBUTES (preserve_none_type))
+ != nullptr));
cum->caller = caller;
@@ -1997,8 +2052,7 @@ type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum,
if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
&& GET_MODE_INNER (mode) == innermode)
{
- if (size == 64 && (!TARGET_AVX512F || !TARGET_EVEX512)
- && !TARGET_IAMCU)
+ if (size == 64 && !TARGET_AVX512F && !TARGET_IAMCU)
{
static bool warnedavx512f;
static bool warnedavx512f_ret;
@@ -3409,9 +3463,15 @@ function_arg_64 (const CUMULATIVE_ARGS *cum, machine_mode mode,
break;
}
+ const int *parm_regs;
+ if (cum->preserve_none_abi)
+ parm_regs = x86_64_preserve_none_int_parameter_registers;
+ else
+ parm_regs = x86_64_int_parameter_registers;
+
return construct_container (mode, orig_mode, type, 0, cum->nregs,
cum->sse_nregs,
- &x86_64_int_parameter_registers [cum->regno],
+ &parm_regs[cum->regno],
cum->sse_regno);
}
@@ -4421,7 +4481,7 @@ ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
/* AVX512F values are returned in ZMM0 if available. */
if (size == 64)
- return !TARGET_AVX512F || !TARGET_EVEX512;
+ return !TARGET_AVX512F;
}
if (mode == XFmode)
@@ -4576,6 +4636,12 @@ setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
if (max > X86_64_REGPARM_MAX)
max = X86_64_REGPARM_MAX;
+ const int *parm_regs;
+ if (cum->preserve_none_abi)
+ parm_regs = x86_64_preserve_none_int_parameter_registers;
+ else
+ parm_regs = x86_64_int_parameter_registers;
+
for (i = cum->regno; i < max; i++)
{
mem = gen_rtx_MEM (word_mode,
@@ -4583,8 +4649,7 @@ setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
MEM_NOTRAP_P (mem) = 1;
set_mem_alias_set (mem, set);
emit_move_insn (mem,
- gen_rtx_REG (word_mode,
- x86_64_int_parameter_registers[i]));
+ gen_rtx_REG (word_mode, parm_regs[i]));
}
if (ix86_varargs_fpr_size)
@@ -4738,8 +4803,7 @@ ix86_va_start (tree valist, rtx nextarg)
start_sequence ();
emit_move_insn (reg, gen_rtx_REG (Pmode, scratch_regno));
- seq = get_insns ();
- end_sequence ();
+ seq = end_sequence ();
push_topmost_sequence ();
emit_insn_after (seq, entry_of_function ());
@@ -5179,6 +5243,27 @@ ix86_check_movabs (rtx insn, int opnum)
return volatile_ok || !MEM_VOLATILE_P (mem);
}
+/* Return true if element IDX of the PARALLEL in INSN satisfies the
+   address-space constraints of the MOVS instruction.  */
+bool
+ix86_check_movs (rtx insn, int idx)
+{
+ rtx pat = PATTERN (insn);
+ gcc_assert (GET_CODE (pat) == PARALLEL);
+
+ rtx set = XVECEXP (pat, 0, idx);
+ gcc_assert (GET_CODE (set) == SET);
+
+ rtx dst = SET_DEST (set);
+ gcc_assert (MEM_P (dst));
+
+ rtx src = SET_SRC (set);
+ gcc_assert (MEM_P (src));
+
+ return (ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (dst))
+ && (ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (src))
+ || Pmode == word_mode));
+}
+
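A sketch of the constraint this encodes (illustrative; __seg_gs is GCC's
named-address-space extension): movs takes a segment override only on its
source operand, so only the source may be non-generic, and only when Pmode
matches word_mode (i.e. not x32):

void
copy_from_gs (char *dst, const char __seg_gs *src, unsigned n)
{
  /* Eligible for rep movs: generic destination, %gs-based source.  */
  while (n--)
    *dst++ = *src++;
}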
/* Return false if INSN contains a MEM with a non-default address space. */
bool
ix86_check_no_addr_space (rtx insn)
@@ -5355,7 +5440,7 @@ standard_sse_constant_p (rtx x, machine_mode pred_mode)
switch (GET_MODE_SIZE (mode))
{
case 64:
- if (TARGET_AVX512F && TARGET_EVEX512)
+ if (TARGET_AVX512F)
return 2;
break;
case 32:
@@ -5408,10 +5493,8 @@ standard_sse_constant_opcode (rtx_insn *insn, rtx *operands)
{
if (TARGET_AVX512VL)
return "vpxord\t%x0, %x0, %x0";
- else if (TARGET_EVEX512)
- return "vpxord\t%g0, %g0, %g0";
else
- gcc_unreachable ();
+ return "vpxord\t%g0, %g0, %g0";
}
return "vpxor\t%x0, %x0, %x0";
@@ -5427,19 +5510,15 @@ standard_sse_constant_opcode (rtx_insn *insn, rtx *operands)
{
if (TARGET_AVX512VL)
return "vxorpd\t%x0, %x0, %x0";
- else if (TARGET_EVEX512)
- return "vxorpd\t%g0, %g0, %g0";
else
- gcc_unreachable ();
+ return "vxorpd\t%g0, %g0, %g0";
}
else
{
if (TARGET_AVX512VL)
return "vpxorq\t%x0, %x0, %x0";
- else if (TARGET_EVEX512)
- return "vpxorq\t%g0, %g0, %g0";
else
- gcc_unreachable ();
+ return "vpxorq\t%g0, %g0, %g0";
}
}
return "vxorpd\t%x0, %x0, %x0";
@@ -5456,19 +5535,15 @@ standard_sse_constant_opcode (rtx_insn *insn, rtx *operands)
{
if (TARGET_AVX512VL)
return "vxorps\t%x0, %x0, %x0";
- else if (TARGET_EVEX512)
- return "vxorps\t%g0, %g0, %g0";
else
- gcc_unreachable ();
+ return "vxorps\t%g0, %g0, %g0";
}
else
{
if (TARGET_AVX512VL)
return "vpxord\t%x0, %x0, %x0";
- else if (TARGET_EVEX512)
- return "vpxord\t%g0, %g0, %g0";
else
- gcc_unreachable ();
+ return "vpxord\t%g0, %g0, %g0";
}
}
return "vxorps\t%x0, %x0, %x0";
@@ -5489,7 +5564,7 @@ standard_sse_constant_opcode (rtx_insn *insn, rtx *operands)
case MODE_XI:
case MODE_V8DF:
case MODE_V16SF:
- gcc_assert (TARGET_AVX512F && TARGET_EVEX512);
+ gcc_assert (TARGET_AVX512F);
return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";
case MODE_OI:
@@ -5505,10 +5580,8 @@ standard_sse_constant_opcode (rtx_insn *insn, rtx *operands)
{
if (TARGET_AVX512VL)
return "vpternlogd\t{$0xFF, %0, %0, %0|%0, %0, %0, 0xFF}";
- else if (TARGET_EVEX512)
- return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";
else
- gcc_unreachable ();
+ return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";
}
return (TARGET_AVX
? "vpcmpeqd\t%0, %0, %0"
@@ -5522,7 +5595,7 @@ standard_sse_constant_opcode (rtx_insn *insn, rtx *operands)
{
if (GET_MODE_SIZE (mode) == 64)
{
- gcc_assert (TARGET_AVX512F && TARGET_EVEX512);
+ gcc_assert (TARGET_AVX512F);
return "vpcmpeqd\t%t0, %t0, %t0";
}
else if (GET_MODE_SIZE (mode) == 32)
@@ -5534,7 +5607,7 @@ standard_sse_constant_opcode (rtx_insn *insn, rtx *operands)
}
else if (vector_all_ones_zero_extend_quarter_operand (x, mode))
{
- gcc_assert (TARGET_AVX512F && TARGET_EVEX512);
+ gcc_assert (TARGET_AVX512F);
return "vpcmpeqd\t%x0, %x0, %x0";
}
@@ -5645,8 +5718,6 @@ ix86_get_ssemov (rtx *operands, unsigned size,
|| memory_operand (operands[1], mode))
gcc_unreachable ();
size = 64;
- /* We need TARGET_EVEX512 to move into zmm register. */
- gcc_assert (TARGET_EVEX512);
switch (type)
{
case opcode_int:
@@ -5685,7 +5756,7 @@ ix86_get_ssemov (rtx *operands, unsigned size,
: "%vmovaps");
else
opcode = (misaligned_p
- ? (TARGET_AVX512BW
+ ? (TARGET_AVX512BW && evex_reg_p
? "vmovdqu16"
: "%vmovdqu")
: "%vmovdqa");
@@ -5727,7 +5798,7 @@ ix86_get_ssemov (rtx *operands, unsigned size,
: "%vmovaps");
else
opcode = (misaligned_p
- ? (TARGET_AVX512BW
+ ? (TARGET_AVX512BW && evex_reg_p
? "vmovdqu8"
: "%vmovdqu")
: "%vmovdqa");
@@ -5747,7 +5818,7 @@ ix86_get_ssemov (rtx *operands, unsigned size,
: "%vmovaps");
else
opcode = (misaligned_p
- ? (TARGET_AVX512BW
+ ? (TARGET_AVX512BW && evex_reg_p
? "vmovdqu16"
: "%vmovdqu")
: "%vmovdqa");
@@ -5908,7 +5979,7 @@ symbolic_reference_mentioned_p (rtx op)
const char *fmt;
int i;
- if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
+ if (SYMBOL_REF_P (op) || LABEL_REF_P (op))
return true;
fmt = GET_RTX_FORMAT (GET_CODE (op));
@@ -6455,7 +6526,7 @@ output_set_got (rtx dest, rtx label)
xops[0] = dest;
- if (TARGET_VXWORKS_RTP && flag_pic)
+ if (TARGET_VXWORKS_GOTTPIC && TARGET_VXWORKS_RTP && flag_pic)
{
/* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
xops[2] = gen_rtx_MEM (Pmode,
@@ -6700,9 +6771,7 @@ ix86_save_reg (unsigned int regno, bool maybe_eh_return, bool ignore_outlined)
|| !frame_pointer_needed));
case TYPE_NO_CALLEE_SAVED_REGISTERS:
- return false;
-
- case TYPE_NO_CALLEE_SAVED_REGISTERS_EXCEPT_BP:
+ case TYPE_PRESERVE_NONE:
if (regno != HARD_FRAME_POINTER_REGNUM)
return false;
break;
@@ -6779,7 +6848,9 @@ ix86_nsaved_sseregs (void)
int nregs = 0;
int regno;
- if (!TARGET_64BIT_MS_ABI)
+ if (!TARGET_64BIT_MS_ABI
+ && (cfun->machine->call_saved_registers
+ != TYPE_NO_CALLER_SAVED_REGISTERS))
return 0;
for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true, true))
@@ -6887,6 +6958,26 @@ ix86_pro_and_epilogue_can_use_push2pop2 (int nregs)
&& (nregs + aligned) >= 3;
}
+/* Check if push/pop should be used to save/restore registers. */
+static bool
+save_regs_using_push_pop (HOST_WIDE_INT to_allocate)
+{
+ return ((!to_allocate && cfun->machine->frame.nregs <= 1)
+ || (TARGET_64BIT && to_allocate >= HOST_WIDE_INT_C (0x80000000))
+ /* If static stack checking is enabled and done with probes,
+ the registers need to be saved before allocating the frame. */
+ || flag_stack_check == STATIC_BUILTIN_STACK_CHECK
+ /* If stack clash probing needs a loop, then it needs a
+ scratch register. But the returned register is only guaranteed
+ to be safe to use after register saves are complete. So if
+ stack clash protections are enabled and the allocated frame is
+ larger than the probe interval, then use pushes to save
+ callee saved registers. */
+ || (flag_stack_clash_protection
+ && !ix86_target_stack_probe ()
+ && to_allocate > get_probe_interval ()));
+}
+
/* Fill structure ix86_frame about frame of currently computed function. */
static void
@@ -6967,12 +7058,18 @@ ix86_compute_frame_layout (void)
gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
gcc_assert (preferred_alignment <= stack_alignment_needed);
- /* The only ABI saving SSE regs should be 64-bit ms_abi. */
- gcc_assert (TARGET_64BIT || !frame->nsseregs);
+ /* The only ABI saving SSE regs should be 64-bit ms_abi or with
+     the no_caller_saved_registers attribute.  */
+ gcc_assert (TARGET_64BIT
+ || (cfun->machine->call_saved_registers
+ == TYPE_NO_CALLER_SAVED_REGISTERS)
+ || !frame->nsseregs);
if (TARGET_64BIT && m->call_ms2sysv)
{
gcc_assert (stack_alignment_needed >= 16);
- gcc_assert (!frame->nsseregs);
+ gcc_assert ((cfun->machine->call_saved_registers
+ == TYPE_NO_CALLER_SAVED_REGISTERS)
+ || !frame->nsseregs);
}
/* For SEH we have to limit the amount of code movement into the prologue.
@@ -7171,20 +7268,7 @@ ix86_compute_frame_layout (void)
/* Size prologue needs to allocate. */
to_allocate = offset - frame->sse_reg_save_offset;
- if ((!to_allocate && frame->nregs <= 1)
- || (TARGET_64BIT && to_allocate >= HOST_WIDE_INT_C (0x80000000))
- /* If static stack checking is enabled and done with probes,
- the registers need to be saved before allocating the frame. */
- || flag_stack_check == STATIC_BUILTIN_STACK_CHECK
- /* If stack clash probing needs a loop, then it needs a
- scratch register. But the returned register is only guaranteed
- to be safe to use after register saves are complete. So if
- stack clash protections are enabled and the allocated frame is
- larger than the probe interval, then use pushes to save
- callee saved registers. */
- || (flag_stack_clash_protection
- && !ix86_target_stack_probe ()
- && to_allocate > get_probe_interval ()))
+ if (save_regs_using_push_pop (to_allocate))
frame->save_regs_using_mov = false;
if (ix86_using_red_zone ()
@@ -7642,7 +7726,9 @@ ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset)
for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true))
{
- ix86_emit_save_reg_using_mov (word_mode, regno, cfa_offset);
+	/* Skip registers already processed by separate shrink wrapping.  */
+ if (!cfun->machine->reg_is_wrapped_separately[regno])
+ ix86_emit_save_reg_using_mov (word_mode, regno, cfa_offset);
cfa_offset -= UNITS_PER_WORD;
}
}
@@ -7735,8 +7821,15 @@ pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
add_frame_related_expr = true;
}
- insn = emit_insn (gen_pro_epilogue_adjust_stack_add
- (Pmode, dest, src, addend));
+  /* Shrink wrap separate may insert the prologue between TEST and JMP.  In
+     order not to affect EFLAGS, emit an add that does not clobber it.  */
+ if (crtl->shrink_wrapped_separate)
+ insn = emit_insn (gen_pro_epilogue_adjust_stack_add_nocc
+ (Pmode, dest, src, addend));
+ else
+ insn = emit_insn (gen_pro_epilogue_adjust_stack_add
+ (Pmode, dest, src, addend));
+
if (style >= 0)
ix86_add_queued_cfa_restore_notes (insn);
@@ -7920,6 +8013,15 @@ ix86_update_stack_boundary (void)
if (ix86_tls_descriptor_calls_expanded_in_cfun
&& crtl->preferred_stack_boundary < 128)
crtl->preferred_stack_boundary = 128;
+
+ /* For 32-bit MS ABI, both the incoming and preferred stack boundaries
+ are 32 bits, but if force_align_arg_pointer is specified, it should
+ prefer 128 bits for a backward-compatibility reason, which is also
+ what the doc suggests. */
+ if (lookup_attribute ("force_align_arg_pointer",
+ TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl)))
+ && crtl->preferred_stack_boundary < 128)
+ crtl->preferred_stack_boundary = 128;
}
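A hypothetical case the new check covers: a 32-bit ms_abi callback whose
caller only guarantees 4-byte stack alignment.

__attribute__((force_align_arg_pointer))
void on_event (void)
{
  /* The prologue realigns the stack to 16 bytes, so SSE stack
     temporaries created here are safe despite the 4-byte incoming
     alignment.  */
}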
/* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
@@ -7950,8 +8052,7 @@ ix86_get_drap_rtx (void)
start_sequence ();
drap_vreg = copy_to_reg (arg_ptr);
- seq = get_insns ();
- end_sequence ();
+ seq = end_sequence ();
insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
if (!optimize)
@@ -8472,6 +8573,128 @@ output_probe_stack_range (rtx reg, rtx end)
return "";
}
+/* Data passed to ix86_update_stack_alignment. */
+struct stack_access_data
+{
+ /* The stack access register. */
+ const_rtx reg;
+ /* Pointer to stack alignment. */
+ unsigned int *stack_alignment;
+};
+
+/* Update the maximum stack slot alignment from memory alignment in PAT. */
+
+static void
+ix86_update_stack_alignment (rtx, const_rtx pat, void *data)
+{
+  /* This insn may reference a stack slot.  Update the maximum stack slot
+ alignment if the memory is referenced by the stack access register. */
+ stack_access_data *p = (stack_access_data *) data;
+
+ subrtx_iterator::array_type array;
+ FOR_EACH_SUBRTX (iter, array, pat, ALL)
+ {
+ auto op = *iter;
+ if (MEM_P (op))
+ {
+ if (reg_mentioned_p (p->reg, XEXP (op, 0)))
+ {
+ unsigned int alignment = MEM_ALIGN (op);
+
+ if (alignment > *p->stack_alignment)
+ *p->stack_alignment = alignment;
+ break;
+ }
+ else
+ iter.skip_subrtxes ();
+ }
+ }
+}
+
+/* Helper function for ix86_find_all_reg_uses. */
+
+static void
+ix86_find_all_reg_uses_1 (HARD_REG_SET &regset,
+ rtx set, unsigned int regno,
+ auto_bitmap &worklist)
+{
+ rtx dest = SET_DEST (set);
+
+ if (!REG_P (dest))
+ return;
+
+ /* Reject non-Pmode modes. */
+ if (GET_MODE (dest) != Pmode)
+ return;
+
+ unsigned int dst_regno = REGNO (dest);
+
+ if (TEST_HARD_REG_BIT (regset, dst_regno))
+ return;
+
+ const_rtx src = SET_SRC (set);
+
+ subrtx_iterator::array_type array;
+ FOR_EACH_SUBRTX (iter, array, src, ALL)
+ {
+ auto op = *iter;
+
+ if (MEM_P (op))
+ iter.skip_subrtxes ();
+
+ if (REG_P (op) && REGNO (op) == regno)
+ {
+ /* Add this register to register set. */
+ add_to_hard_reg_set (&regset, Pmode, dst_regno);
+ bitmap_set_bit (worklist, dst_regno);
+ break;
+ }
+ }
+}
+
+/* Find all registers whose definitions use register REGNO.  */
+
+static void
+ix86_find_all_reg_uses (HARD_REG_SET &regset,
+ unsigned int regno, auto_bitmap &worklist)
+{
+ for (df_ref ref = DF_REG_USE_CHAIN (regno);
+ ref != NULL;
+ ref = DF_REF_NEXT_REG (ref))
+ {
+ if (DF_REF_IS_ARTIFICIAL (ref))
+ continue;
+
+ rtx_insn *insn = DF_REF_INSN (ref);
+
+ if (!NONJUMP_INSN_P (insn))
+ continue;
+
+ unsigned int ref_regno = DF_REF_REGNO (ref);
+
+ rtx set = single_set (insn);
+ if (set)
+ {
+ ix86_find_all_reg_uses_1 (regset, set,
+ ref_regno, worklist);
+ continue;
+ }
+
+ rtx pat = PATTERN (insn);
+ if (GET_CODE (pat) != PARALLEL)
+ continue;
+
+ for (int i = 0; i < XVECLEN (pat, 0); i++)
+ {
+ rtx exp = XVECEXP (pat, 0, i);
+
+ if (GET_CODE (exp) == SET)
+ ix86_find_all_reg_uses_1 (regset, exp,
+ ref_regno, worklist);
+ }
+ }
+}
+
/* Set stack_frame_required to false if stack frame isn't required.
Update STACK_ALIGNMENT to the largest alignment, in bits, of stack
slot used if stack frame is required and CHECK_STACK_SLOT is true. */
@@ -8490,10 +8713,6 @@ ix86_find_max_used_stack_alignment (unsigned int &stack_alignment,
add_to_hard_reg_set (&set_up_by_prologue, Pmode,
HARD_FRAME_POINTER_REGNUM);
- /* The preferred stack alignment is the minimum stack alignment. */
- if (stack_alignment > crtl->preferred_stack_boundary)
- stack_alignment = crtl->preferred_stack_boundary;
-
bool require_stack_frame = false;
FOR_EACH_BB_FN (bb, cfun)
@@ -8505,27 +8724,67 @@ ix86_find_max_used_stack_alignment (unsigned int &stack_alignment,
set_up_by_prologue))
{
require_stack_frame = true;
-
- if (check_stack_slot)
- {
- /* Find the maximum stack alignment. */
- subrtx_iterator::array_type array;
- FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
- if (MEM_P (*iter)
- && (reg_mentioned_p (stack_pointer_rtx,
- *iter)
- || reg_mentioned_p (frame_pointer_rtx,
- *iter)))
- {
- unsigned int alignment = MEM_ALIGN (*iter);
- if (alignment > stack_alignment)
- stack_alignment = alignment;
- }
- }
+ break;
}
}
cfun->machine->stack_frame_required = require_stack_frame;
+
+  /* Stop if we don't need to check stack slots.  */
+ if (!check_stack_slot)
+ return;
+
+ /* The preferred stack alignment is the minimum stack alignment. */
+ if (stack_alignment > crtl->preferred_stack_boundary)
+ stack_alignment = crtl->preferred_stack_boundary;
+
+ HARD_REG_SET stack_slot_access;
+ CLEAR_HARD_REG_SET (stack_slot_access);
+
+  /* A stack slot can be accessed via the stack pointer, the frame pointer,
+     or registers derived from either of them.  */
+ auto_bitmap worklist;
+
+ add_to_hard_reg_set (&stack_slot_access, Pmode, STACK_POINTER_REGNUM);
+ bitmap_set_bit (worklist, STACK_POINTER_REGNUM);
+
+ if (frame_pointer_needed)
+ {
+ add_to_hard_reg_set (&stack_slot_access, Pmode,
+ HARD_FRAME_POINTER_REGNUM);
+ bitmap_set_bit (worklist, HARD_FRAME_POINTER_REGNUM);
+ }
+
+ unsigned int regno;
+
+ do
+ {
+ regno = bitmap_clear_first_set_bit (worklist);
+ ix86_find_all_reg_uses (stack_slot_access, regno, worklist);
+ }
+ while (!bitmap_empty_p (worklist));
+
+ hard_reg_set_iterator hrsi;
+ stack_access_data data;
+
+ data.stack_alignment = &stack_alignment;
+
+ EXECUTE_IF_SET_IN_HARD_REG_SET (stack_slot_access, 0, regno, hrsi)
+ for (df_ref ref = DF_REG_USE_CHAIN (regno);
+ ref != NULL;
+ ref = DF_REF_NEXT_REG (ref))
+ {
+ if (DF_REF_IS_ARTIFICIAL (ref))
+ continue;
+
+ rtx_insn *insn = DF_REF_INSN (ref);
+
+ if (!NONJUMP_INSN_P (insn))
+ continue;
+
+ data.reg = DF_REF_REG (ref);
+ note_stores (insn, ix86_update_stack_alignment, &data);
+ }
}
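A sketch of the case the register walk above handles (hypothetical example):
the aligned slot is addressed only through a pseudo derived from the frame
pointer, so scanning sp/fp-based MEMs alone would underestimate the required
alignment.

void
g (void)
{
  _Alignas (32) char buf[32];
  char *volatile p = buf;	/* alignment visible only through p */
  p[0] = 1;
}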
/* Finalize stack_realign_needed and frame_pointer_needed flags, which
@@ -9035,11 +9294,22 @@ ix86_expand_prologue (void)
doing this if we have to probe the stack; at least on x86_64 the
stack probe can turn into a call that clobbers a red zone location. */
else if (ix86_using_red_zone ()
- && (! TARGET_STACK_PROBE
- || frame.stack_pointer_offset < CHECK_STACK_LIMIT))
+ && (! TARGET_STACK_PROBE
+ || frame.stack_pointer_offset < CHECK_STACK_LIMIT))
{
+ HOST_WIDE_INT allocate_offset;
+ if (crtl->shrink_wrapped_separate)
+ {
+ allocate_offset = m->fs.sp_offset - frame.stack_pointer_offset;
+
+ /* Adjust the total offset at the beginning of the function. */
+ pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
+ GEN_INT (allocate_offset), -1,
+ m->fs.cfa_reg == stack_pointer_rtx);
+ m->fs.sp_offset = cfun->machine->frame.stack_pointer_offset;
+ }
+
ix86_emit_save_regs_using_mov (frame.reg_save_offset);
- cfun->machine->red_zone_used = true;
int_registers_saved = true;
}
}
@@ -9617,30 +9887,35 @@ ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset,
for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return, true))
{
- rtx reg = gen_rtx_REG (word_mode, regno);
- rtx mem;
- rtx_insn *insn;
- mem = choose_baseaddr (cfa_offset, NULL);
- mem = gen_frame_mem (word_mode, mem);
- insn = emit_move_insn (reg, mem);
-
- if (m->fs.cfa_reg == crtl->drap_reg && regno == REGNO (crtl->drap_reg))
+      /* Skip registers already processed by separate shrink wrapping.  */
+ if (!cfun->machine->reg_is_wrapped_separately[regno])
{
- /* Previously we'd represented the CFA as an expression
- like *(%ebp - 8). We've just popped that value from
- the stack, which means we need to reset the CFA to
- the drap register. This will remain until we restore
- the stack pointer. */
- add_reg_note (insn, REG_CFA_DEF_CFA, reg);
- RTX_FRAME_RELATED_P (insn) = 1;
+ rtx reg = gen_rtx_REG (word_mode, regno);
+ rtx mem;
+ rtx_insn *insn;
- /* This means that the DRAP register is valid for addressing. */
- m->fs.drap_valid = true;
- }
- else
- ix86_add_cfa_restore_note (NULL, reg, cfa_offset);
+ mem = choose_baseaddr (cfa_offset, NULL);
+ mem = gen_frame_mem (word_mode, mem);
+ insn = emit_move_insn (reg, mem);
+ if (m->fs.cfa_reg == crtl->drap_reg
+ && regno == REGNO (crtl->drap_reg))
+ {
+ /* Previously we'd represented the CFA as an expression
+ like *(%ebp - 8). We've just popped that value from
+ the stack, which means we need to reset the CFA to
+ the drap register. This will remain until we restore
+ the stack pointer. */
+ add_reg_note (insn, REG_CFA_DEF_CFA, reg);
+ RTX_FRAME_RELATED_P (insn) = 1;
+
+ /* DRAP register is valid for addressing. */
+ m->fs.drap_valid = true;
+ }
+ else
+ ix86_add_cfa_restore_note (NULL, reg, cfa_offset);
+ }
cfa_offset -= UNITS_PER_WORD;
}
}
@@ -9919,10 +10194,11 @@ ix86_expand_epilogue (int style)
less work than reloading sp and popping the register. */
else if (!sp_valid_at (frame.hfp_save_offset) && frame.nregs <= 1)
restore_regs_via_mov = true;
- else if (TARGET_EPILOGUE_USING_MOVE
- && cfun->machine->use_fast_prologue_epilogue
- && (frame.nregs > 1
- || m->fs.sp_offset != reg_save_offset))
+ else if (crtl->shrink_wrapped_separate
+ || (TARGET_EPILOGUE_USING_MOVE
+ && cfun->machine->use_fast_prologue_epilogue
+ && (frame.nregs > 1
+ || m->fs.sp_offset != reg_save_offset)))
restore_regs_via_mov = true;
else if (frame_pointer_needed
&& !frame.nregs
@@ -9936,6 +10212,9 @@ ix86_expand_epilogue (int style)
else
restore_regs_via_mov = false;
+ if (crtl->shrink_wrapped_separate)
+ gcc_assert (restore_regs_via_mov);
+
if (restore_regs_via_mov || frame.nsseregs)
{
/* Ensure that the entire register save area is addressable via
@@ -9988,6 +10267,7 @@ ix86_expand_epilogue (int style)
gcc_assert (m->fs.sp_offset == UNITS_PER_WORD);
gcc_assert (!crtl->drap_reg);
gcc_assert (!frame.nregs);
+ gcc_assert (!crtl->shrink_wrapped_separate);
}
else if (restore_regs_via_mov)
{
@@ -10002,6 +10282,8 @@ ix86_expand_epilogue (int style)
rtx sa = EH_RETURN_STACKADJ_RTX;
rtx_insn *insn;
+ gcc_assert (!crtl->shrink_wrapped_separate);
+
/* Stack realignment doesn't work with eh_return. */
if (crtl->stack_realign_needed)
sorry ("Stack realignment not supported with "
@@ -10442,8 +10724,7 @@ split_stack_prologue_scratch_regno (void)
static GTY(()) rtx split_stack_fn;
-/* A SYMBOL_REF for the more stack function when using the large
- model. */
+/* A SYMBOL_REF for the more stack function when using the large model. */
static GTY(()) rtx split_stack_fn_large;
@@ -11101,6 +11382,23 @@ ix86_address_cost (rtx x, machine_mode, addr_space_t, bool)
return cost;
}
+
+/* Implement TARGET_USE_BY_PIECES_INFRASTRUCTURE_P. */
+
+bool
+ix86_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT size,
+ unsigned int align,
+ enum by_pieces_operation op,
+ bool speed_p)
+{
+ /* Return true when we are currently expanding memcpy/memset epilogue
+ with move_by_pieces or store_by_pieces. */
+ if (cfun->machine->by_pieces_in_use)
+ return true;
+
+ return default_use_by_pieces_infrastructure_p (size, align, op,
+ speed_p);
+}
/* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
   this is used to form addresses to local data when -fPIC is in
@@ -11131,7 +11429,7 @@ ix86_force_load_from_GOT_p (rtx x, bool call_p)
&& (!flag_pic || this_is_asm_operands)
&& ix86_cmodel != CM_LARGE
&& ix86_cmodel != CM_LARGE_PIC
- && GET_CODE (x) == SYMBOL_REF
+ && SYMBOL_REF_P (x)
&& ((!call_p
&& (!ix86_direct_extern_access
|| (SYMBOL_REF_DECL (x)
@@ -11177,20 +11475,23 @@ ix86_legitimate_constant_p (machine_mode mode, rtx x)
case UNSPEC_TPOFF:
case UNSPEC_NTPOFF:
x = XVECEXP (x, 0, 0);
- return (GET_CODE (x) == SYMBOL_REF
+ return (SYMBOL_REF_P (x)
&& SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
case UNSPEC_DTPOFF:
x = XVECEXP (x, 0, 0);
- return (GET_CODE (x) == SYMBOL_REF
+ return (SYMBOL_REF_P (x)
&& SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
+ case UNSPEC_SECREL32:
+ x = XVECEXP (x, 0, 0);
+ return SYMBOL_REF_P (x);
default:
return false;
}
/* We must have drilled down to a symbol. */
- if (GET_CODE (x) == LABEL_REF)
+ if (LABEL_REF_P (x))
return true;
- if (GET_CODE (x) != SYMBOL_REF)
+ if (!SYMBOL_REF_P (x))
return false;
/* FALLTHRU */
@@ -11230,7 +11531,7 @@ ix86_legitimate_constant_p (machine_mode mode, rtx x)
case E_OImode:
case E_XImode:
if (!standard_sse_constant_p (x, mode)
- && GET_MODE_SIZE (TARGET_AVX512F && TARGET_EVEX512
+ && GET_MODE_SIZE (TARGET_AVX512F
? XImode
: (TARGET_AVX
? OImode
@@ -11317,8 +11618,11 @@ legitimate_pic_operand_p (rtx x)
return TARGET_64BIT;
case UNSPEC_TPOFF:
x = XVECEXP (inner, 0, 0);
- return (GET_CODE (x) == SYMBOL_REF
+ return (SYMBOL_REF_P (x)
&& SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
+ case UNSPEC_SECREL32:
+ x = XVECEXP (inner, 0, 0);
+ return SYMBOL_REF_P (x);
case UNSPEC_MACHOPIC_OFFSET:
return legitimate_pic_address_disp_p (x);
default:
@@ -11369,7 +11673,7 @@ legitimate_pic_address_disp_p (rtx disp)
if (INTVAL (op1) >= 16*1024*1024
|| INTVAL (op1) < -16*1024*1024)
break;
- if (GET_CODE (op0) == LABEL_REF)
+ if (LABEL_REF_P (op0))
return true;
if (GET_CODE (op0) == CONST
&& GET_CODE (XEXP (op0, 0)) == UNSPEC
@@ -11378,7 +11682,7 @@ legitimate_pic_address_disp_p (rtx disp)
if (GET_CODE (op0) == UNSPEC
&& XINT (op0, 1) == UNSPEC_PCREL)
return true;
- if (GET_CODE (op0) != SYMBOL_REF)
+ if (!SYMBOL_REF_P (op0))
break;
/* FALLTHRU */
@@ -11443,8 +11747,8 @@ legitimate_pic_address_disp_p (rtx disp)
&& XINT (disp, 1) != UNSPEC_PLTOFF))
return false;
- if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
- && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
+ if (!SYMBOL_REF_P (XVECEXP (disp, 0, 0))
+ && !LABEL_REF_P (XVECEXP (disp, 0, 0)))
return false;
return true;
}
@@ -11472,14 +11776,14 @@ legitimate_pic_address_disp_p (rtx disp)
/* We need to check for both symbols and labels because VxWorks loads
text labels with @GOT rather than @GOTOFF. See gotoff_operand for
details. */
- return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
- || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
+ return (SYMBOL_REF_P (XVECEXP (disp, 0, 0))
+ || LABEL_REF_P (XVECEXP (disp, 0, 0)));
case UNSPEC_GOTOFF:
/* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
While ABI specify also 32bit relocation but we don't produce it in
small PIC model at all. */
- if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
- || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
+ if ((SYMBOL_REF_P (XVECEXP (disp, 0, 0))
+ || LABEL_REF_P (XVECEXP (disp, 0, 0)))
&& !TARGET_64BIT)
return !TARGET_PECOFF && gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
return false;
@@ -11489,16 +11793,19 @@ legitimate_pic_address_disp_p (rtx disp)
if (saw_plus)
return false;
disp = XVECEXP (disp, 0, 0);
- return (GET_CODE (disp) == SYMBOL_REF
+ return (SYMBOL_REF_P (disp)
&& SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
case UNSPEC_NTPOFF:
disp = XVECEXP (disp, 0, 0);
- return (GET_CODE (disp) == SYMBOL_REF
+ return (SYMBOL_REF_P (disp)
&& SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
case UNSPEC_DTPOFF:
disp = XVECEXP (disp, 0, 0);
- return (GET_CODE (disp) == SYMBOL_REF
+ return (SYMBOL_REF_P (disp)
&& SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
+ case UNSPEC_SECREL32:
+ disp = XVECEXP (disp, 0, 0);
+ return SYMBOL_REF_P (disp);
}
return false;
@@ -11776,6 +12083,7 @@ ix86_legitimate_address_p (machine_mode, rtx addr, bool strict,
case UNSPEC_INDNTPOFF:
case UNSPEC_NTPOFF:
case UNSPEC_DTPOFF:
+ case UNSPEC_SECREL32:
break;
default:
@@ -11801,7 +12109,8 @@ ix86_legitimate_address_p (machine_mode, rtx addr, bool strict,
|| GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
|| !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
|| (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
- && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
+ && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF
+ && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_SECREL32))
/* Non-constant pic memory reference. */
return false;
}
@@ -11838,11 +12147,11 @@ ix86_legitimate_address_p (machine_mode, rtx addr, bool strict,
that never results in lea, this seems to be easier and
correct fix for crash to disable this test. */
}
- else if (GET_CODE (disp) != LABEL_REF
+ else if (!LABEL_REF_P (disp)
&& !CONST_INT_P (disp)
&& (GET_CODE (disp) != CONST
|| !ix86_legitimate_constant_p (Pmode, disp))
- && (GET_CODE (disp) != SYMBOL_REF
+ && (!SYMBOL_REF_P (disp)
|| !ix86_legitimate_constant_p (Pmode, disp)))
/* Displacement is not constant. */
return false;
@@ -11949,10 +12258,10 @@ legitimize_pic_address (rtx orig, rtx reg)
else
new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
}
- else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
+ else if ((SYMBOL_REF_P (addr) && SYMBOL_REF_TLS_MODEL (addr) == 0)
/* We can't always use @GOTOFF for text labels
on VxWorks, see gotoff_operand. */
- || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
+ || (TARGET_VXWORKS_VAROFF && LABEL_REF_P (addr)))
{
#if TARGET_PECOFF
rtx tmp = legitimize_pe_coff_symbol (addr, true);
@@ -12087,8 +12396,8 @@ legitimize_pic_address (rtx orig, rtx reg)
/* For %rip addressing, we have to use
just disp32, not base nor index. */
if (TARGET_64BIT
- && (GET_CODE (base) == SYMBOL_REF
- || GET_CODE (base) == LABEL_REF))
+ && (SYMBOL_REF_P (base)
+ || LABEL_REF_P (base)))
base = force_reg (mode, base);
if (GET_CODE (new_rtx) == PLUS
&& CONSTANT_P (XEXP (new_rtx, 1)))
@@ -12125,13 +12434,53 @@ get_thread_pointer (machine_mode tp_mode, bool to_reg)
return tp;
}
+/* Construct the SYMBOL_REF for the _tls_index symbol. */
+
+static GTY(()) rtx ix86_tls_index_symbol;
+
+#if TARGET_WIN32_TLS
+static rtx
+ix86_tls_index (void)
+{
+ if (!ix86_tls_index_symbol)
+ ix86_tls_index_symbol = gen_rtx_SYMBOL_REF (SImode, "_tls_index");
+
+ if (flag_pic)
+ return gen_rtx_CONST (Pmode, gen_rtx_UNSPEC (Pmode, gen_rtvec (1, ix86_tls_index_symbol), UNSPEC_PCREL));
+ else
+ return ix86_tls_index_symbol;
+}
+#endif
+
/* Construct the SYMBOL_REF for the tls_get_addr function. */
static GTY(()) rtx ix86_tls_symbol;
-static rtx
+rtx
ix86_tls_get_addr (void)
{
+ if (cfun->machine->call_saved_registers
+ == TYPE_NO_CALLER_SAVED_REGISTERS)
+ {
+ /* __tls_get_addr doesn't preserve vector registers. When a
+ function with no_caller_saved_registers attribute calls
+ __tls_get_addr, YMM and ZMM registers will be clobbered.
+ Issue an error and suggest -mtls-dialect=gnu2 in this case. */
+ if (cfun->machine->func_type == TYPE_NORMAL)
+ error (G_("%<-mtls-dialect=gnu2%> must be used with a function"
+ " with the %<no_caller_saved_registers%> attribute"));
+ else
+ error (cfun->machine->func_type == TYPE_EXCEPTION
+ ? G_("%<-mtls-dialect=gnu2%> must be used with an"
+ " exception service routine")
+ : G_("%<-mtls-dialect=gnu2%> must be used with an"
+ " interrupt service routine"));
+ /* Don't issue the same error twice. */
+ cfun->machine->func_type = TYPE_NORMAL;
+ cfun->machine->call_saved_registers
+ = TYPE_DEFAULT_CALL_SAVED_REGISTERS;
+ }
+
if (!ix86_tls_symbol)
{
const char *sym
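The diagnostic added above fires when a general- or local-dynamic TLS access would call __tls_get_addr from a function that must preserve all registers. A minimal sketch of source that triggers it (a hypothetical test case, assuming -fPIC and the default -mtls-dialect=gnu):

    __thread int counter;

    __attribute__((no_caller_saved_registers))
    int bump (void)
    {
      /* This TLS access expands to a call to __tls_get_addr, which
         clobbers YMM/ZMM registers, so the compiler now errors and
         suggests -mtls-dialect=gnu2.  */
      return ++counter;
    }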
@@ -12183,6 +12532,26 @@ legitimize_tls_address (rtx x, enum tls_model model, bool for_mov)
machine_mode tp_mode = Pmode;
int type;
+#if TARGET_WIN32_TLS
+ off = gen_const_mem (SImode, ix86_tls_index ());
+ set_mem_alias_set (off, GOT_ALIAS_SET);
+
+ tp = gen_const_mem (Pmode, GEN_INT (TARGET_64BIT ? 88 : 44));
+ set_mem_addr_space (tp, DEFAULT_TLS_SEG_REG);
+
+ if (TARGET_64BIT)
+ off = convert_to_mode (Pmode, off, 1);
+
+ base = force_reg (Pmode, off);
+ tp = copy_to_mode_reg (Pmode, tp);
+
+ tp = gen_const_mem (Pmode, gen_rtx_PLUS (Pmode, tp, gen_rtx_MULT (Pmode, base, GEN_INT (UNITS_PER_WORD))));
+ set_mem_alias_set (tp, GOT_ALIAS_SET);
+
+ base = force_reg (Pmode, tp);
+
+ return gen_rtx_PLUS (Pmode, base, gen_rtx_CONST (Pmode, gen_rtx_UNSPEC (SImode, gen_rtvec (1, x), UNSPEC_SECREL32)));
+#else
/* Fall back to global dynamic model if tool chain cannot support local
dynamic. */
if (TARGET_SUN_TLS && !TARGET_64BIT
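The TARGET_WIN32_TLS block above implements the native Windows TLS access sequence: load _tls_index, fetch ThreadLocalStoragePointer from the TEB (gs:[0x58] on x64, fs:[0x2C] on x86, hence the 88/44 constants), index into that array, and finally add the variable's @secrel32 offset. A hedged C sketch of the equivalent x86-64 access (the helper name and the inline asm are illustrative, not part of the patch):

    #include <stddef.h>
    #include <stdint.h>

    extern unsigned int _tls_index;   /* defined by the Windows CRT */

    static void *
    win32_tls_address (size_t secrel32_offset)
    {
      uintptr_t teb_slot;
      /* TEB->ThreadLocalStoragePointer lives at gs:[0x58] on x64.  */
      __asm__ ("movq %%gs:88, %0" : "=r" (teb_slot));
      void **tls_blocks = (void **) teb_slot;
      return (char *) tls_blocks[_tls_index] + secrel32_offset;
    }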
@@ -12231,13 +12600,13 @@ legitimize_tls_address (rtx x, enum tls_model model, bool for_mov)
if (TARGET_64BIT)
{
rtx rax = gen_rtx_REG (Pmode, AX_REG);
+ rtx rdi = gen_rtx_REG (Pmode, DI_REG);
rtx_insn *insns;
start_sequence ();
emit_call_insn
- (gen_tls_global_dynamic_64 (Pmode, rax, x, caddr));
- insns = get_insns ();
- end_sequence ();
+ (gen_tls_global_dynamic_64 (Pmode, rax, x, caddr, rdi));
+ insns = end_sequence ();
if (GET_MODE (x) != Pmode)
x = gen_rtx_ZERO_EXTEND (Pmode, x);
@@ -12285,14 +12654,14 @@ legitimize_tls_address (rtx x, enum tls_model model, bool for_mov)
if (TARGET_64BIT)
{
rtx rax = gen_rtx_REG (Pmode, AX_REG);
+ rtx rdi = gen_rtx_REG (Pmode, DI_REG);
rtx_insn *insns;
rtx eqv;
start_sequence ();
emit_call_insn
- (gen_tls_local_dynamic_base_64 (Pmode, rax, caddr));
- insns = get_insns ();
- end_sequence ();
+ (gen_tls_local_dynamic_base_64 (Pmode, rax, caddr, rdi));
+ insns = end_sequence ();
/* Attach a unique REG_EQUAL, to allow the RTL optimizers to
share the LD_BASE result with other LD model accesses. */
@@ -12405,6 +12774,7 @@ legitimize_tls_address (rtx x, enum tls_model model, bool for_mov)
}
return dest;
+#endif
}
/* Return true if the TLS address requires insn using integer registers.
@@ -12551,12 +12921,12 @@ ix86_legitimize_address (rtx x, rtx, machine_mode mode)
bool changed = false;
unsigned log;
- log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
+ log = SYMBOL_REF_P (x) ? SYMBOL_REF_TLS_MODEL (x) : 0;
if (log)
return legitimize_tls_address (x, (enum tls_model) log, false);
if (GET_CODE (x) == CONST
&& GET_CODE (XEXP (x, 0)) == PLUS
- && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
+ && SYMBOL_REF_P (XEXP (XEXP (x, 0), 0))
&& (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
{
rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
@@ -12874,6 +13244,9 @@ output_pic_addr_const (FILE *file, rtx x, int code)
case UNSPEC_INDNTPOFF:
fputs ("@indntpoff", file);
break;
+ case UNSPEC_SECREL32:
+ fputs ("@secrel32", file);
+ break;
#if TARGET_MACHO
case UNSPEC_MACHOPIC_OFFSET:
putc ('-', file);
@@ -12899,7 +13272,11 @@ i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
{
fputs (ASM_LONG, file);
output_addr_const (file, x);
+#if TARGET_WIN32_TLS
+ fputs ("@secrel32", file);
+#else
fputs ("@dtpoff", file);
+#endif
switch (size)
{
case 4:
@@ -12966,7 +13343,7 @@ ix86_delegitimize_tls_address (rtx orig_x)
if (GET_CODE (unspec) != UNSPEC || XINT (unspec, 1) != UNSPEC_NTPOFF)
return orig_x;
x = XVECEXP (unspec, 0, 0);
- gcc_assert (GET_CODE (x) == SYMBOL_REF);
+ gcc_assert (SYMBOL_REF_P (x));
if (unspec != XEXP (addr.disp, 0))
x = gen_rtx_PLUS (Pmode, x, XEXP (XEXP (addr.disp, 0), 1));
if (addr.index)
@@ -13133,7 +13510,7 @@ ix86_delegitimize_address_1 (rtx x, bool base_term_p)
else if (base_term_p
&& pic_offset_table_rtx
&& !TARGET_MACHO
- && !TARGET_VXWORKS_RTP)
+ && !TARGET_VXWORKS_VAROFF)
{
rtx tmp = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
tmp = gen_rtx_MINUS (Pmode, copy_rtx (addend), tmp);
@@ -13558,10 +13935,11 @@ print_reg (rtx x, int code, FILE *file)
H -- print a memory address offset by 8; used for sse high-parts
Y -- print condition for XOP pcom* instruction.
V -- print naked full integer register name without %.
+ v -- print segment override prefix
+ -- print a branch hint as 'cs' or 'ds' prefix
; -- print a semicolon (after prefixes due to bug in older gas).
~ -- print "i" if TARGET_AVX2, "f" otherwise.
- ^ -- print addr32 prefix if TARGET_64BIT and Pmode != word_mode
+ ^ -- print addr32 prefix if Pmode != word_mode
M -- print addr32 prefix for TARGET_X32 with VSIB address.
! -- print NOTRACK prefix for jxx/call/ret instructions if required.
N -- print maskz if it's constant 0 operand.
@@ -14063,6 +14441,28 @@ ix86_print_operand (FILE *file, rtx x, int code)
return;
+ case 'v':
+ if (MEM_P (x))
+ {
+ switch (MEM_ADDR_SPACE (x))
+ {
+ case ADDR_SPACE_GENERIC:
+ break;
+ case ADDR_SPACE_SEG_FS:
+ fputs ("fs ", file);
+ break;
+ case ADDR_SPACE_SEG_GS:
+ fputs ("gs ", file);
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ }
+ else
+ output_operand_lossage ("operand is not a memory reference, "
+ "invalid operand code 'v'");
+ return;
+
case '*':
if (ASSEMBLER_DIALECT == ASM_ATT)
putc ('*', file);
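The new 'v' operand code prints an explicit segment-override prefix for memory operands in GCC's x86 named address spaces. A minimal illustration of accesses whose MEMs carry ADDR_SPACE_SEG_FS/ADDR_SPACE_SEG_GS (using the standard __seg_fs/__seg_gs extension; the function names are made up):

    typedef unsigned long u64;

    u64 load_fs (const __seg_fs u64 *p) { return *p; }  /* fs-relative load */
    u64 load_gs (const __seg_gs u64 *p) { return *p; }  /* gs-relative load */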
@@ -14137,7 +14537,7 @@ ix86_print_operand (FILE *file, rtx x, int code)
return;
case '^':
- if (TARGET_64BIT && Pmode != word_mode)
+ if (Pmode != word_mode)
fputs ("addr32 ", file);
return;
@@ -14307,7 +14707,7 @@ ix86_print_operand (FILE *file, rtx x, int code)
/* We have patterns that allow zero sets of memory, for instance.
In 64-bit mode, we should probably support all 8-byte vectors,
since we can in fact encode that into an immediate. */
- if (GET_CODE (x) == CONST_VECTOR)
+ if (CONST_VECTOR_P (x))
{
if (x != CONST0_RTX (GET_MODE (x)))
output_operand_lossage ("invalid vector immediate");
@@ -14337,8 +14737,8 @@ ix86_print_operand (FILE *file, rtx x, int code)
if (ASSEMBLER_DIALECT == ASM_ATT)
putc ('$', file);
}
- else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
- || GET_CODE (x) == LABEL_REF)
+ else if (GET_CODE (x) == CONST || SYMBOL_REF_P (x)
+ || LABEL_REF_P (x))
{
if (ASSEMBLER_DIALECT == ASM_ATT)
putc ('$', file);
@@ -14433,8 +14833,8 @@ ix86_print_operand_address_as (FILE *file, rtx addr,
&& CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
symbol = XEXP (XEXP (disp, 0), 0);
- if (GET_CODE (symbol) == LABEL_REF
- || (GET_CODE (symbol) == SYMBOL_REF
+ if (LABEL_REF_P (symbol)
+ || (SYMBOL_REF_P (symbol)
&& SYMBOL_REF_TLS_MODEL (symbol) == 0))
base = pc_rtx;
}
@@ -14522,7 +14922,7 @@ ix86_print_operand_address_as (FILE *file, rtx addr,
{
if (flag_pic)
output_pic_addr_const (file, disp, 0);
- else if (GET_CODE (disp) == LABEL_REF)
+ else if (LABEL_REF_P (disp))
output_asm_label (disp);
else
output_addr_const (file, disp);
@@ -14558,7 +14958,7 @@ ix86_print_operand_address_as (FILE *file, rtx addr,
if (flag_pic)
output_pic_addr_const (file, disp, 0);
- else if (GET_CODE (disp) == LABEL_REF)
+ else if (LABEL_REF_P (disp))
output_asm_label (disp);
else if (CONST_INT_P (disp))
offset = disp;
@@ -14652,6 +15052,10 @@ i386_asm_output_addr_const_extra (FILE *file, rtx x)
output_addr_const (file, op);
fputs ("@indntpoff", file);
break;
+ case UNSPEC_SECREL32:
+ output_addr_const (file, op);
+ fputs ("@secrel32", file);
+ break;
#if TARGET_MACHO
case UNSPEC_MACHOPIC_OFFSET:
output_addr_const (file, op);
@@ -15506,7 +15910,7 @@ ix86_output_addr_diff_elt (FILE *file, int value, int rel)
gcc_assert (!TARGET_64BIT);
#endif
/* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
- if (TARGET_64BIT || TARGET_VXWORKS_RTP)
+ if (TARGET_64BIT || TARGET_VXWORKS_VAROFF)
fprintf (file, "%s%s%d-%s%d\n",
directive, LPREFIX, value, LPREFIX, rel);
#if TARGET_MACHO
@@ -16338,6 +16742,10 @@ ix86_convert_const_vector_to_integer (rtx op, machine_mode mode)
val = wi::insert (val, wv, innermode_bits * i, innermode_bits);
}
break;
+ case E_V1SImode:
+ case E_V1DImode:
+ op = CONST_VECTOR_ELT (op, 0);
+ return INTVAL (op);
case E_V2HFmode:
case E_V2BFmode:
case E_V4HFmode:
@@ -17313,8 +17721,8 @@ ix86_rip_relative_addr_p (struct ix86_address *parts)
&& CONST_INT_P (XEXP (symbol, 1)))
symbol = XEXP (symbol, 0);
- if (GET_CODE (symbol) == LABEL_REF
- || (GET_CODE (symbol) == SYMBOL_REF
+ if (LABEL_REF_P (symbol)
+ || (SYMBOL_REF_P (symbol)
&& SYMBOL_REF_TLS_MODEL (symbol) == 0)
|| (GET_CODE (symbol) == UNSPEC
&& (XINT (symbol, 1) == UNSPEC_GOTPCREL
@@ -17904,9 +18312,14 @@ ix86_warn_parameter_passing_abi (cumulative_args_t cum_v, tree type)
if (cum->decl && !TREE_PUBLIC (cum->decl))
return;
- const_tree ctx = get_ultimate_context (cum->decl);
- if (ctx != NULL_TREE
- && !TRANSLATION_UNIT_WARN_EMPTY_P (ctx))
+ tree decl = cum->decl;
+ if (!decl)
+ /* If we don't know the target, look at the current TU. */
+ decl = current_function_decl;
+
+ const_tree ctx = get_ultimate_context (decl);
+ if (ctx == NULL_TREE
+ || !TRANSLATION_UNIT_WARN_EMPTY_P (ctx))
return;
/* If the actual size of the type is zero, then there is no change
@@ -19633,7 +20046,7 @@ ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi)
tree utype, ures, vce;
utype = unsigned_type_for (TREE_TYPE (arg0));
/* PABSB/W/D/Q store the unsigned result in dst, use ABSU_EXPR
- instead of ABS_EXPR to hanlde overflow case(TYPE_MIN). */
+ instead of ABS_EXPR to handle the overflow case (TYPE_MIN). */
ures = gimple_build (&stmts, ABSU_EXPR, utype, arg0);
gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
loc = gimple_location (stmt);
@@ -20043,14 +20456,10 @@ ix86_vectorize_builtin_scatter (const_tree vectype,
{
bool si;
enum ix86_builtins code;
- const machine_mode mode = TYPE_MODE (TREE_TYPE (vectype));
if (!TARGET_AVX512F)
return NULL_TREE;
- if (!TARGET_EVEX512 && GET_MODE_SIZE (mode) == 64)
- return NULL_TREE;
-
if (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 2u)
? !TARGET_USE_SCATTER_2PARTS
: (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 4u)
@@ -20793,7 +21202,11 @@ ix86_can_change_mode_class (machine_mode from, machine_mode to,
return true;
/* x87 registers can't do subreg at all, as all values are reformatted
- to extended precision. */
+ to extended precision.
+
+ ??? The middle-end queries mode changes for ALL_REGS and this makes
+ vec_series_lowpart_p always return false. We probably should
+ restrict this to modes supported by i387 and check if it is enabled. */
if (MAYBE_FLOAT_CLASS_P (regclass))
return false;
@@ -21117,8 +21530,7 @@ ix86_hard_regno_nregs (unsigned int regno, machine_mode mode)
/* Register pair for mask registers. */
if (mode == P2QImode || mode == P2HImode)
return 2;
- if (mode == V64SFmode || mode == V64SImode)
- return 4;
+
return 1;
}
@@ -21168,7 +21580,7 @@ ix86_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
- any of 512-bit wide vector mode
- any scalar mode. */
if (TARGET_AVX512F
- && ((VALID_AVX512F_REG_OR_XI_MODE (mode) && TARGET_EVEX512)
+ && ((VALID_AVX512F_REG_OR_XI_MODE (mode))
|| VALID_AVX512F_SCALAR_MODE (mode)))
return true;
@@ -21339,19 +21751,20 @@ ix86_modes_tieable_p (machine_mode mode1, machine_mode mode2)
return mode1 == SFmode;
/* If MODE2 is only appropriate for an SSE register, then tie with
- any other mode acceptable to SSE registers. */
- if (GET_MODE_SIZE (mode2) == 64
- && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
- return (GET_MODE_SIZE (mode1) == 64
- && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
- if (GET_MODE_SIZE (mode2) == 32
- && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
- return (GET_MODE_SIZE (mode1) == 32
- && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
- if (GET_MODE_SIZE (mode2) == 16
+ any vector modes or scalar floating point modes acceptable to SSE
+ registers, excluding scalar integer modes with SUBREG:
+ (subreg:QI (reg:TI 99) 0))
+ (subreg:HI (reg:TI 99) 0))
+ (subreg:SI (reg:TI 99) 0))
+ (subreg:DI (reg:TI 99) 0))
+ to avoid unnecessary move from SSE register to integer register.
+ */
+ if (GET_MODE_SIZE (mode2) >= 16
+ && (GET_MODE_SIZE (mode1) == GET_MODE_SIZE (mode2)
+ || ((VECTOR_MODE_P (mode1) || SCALAR_FLOAT_MODE_P (mode1))
+ && GET_MODE_SIZE (mode1) <= GET_MODE_SIZE (mode2)))
&& ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
- return (GET_MODE_SIZE (mode1) == 16
- && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
+ return ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1);
/* If MODE2 is appropriate for an MMX register, then tie
with any other mode acceptable to MMX registers. */
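A few consequences of the relaxed rule above, assuming SSE registers accept both modes: 16-byte modes still tie with each other (V4SImode with V2DImode or TImode), scalar DFmode now ties with the wider V2DFmode, but a scalar integer mode such as DImode does not tie with V2DImode, which is exactly the (subreg:DI (reg:TI ...)) shape the comment wants to keep out of SSE registers. A hypothetical GNU C illustration:

    typedef double v2df __attribute__ ((vector_size (16)));
    typedef long long v2di __attribute__ ((vector_size (16)));

    v2df bitcast (v2di x) { return (v2df) x; }  /* equal size: modes tie */
    long long low (v2di x) { return x[0]; }     /* DImode extract: no tie */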
@@ -21409,7 +21822,7 @@ ix86_set_reg_reg_cost (machine_mode mode)
case MODE_VECTOR_INT:
case MODE_VECTOR_FLOAT:
- if ((TARGET_AVX512F && TARGET_EVEX512 && VALID_AVX512F_REG_MODE (mode))
+ if ((TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
|| (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
|| (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
|| (TARGET_SSE && VALID_SSE_REG_MODE (mode))
@@ -21470,7 +21883,7 @@ ix86_widen_mult_cost (const struct processor_costs *cost,
/* pmuludq under sse2, pmuldq under sse4.1, for sign_extend,
require extra 4 mul, 4 add, 4 cmp and 2 shift. */
if (!TARGET_SSE4_1 && !uns_p)
- extra_cost = (cost->mulss + cost->addss + cost->sse_op) * 4
+ extra_cost = (cost->mulss + cost->sse_op + cost->sse_op) * 4
+ cost->sse_op * 2;
/* Fallthru. */
case V4DImode:
@@ -21520,11 +21933,11 @@ ix86_multiplication_cost (const struct processor_costs *cost,
else if (TARGET_AVX2)
nops += 2;
else if (TARGET_XOP)
- extra += cost->sse_load[2];
+ extra += COSTS_N_INSNS (cost->sse_load[2]) / 2;
else
{
nops += 1;
- extra += cost->sse_load[2];
+ extra += COSTS_N_INSNS (cost->sse_load[2]) / 2;
}
goto do_qimode;
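A worked example of the unit conversion introduced above, using the convention (stated later in this file) that sse_load[] entries are scaled so a register move costs 2, and that COSTS_N_INSNS (1) == 4: for a hypothetical sse_load[2] == 6,

    COSTS_N_INSNS (cost->sse_load[2]) / 2
      = (6 * 4) / 2 = 12 = COSTS_N_INSNS (3)

whereas the old code added the raw table value 6, mixing the two cost scales.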
@@ -21543,13 +21956,13 @@ ix86_multiplication_cost (const struct processor_costs *cost,
{
nmults += 1;
nops += 2;
- extra += cost->sse_load[2];
+ extra += COSTS_N_INSNS (cost->sse_load[2]) / 2;
}
else
{
nmults += 1;
nops += 4;
- extra += cost->sse_load[2];
+ extra += COSTS_N_INSNS (cost->sse_load[2]) / 2;
}
goto do_qimode;
@@ -21562,14 +21975,16 @@ ix86_multiplication_cost (const struct processor_costs *cost,
{
nmults += 1;
nops += 4;
- extra += cost->sse_load[3] * 2;
+ /* 2 loads, so no division by 2. */
+ extra += COSTS_N_INSNS (cost->sse_load[3]);
}
goto do_qimode;
case V64QImode:
nmults = 2;
nops = 9;
- extra = cost->sse_load[3] * 2 + cost->sse_load[4] * 2;
+ /* 2 loads of each size, so no division by 2. */
+ extra = COSTS_N_INSNS (cost->sse_load[3] + cost->sse_load[4]);
do_qimode:
return ix86_vec_cost (mode, cost->mulss * nmults
@@ -21662,7 +22077,7 @@ ix86_shift_rotate_cost (const struct processor_costs *cost,
/* Use vpbroadcast. */
extra = cost->sse_op;
else
- extra = cost->sse_load[2];
+ extra = COSTS_N_INSNS (cost->sse_load[2]) / 2;
if (constant_op1)
{
@@ -21693,7 +22108,7 @@ ix86_shift_rotate_cost (const struct processor_costs *cost,
shift with one insn set the cost to prefer paddb. */
if (constant_op1)
{
- extra = cost->sse_load[2];
+ extra = COSTS_N_INSNS (cost->sse_load[2]) / 2;
return ix86_vec_cost (mode, cost->sse_op) + extra;
}
else
@@ -21704,11 +22119,22 @@ ix86_shift_rotate_cost (const struct processor_costs *cost,
}
/* FALLTHRU */
case V32QImode:
+ if (TARGET_GFNI && constant_op1)
+ {
+ /* Use vgf2p8affine. One extra load for the mask, but in a loop
+ with enough registers it will be moved out. So for now don't
+ account the constant mask load. This is not quite right
+ for non-loop vectorization. */
+ extra = 0;
+ return ix86_vec_cost (mode, cost->sse_op) + extra;
+ }
if (TARGET_AVX2)
/* Use vpbroadcast. */
extra = cost->sse_op;
else
- extra = (mode == V16QImode) ? cost->sse_load[2] : cost->sse_load[3];
+ extra = COSTS_N_INSNS (mode == V16QImode
+ ? cost->sse_load[2]
+ : cost->sse_load[3]) / 2;
if (constant_op1)
{
@@ -21736,6 +22162,11 @@ ix86_shift_rotate_cost (const struct processor_costs *cost,
count = 9;
return ix86_vec_cost (mode, cost->sse_op * count) + extra;
+ case V64QImode:
+ /* Ignore the mask load for GF2P8AFFINEQB. */
+ extra = 0;
+ return ix86_vec_cost (mode, cost->sse_op) + extra;
+
case V2DImode:
case V4DImode:
/* V*DImode arithmetic right shift is emulated. */
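For reference, the single sse_op cost in the new GFNI branches above reflects doing a constant per-byte shift with one vgf2p8affineqb against a precomputed 8x8 bit matrix. An illustrative GNU C function that can compile to that single instruction with -mgfni (a sketch, not from the patch):

    typedef unsigned char v16qi __attribute__ ((vector_size (16)));

    /* Constant byte shift; with GFNI this is one vgf2p8affineqb with a
       constant matrix operand (the mask load is deliberately ignored).  */
    v16qi shr2 (v16qi x) { return x >> 2; }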
@@ -21816,6 +22247,34 @@ ix86_insn_cost (rtx_insn *insn, bool speed)
return insn_cost + pattern_cost (PATTERN (insn), speed);
}
+/* Return cost of SSE/AVX FP->FP conversion (extensions and truncates). */
+
+static int
+vec_fp_conversion_cost (const struct processor_costs *cost, int size)
+{
+ if (size < 128)
+ return cost->cvtss2sd;
+ else if (size < 256)
+ {
+ if (TARGET_SSE_SPLIT_REGS)
+ return cost->cvtss2sd * size / 64;
+ return cost->cvtss2sd;
+ }
+ if (size < 512)
+ return cost->vcvtps2pd256;
+ else
+ return cost->vcvtps2pd512;
+}
+
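How the new helper tiers by width, for hypothetical cost-table entries cvtss2sd == 4, vcvtps2pd256 == 5, vcvtps2pd512 == 7:

    vec_fp_conversion_cost (c,  64) -> 4
    vec_fp_conversion_cost (c, 128) -> 4   (8 with TARGET_SSE_SPLIT_REGS,
                                            i.e. 4 * 128 / 64)
    vec_fp_conversion_cost (c, 256) -> 5
    vec_fp_conversion_cost (c, 512) -> 7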
+/* Return true if X is UNSPEC with UNSPEC_PCMP or UNSPEC_UNSIGNED_PCMP. */
+
+static bool
+unspec_pcmp_p (rtx x)
+{
+ return GET_CODE (x) == UNSPEC
+ && (XINT (x, 1) == UNSPEC_PCMP || XINT (x, 1) == UNSPEC_UNSIGNED_PCMP);
+}
+
/* Compute a (partial) cost for rtx X. Return true if the complete
cost has been computed, and false if subexpressions should be
scanned. In either case, *TOTAL contains the cost result. */
@@ -21833,9 +22292,9 @@ ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno,
/* Handling different vternlog variants. */
if ((GET_MODE_SIZE (mode) == 64
- ? (TARGET_AVX512F && TARGET_EVEX512)
+ ? TARGET_AVX512F
: (TARGET_AVX512VL
- || (TARGET_AVX512F && TARGET_EVEX512 && !TARGET_PREFER_AVX256)))
+ || (TARGET_AVX512F && !TARGET_PREFER_AVX256)))
&& GET_MODE_SIZE (mode) >= 16
&& outer_code_i == SET
&& ternlog_operand (x, mode))
@@ -22184,8 +22643,7 @@ ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno,
{
/* (ior (not ...) ...) can be a single insn in AVX512. */
if (GET_CODE (XEXP (x, 0)) == NOT && TARGET_AVX512F
- && ((TARGET_EVEX512
- && GET_MODE_SIZE (mode) == 64)
+ && (GET_MODE_SIZE (mode) == 64
|| (TARGET_AVX512VL
&& (GET_MODE_SIZE (mode) == 32
|| GET_MODE_SIZE (mode) == 16))))
@@ -22276,8 +22734,7 @@ ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno,
/* (and (not ...) (not ...)) can be a single insn in AVX512. */
if (GET_CODE (right) == NOT && TARGET_AVX512F
- && ((TARGET_EVEX512
- && GET_MODE_SIZE (mode) == 64)
+ && (GET_MODE_SIZE (mode) == 64
|| (TARGET_AVX512VL
&& (GET_MODE_SIZE (mode) == 32
|| GET_MODE_SIZE (mode) == 16))))
@@ -22347,8 +22804,7 @@ ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno,
{
/* (not (xor ...)) can be a single insn in AVX512. */
if (GET_CODE (XEXP (x, 0)) == XOR && TARGET_AVX512F
- && ((TARGET_EVEX512
- && GET_MODE_SIZE (mode) == 64)
+ && (GET_MODE_SIZE (mode) == 64
|| (TARGET_AVX512VL
&& (GET_MODE_SIZE (mode) == 32
|| GET_MODE_SIZE (mode) == 16))))
@@ -22479,17 +22935,39 @@ ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno,
return false;
case FLOAT_EXTEND:
+ /* x87 represents all values in extended 80-bit precision. */
if (!SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
*total = 0;
else
- *total = ix86_vec_cost (mode, cost->addss);
+ *total = vec_fp_conversion_cost (cost, GET_MODE_BITSIZE (mode));
return false;
case FLOAT_TRUNCATE:
if (!SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
*total = cost->fadd;
else
- *total = ix86_vec_cost (mode, cost->addss);
+ *total = vec_fp_conversion_cost (cost, GET_MODE_BITSIZE (mode));
+ return false;
+ case FLOAT:
+ case UNSIGNED_FLOAT:
+ if (!SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
+ /* TODO: We do not have cost tables for x87. */
+ *total = cost->fadd;
+ else if (VECTOR_MODE_P (mode))
+ *total = ix86_vec_cost (mode, cost->cvtpi2ps);
+ else
+ *total = cost->cvtsi2ss;
+ return false;
+
+ case FIX:
+ case UNSIGNED_FIX:
+ if (!SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
+ /* TODO: We do not have cost tables for x87. */
+ *total = cost->fadd;
+ else if (VECTOR_MODE_P (mode))
+ *total = ix86_vec_cost (mode, cost->cvtps2pi);
+ else
+ *total = cost->cvtss2si;
return false;
case ABS:
@@ -22550,13 +23028,41 @@ ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno,
}
return false;
- case VEC_SELECT:
case VEC_CONCAT:
/* ??? Assume all of these vector manipulation patterns are
recognizable. In which case they all pretty much have the
- same cost. */
+ same cost.
+ ??? We should still recurse when computing the cost. */
*total = cost->sse_op;
return true;
+
+ case VEC_SELECT:
+ /* Special case extracting the lower part of a vector.
+ This by itself needs no code and most SSE/AVX instructions have
+ packed and single forms where the single form may be represented
+ by such VEC_SELECT.
+
+ Use cost 1 (despite the fact that functionally equivalent SUBREG has
+ cost 0). Making VEC_SELECT completely free, for example instructs CSE
+ to forward propagate VEC_SELECT into
+
+ (set (reg eax) (reg src))
+
+ which then prevents fwprop and combining. See e.g.
+ gcc.target/i386/pr91103-1.c.
+
+ ??? The rtvec_series_p test should be, for valid patterns, equivalent to
+ vec_series_lowpart_p but is not, since the latter calls
+ can_change_mode_class on ALL_REGS and this returns false since x87 does
+ not support subregs at all. */
+ if (rtvec_series_p (XVEC (XEXP (x, 1), 0), 0))
+ *total = rtx_cost (XEXP (x, 0), GET_MODE (XEXP (x, 0)),
+ outer_code, opno, speed) + 1;
+ else
+ /* ??? We should still recurse when computing the cost. */
+ *total = cost->sse_op;
+ return true;
+
case VEC_DUPLICATE:
*total = rtx_cost (XEXP (x, 0),
GET_MODE (XEXP (x, 0)),
@@ -22569,13 +23075,87 @@ ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno,
case VEC_MERGE:
mask = XEXP (x, 2);
+ /* Scalar versions of SSE instructions may be represented as:
+
+ (vec_merge (vec_duplicate (operation ....))
+ (register or memory)
+ (const_int 1))
+
+ In this case vec_merge and vec_duplicate are free.
+ Just recurse into the operation and the second operand. */
+ if (mask == const1_rtx
+ && GET_CODE (XEXP (x, 0)) == VEC_DUPLICATE)
+ {
+ *total = rtx_cost (XEXP (XEXP (x, 0), 0), mode,
+ outer_code, opno, speed)
+ + rtx_cost (XEXP (x, 1), mode, outer_code, opno, speed);
+ return true;
+ }
 /* This is a masked instruction, assume the same cost
 as the nonmasked variant. */
- if (TARGET_AVX512F && register_operand (mask, GET_MODE (mask)))
- *total = rtx_cost (XEXP (x, 0), mode, outer_code, opno, speed);
+ else if (TARGET_AVX512F
+ && (register_operand (mask, GET_MODE (mask))
+ /* Redundant clean up of high bits for kmask with VL=2/4,
+ i.e. (vec_merge op0, op1, (and op3 15)). */
+ || (GET_CODE (mask) == AND
+ && register_operand (XEXP (mask, 0), GET_MODE (mask))
+ && CONST_INT_P (XEXP (mask, 1))
+ && ((INTVAL (XEXP (mask, 1)) == 3
+ && GET_MODE_NUNITS (mode) == 2)
+ || (INTVAL (XEXP (mask, 1)) == 15
+ && GET_MODE_NUNITS (mode) == 4)))))
+ {
+ *total = rtx_cost (XEXP (x, 0), mode, outer_code, opno, speed)
+ + rtx_cost (XEXP (x, 1), mode, outer_code, opno, speed);
+ return true;
+ }
+ /* Combination of the two above:
+
+ (vec_merge (vec_merge (vec_duplicate (operation ...))
+ (register or memory)
+ (reg:QI mask))
+ (register or memory)
+ (const_int 1))
+
+ i.e. avx512fp16_vcvtss2sh_mask. */
+ else if (TARGET_AVX512F
+ && mask == const1_rtx
+ && GET_CODE (XEXP (x, 0)) == VEC_MERGE
+ && GET_CODE (XEXP (XEXP (x, 0), 0)) == VEC_DUPLICATE
+ && register_operand (XEXP (XEXP (x, 0), 2),
+ GET_MODE (XEXP (XEXP (x, 0), 2))))
+ {
+ *total = rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
+ mode, outer_code, opno, speed)
+ + rtx_cost (XEXP (XEXP (x, 0), 1),
+ mode, outer_code, opno, speed)
+ + rtx_cost (XEXP (x, 1), mode, outer_code, opno, speed);
+ return true;
+ }
+ /* vcmp. */
+ else if (unspec_pcmp_p (mask)
+ || (GET_CODE (mask) == NOT
+ && unspec_pcmp_p (XEXP (mask, 0))))
+ {
+ rtx uns = GET_CODE (mask) == NOT ? XEXP (mask, 0) : mask;
+ rtx unsop0 = XVECEXP (uns, 0, 0);
+ /* Make (subreg:V4SI (not:V16QI (reg:V16QI ..)) 0)
+ cost the same as register.
+ This is used by avx_cmp<mode>3_ltint_not. */
+ if (SUBREG_P (unsop0))
+ unsop0 = XEXP (unsop0, 0);
+ if (GET_CODE (unsop0) == NOT)
+ unsop0 = XEXP (unsop0, 0);
+ *total = rtx_cost (XEXP (x, 0), mode, outer_code, opno, speed)
+ + rtx_cost (XEXP (x, 1), mode, outer_code, opno, speed)
+ + rtx_cost (unsop0, mode, UNSPEC, opno, speed)
+ + rtx_cost (XVECEXP (uns, 0, 1), mode, UNSPEC, opno, speed)
+ + cost->sse_op;
+ return true;
+ }
else
*total = cost->sse_op;
- return true;
+ return false;
case MEM:
/* CONST_VECTOR_DUPLICATE_P in constant_pool is just broadcast.
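The "scalar operation for free" shape recognized at the top of the VEC_MERGE case above is the canonical RTL for scalar SSE arithmetic such as addsd; a sketch with invented register numbers:

    (set (reg:V2DF 90)
         (vec_merge:V2DF
           (vec_duplicate:V2DF (plus:DF (reg:DF 91) (reg:DF 92)))
           (reg:V2DF 93)
           (const_int 1)))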
@@ -22592,7 +23172,7 @@ ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno,
}
/* An insn that accesses memory is slightly more expensive
- than one that does not. */
+ than one that does not. */
if (speed)
{
*total += 1;
@@ -22604,7 +23184,7 @@ ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno,
 So the current solution is to make constant disp as cheap as possible. */
if (GET_CODE (addr) == PLUS
&& x86_64_immediate_operand (XEXP (addr, 1), Pmode)
- /* Only hanlde (reg + disp) since other forms of addr are mostly LEA,
+ /* Only handle (reg + disp) since other forms of addr are mostly LEA,
there's no additional cost for the plus of disp. */
&& register_operand (XEXP (addr, 0), Pmode))
{
@@ -22833,7 +23413,9 @@ x86_this_parameter (tree function)
{
const int *parm_regs;
- if (ix86_function_type_abi (type) == MS_ABI)
+ if (lookup_attribute ("preserve_none", TYPE_ATTRIBUTES (type)))
+ parm_regs = x86_64_preserve_none_int_parameter_registers;
+ else if (ix86_function_type_abi (type) == MS_ABI)
parm_regs = x86_64_ms_abi_int_parameter_registers;
else
parm_regs = x86_64_int_parameter_registers;
@@ -23159,19 +23741,21 @@ x86_field_alignment (tree type, int computed)
/* Print call to TARGET to FILE. */
static void
-x86_print_call_or_nop (FILE *file, const char *target)
+x86_print_call_or_nop (FILE *file, const char *target,
+ const char *label)
{
if (flag_nop_mcount || !strcmp (target, "nop"))
/* 5 byte nop: nopl 0(%[re]ax,%[re]ax,1) */
- fprintf (file, "1:" ASM_BYTE "0x0f, 0x1f, 0x44, 0x00, 0x00\n");
+ fprintf (file, "%s" ASM_BYTE "0x0f, 0x1f, 0x44, 0x00, 0x00\n",
+ label);
else if (!TARGET_PECOFF && flag_pic)
{
gcc_assert (flag_plt);
- fprintf (file, "1:\tcall\t%s@PLT\n", target);
+ fprintf (file, "%s\tcall\t%s@PLT\n", label, target);
}
else
- fprintf (file, "1:\tcall\t%s\n", target);
+ fprintf (file, "%s\tcall\t%s\n", label, target);
}
static bool
@@ -23256,6 +23840,13 @@ x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
const char *mcount_name = MCOUNT_NAME;
+ bool fentry_section_p
+ = (flag_record_mcount
+ || lookup_attribute ("fentry_section",
+ DECL_ATTRIBUTES (current_function_decl)));
+
+ const char *label = fentry_section_p ? "1:" : "";
+
if (current_fentry_name (&mcount_name))
;
else if (fentry_name)
@@ -23291,11 +23882,12 @@ x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
reg = legacy_reg;
}
if (ASSEMBLER_DIALECT == ASM_INTEL)
- fprintf (file, "1:\tmovabs\t%s, OFFSET FLAT:%s\n"
- "\tcall\t%s\n", reg, mcount_name, reg);
+ fprintf (file, "%s\tmovabs\t%s, OFFSET FLAT:%s\n"
+ "\tcall\t%s\n", label, reg, mcount_name,
+ reg);
else
- fprintf (file, "1:\tmovabsq\t$%s, %%%s\n\tcall\t*%%%s\n",
- mcount_name, reg, reg);
+ fprintf (file, "%s\tmovabsq\t$%s, %%%s\n\tcall\t*%%%s\n",
+ label, mcount_name, reg, reg);
break;
case CM_LARGE_PIC:
#ifdef NO_PROFILE_COUNTERS
@@ -23336,21 +23928,21 @@ x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
if (!flag_plt)
{
if (ASSEMBLER_DIALECT == ASM_INTEL)
- fprintf (file, "1:\tcall\t[QWORD PTR %s@GOTPCREL[rip]]\n",
- mcount_name);
+ fprintf (file, "%s\tcall\t[QWORD PTR %s@GOTPCREL[rip]]\n",
+ label, mcount_name);
else
- fprintf (file, "1:\tcall\t*%s@GOTPCREL(%%rip)\n",
- mcount_name);
+ fprintf (file, "%s\tcall\t*%s@GOTPCREL(%%rip)\n",
+ label, mcount_name);
break;
}
/* fall through */
default:
- x86_print_call_or_nop (file, mcount_name);
+ x86_print_call_or_nop (file, mcount_name, label);
break;
}
}
else
- x86_print_call_or_nop (file, mcount_name);
+ x86_print_call_or_nop (file, mcount_name, label);
}
else if (flag_pic)
{
@@ -23365,11 +23957,13 @@ x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
LPREFIX, labelno);
#endif
if (flag_plt)
- x86_print_call_or_nop (file, mcount_name);
+ x86_print_call_or_nop (file, mcount_name, label);
else if (ASSEMBLER_DIALECT == ASM_INTEL)
- fprintf (file, "1:\tcall\t[DWORD PTR %s@GOT[ebx]]\n", mcount_name);
+ fprintf (file, "%s\tcall\t[DWORD PTR %s@GOT[ebx]]\n",
+ label, mcount_name);
else
- fprintf (file, "1:\tcall\t*%s@GOT(%%ebx)\n", mcount_name);
+ fprintf (file, "%s\tcall\t*%s@GOT(%%ebx)\n",
+ label, mcount_name);
}
else
{
@@ -23382,12 +23976,10 @@ x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
fprintf (file, "\tmovl\t$%sP%d, %%" PROFILE_COUNT_REGISTER "\n",
LPREFIX, labelno);
#endif
- x86_print_call_or_nop (file, mcount_name);
+ x86_print_call_or_nop (file, mcount_name, label);
}
- if (flag_record_mcount
- || lookup_attribute ("fentry_section",
- DECL_ATTRIBUTES (current_function_decl)))
+ if (fentry_section_p)
{
const char *sname = "__mcount_loc";
@@ -24146,7 +24738,7 @@ ix86_vector_mode_supported_p (machine_mode mode)
return true;
if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
return true;
- if (TARGET_AVX512F && TARGET_EVEX512 && VALID_AVX512F_REG_MODE (mode))
+ if (TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
return true;
if ((TARGET_MMX || TARGET_MMX_WITH_SSE)
&& VALID_MMX_REG_MODE (mode))
@@ -24248,6 +24840,12 @@ static void map_egpr_constraints (vec<const char *> &constraints)
buf.safe_push (cur[j + 1]);
j++;
break;
+ case '{':
+ do
+ {
+ buf.safe_push (cur[j]);
+ } while (cur[j++] != '}');
+ break;
default:
buf.safe_push (cur[j]);
break;
@@ -24394,8 +24992,7 @@ ix86_md_asm_adjust (vec<rtx> &outputs, vec<rtx> & /*inputs*/,
}
}
- rtx_insn *seq = get_insns ();
- end_sequence ();
+ rtx_insn *seq = end_sequence ();
if (saw_asm_flag)
return seq;
@@ -24666,24 +25263,18 @@ asm_preferred_eh_data_format (int code, int global)
return DW_EH_PE_absptr;
}
-/* Implement targetm.vectorize.builtin_vectorization_cost. */
+/* Worker for ix86_builtin_vectorization_cost and the fallback calls
+ from ix86_vector_costs::add_stmt_cost. */
static int
-ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
- tree vectype, int)
+ix86_default_vector_cost (enum vect_cost_for_stmt type_of_cost,
+ machine_mode mode)
{
- bool fp = false;
- machine_mode mode = TImode;
+ bool fp = FLOAT_MODE_P (mode);
int index;
- if (vectype != NULL)
- {
- fp = FLOAT_TYPE_P (vectype);
- mode = TYPE_MODE (vectype);
- }
-
switch (type_of_cost)
{
case scalar_stmt:
- return fp ? ix86_cost->addss : COSTS_N_INSNS (1);
+ return fp ? ix86_cost->addss : COSTS_N_INSNS (1);
case scalar_load:
/* load/store costs are relative to register move which is 2. Recompute
@@ -24738,14 +25329,14 @@ ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
COSTS_N_INSNS
(ix86_cost->gather_static
+ ix86_cost->gather_per_elt
- * TYPE_VECTOR_SUBPARTS (vectype)) / 2);
+ * GET_MODE_NUNITS (mode)) / 2);
case vector_scatter_store:
return ix86_vec_cost (mode,
COSTS_N_INSNS
(ix86_cost->scatter_static
+ ix86_cost->scatter_per_elt
- * TYPE_VECTOR_SUBPARTS (vectype)) / 2);
+ * GET_MODE_NUNITS (mode)) / 2);
case cond_branch_taken:
return ix86_cost->cond_taken_branch_cost;
@@ -24754,12 +25345,16 @@ ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
return ix86_cost->cond_not_taken_branch_cost;
case vec_perm:
+ return ix86_vec_cost (mode, ix86_cost->sse_op);
+
case vec_promote_demote:
+ if (fp)
+ return vec_fp_conversion_cost (ix86_tune_cost, mode);
return ix86_vec_cost (mode, ix86_cost->sse_op);
case vec_construct:
{
- int n = TYPE_VECTOR_SUBPARTS (vectype);
+ int n = GET_MODE_NUNITS (mode);
/* N - 1 element inserts into an SSE vector, the possible
GPR -> XMM move is accounted for in add_stmt_cost. */
if (GET_MODE_BITSIZE (mode) <= 128)
@@ -24767,12 +25362,18 @@ ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
/* One vinserti128 for combining two SSE vectors for AVX256. */
else if (GET_MODE_BITSIZE (mode) == 256)
return ((n - 2) * ix86_cost->sse_op
- + ix86_vec_cost (mode, ix86_cost->addss));
+ + ix86_vec_cost (mode, ix86_cost->sse_op));
/* One vinserti64x4 and two vinserti128 for combining SSE
and AVX256 vectors to AVX512. */
else if (GET_MODE_BITSIZE (mode) == 512)
- return ((n - 4) * ix86_cost->sse_op
- + 3 * ix86_vec_cost (mode, ix86_cost->addss));
+ {
+ machine_mode half_mode
+ = mode_for_vector (GET_MODE_INNER (mode),
+ GET_MODE_NUNITS (mode) / 2).require ();
+ return ((n - 4) * ix86_cost->sse_op
+ + 2 * ix86_vec_cost (half_mode, ix86_cost->sse_op)
+ + ix86_vec_cost (mode, ix86_cost->sse_op));
+ }
gcc_unreachable ();
}
@@ -24781,6 +25382,17 @@ ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
}
}
+/* Implement targetm.vectorize.builtin_vectorization_cost. */
+static int
+ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
+ tree vectype, int)
+{
+ machine_mode mode = TImode;
+ if (vectype != NULL)
+ mode = TYPE_MODE (vectype);
+ return ix86_default_vector_cost (type_of_cost, mode);
+}
+
/* This function returns the calling abi specific va_list type node.
It returns the FNDECL specific va_list type. */
@@ -24940,7 +25552,7 @@ ix86_preferred_simd_mode (scalar_mode mode)
switch (mode)
{
case E_QImode:
- if (TARGET_AVX512BW && TARGET_EVEX512 && !TARGET_PREFER_AVX256)
+ if (TARGET_AVX512BW && !TARGET_PREFER_AVX256)
return V64QImode;
else if (TARGET_AVX && !TARGET_PREFER_AVX128)
return V32QImode;
@@ -24948,7 +25560,7 @@ ix86_preferred_simd_mode (scalar_mode mode)
return V16QImode;
case E_HImode:
- if (TARGET_AVX512BW && TARGET_EVEX512 && !TARGET_PREFER_AVX256)
+ if (TARGET_AVX512BW && !TARGET_PREFER_AVX256)
return V32HImode;
else if (TARGET_AVX && !TARGET_PREFER_AVX128)
return V16HImode;
@@ -24956,7 +25568,7 @@ ix86_preferred_simd_mode (scalar_mode mode)
return V8HImode;
case E_SImode:
- if (TARGET_AVX512F && TARGET_EVEX512 && !TARGET_PREFER_AVX256)
+ if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
return V16SImode;
else if (TARGET_AVX && !TARGET_PREFER_AVX128)
return V8SImode;
@@ -24964,7 +25576,7 @@ ix86_preferred_simd_mode (scalar_mode mode)
return V4SImode;
case E_DImode:
- if (TARGET_AVX512F && TARGET_EVEX512 && !TARGET_PREFER_AVX256)
+ if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
return V8DImode;
else if (TARGET_AVX && !TARGET_PREFER_AVX128)
return V4DImode;
@@ -24978,16 +25590,15 @@ ix86_preferred_simd_mode (scalar_mode mode)
{
if (TARGET_PREFER_AVX128)
return V8HFmode;
- else if (TARGET_PREFER_AVX256 || !TARGET_EVEX512)
+ else if (TARGET_PREFER_AVX256)
return V16HFmode;
}
- if (TARGET_EVEX512)
- return V32HFmode;
+ return V32HFmode;
}
return word_mode;
case E_BFmode:
- if (TARGET_AVX512F && TARGET_EVEX512 && !TARGET_PREFER_AVX256)
+ if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
return V32BFmode;
else if (TARGET_AVX && !TARGET_PREFER_AVX128)
return V16BFmode;
@@ -24995,7 +25606,7 @@ ix86_preferred_simd_mode (scalar_mode mode)
return V8BFmode;
case E_SFmode:
- if (TARGET_AVX512F && TARGET_EVEX512 && !TARGET_PREFER_AVX256)
+ if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
return V16SFmode;
else if (TARGET_AVX && !TARGET_PREFER_AVX128)
return V8SFmode;
@@ -25003,7 +25614,7 @@ ix86_preferred_simd_mode (scalar_mode mode)
return V4SFmode;
case E_DFmode:
- if (TARGET_AVX512F && TARGET_EVEX512 && !TARGET_PREFER_AVX256)
+ if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
return V8DFmode;
else if (TARGET_AVX && !TARGET_PREFER_AVX128)
return V4DFmode;
@@ -25023,13 +25634,13 @@ ix86_preferred_simd_mode (scalar_mode mode)
static unsigned int
ix86_autovectorize_vector_modes (vector_modes *modes, bool all)
{
- if (TARGET_AVX512F && TARGET_EVEX512 && !TARGET_PREFER_AVX256)
+ if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
{
modes->safe_push (V64QImode);
modes->safe_push (V32QImode);
modes->safe_push (V16QImode);
}
- else if (TARGET_AVX512F && TARGET_EVEX512 && all)
+ else if (TARGET_AVX512F && all)
{
modes->safe_push (V32QImode);
modes->safe_push (V16QImode);
@@ -25067,7 +25678,7 @@ ix86_get_mask_mode (machine_mode data_mode)
unsigned elem_size = vector_size / nunits;
/* Scalar mask case. */
- if ((TARGET_AVX512F && TARGET_EVEX512 && vector_size == 64)
+ if ((TARGET_AVX512F && vector_size == 64)
|| (TARGET_AVX512VL && (vector_size == 32 || vector_size == 16))
/* AVX512FP16 only supports vector comparison
to kmask for _Float16. */
@@ -25214,15 +25825,20 @@ private:
unsigned m_num_sse_needed[3];
/* Number of 256-bit vector permutation. */
unsigned m_num_avx256_vec_perm[3];
+ /* Number of reductions for FMA/DOT_PROD_EXPR/SAD_EXPR. */
+ unsigned m_num_reduc[X86_REDUC_LAST];
+ /* Don't unroll if m_prefer_unroll is false; the default is true. */
+ bool m_prefer_unroll;
};
ix86_vector_costs::ix86_vector_costs (vec_info* vinfo, bool costing_for_scalar)
: vector_costs (vinfo, costing_for_scalar),
m_num_gpr_needed (),
m_num_sse_needed (),
- m_num_avx256_vec_perm ()
-{
-}
+ m_num_avx256_vec_perm (),
+ m_num_reduc (),
+ m_prefer_unroll (true)
+{}
/* Implement targetm.vectorize.create_costs. */
@@ -25235,7 +25851,7 @@ ix86_vectorize_create_costs (vec_info *vinfo, bool costing_for_scalar)
unsigned
ix86_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind,
stmt_vec_info stmt_info, slp_tree node,
- tree vectype, int misalign,
+ tree vectype, int,
vect_cost_model_location where)
{
unsigned retval = 0;
@@ -25253,6 +25869,14 @@ ix86_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind,
if (scalar_p)
mode = TYPE_MODE (TREE_TYPE (vectype));
}
+ /* When we are costing a scalar stmt use the scalar stmt to get at the
+ type of the operation. */
+ else if (scalar_p && stmt_info)
+ if (tree lhs = gimple_get_lhs (stmt_info->stmt))
+ {
+ fp = FLOAT_TYPE_P (TREE_TYPE (lhs));
+ mode = TYPE_MODE (TREE_TYPE (lhs));
+ }
if ((kind == vector_stmt || kind == scalar_stmt)
&& stmt_info
@@ -25275,7 +25899,7 @@ ix86_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind,
else if (X87_FLOAT_MODE_P (mode))
stmt_cost = ix86_cost->fadd;
else
- stmt_cost = ix86_cost->add;
+ stmt_cost = ix86_cost->add;
}
else
stmt_cost = ix86_vec_cost (mode, fp ? ix86_cost->addss
@@ -25330,7 +25954,7 @@ ix86_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind,
(subcode == RSHIFT_EXPR
&& !TYPE_UNSIGNED (TREE_TYPE (op1)))
? ASHIFTRT : LSHIFTRT, mode,
- TREE_CODE (op2) == INTEGER_CST,
+ TREE_CODE (op2) == INTEGER_CST,
cst_and_fits_in_hwi (op2)
? int_cst_value (op2) : -1,
false, false, NULL, NULL);
@@ -25339,31 +25963,297 @@ ix86_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind,
case NOP_EXPR:
/* Only sign-conversions are free. */
if (tree_nop_conversion_p
- (TREE_TYPE (gimple_assign_lhs (stmt_info->stmt)),
+ (TREE_TYPE (gimple_assign_lhs (stmt_info->stmt)),
TREE_TYPE (gimple_assign_rhs1 (stmt_info->stmt))))
stmt_cost = 0;
+ else if (fp)
+ stmt_cost = vec_fp_conversion_cost
+ (ix86_tune_cost, GET_MODE_BITSIZE (mode));
+ break;
+
+ case FLOAT_EXPR:
+ if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
+ stmt_cost = ix86_cost->cvtsi2ss;
+ else if (X87_FLOAT_MODE_P (mode))
+ /* TODO: We do not have cost tables for x87. */
+ stmt_cost = ix86_cost->fadd;
+ else
+ stmt_cost = ix86_vec_cost (mode, ix86_cost->cvtpi2ps);
+ break;
+
+ case FIX_TRUNC_EXPR:
+ if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
+ stmt_cost = ix86_cost->cvtss2si;
+ else if (X87_FLOAT_MODE_P (mode))
+ /* TODO: We do not have cost tables for x87. */
+ stmt_cost = ix86_cost->fadd;
+ else
+ stmt_cost = ix86_vec_cost (mode, ix86_cost->cvtps2pi);
+ break;
+
+ case COND_EXPR:
+ {
+ /* SSE2 conditional move sequence is:
+ pcmpgtd %xmm5, %xmm0 (accounted separately)
+ pand %xmm0, %xmm2
+ pandn %xmm1, %xmm0
+ por %xmm2, %xmm0
+ while SSE4 uses cmp + blend
+ and AVX512 masked moves.
+
+ The condition is accounted separately since we usually have
+ p = a < b
+ c = p ? x : y
+ and we will account the first statement as setcc. The exception is when
+ p is loaded from memory as bool and then we will not account
+ the compare, but there is no way to check for this. */
+
+ int ninsns = TARGET_SSE4_1 ? 1 : 3;
+
+ /* If one of the parameters is 0 or -1 the sequence will be simplified:
+ (if_true & mask) | (if_false & ~mask) -> if_true & mask */
+ if (ninsns > 1
+ && (zerop (gimple_assign_rhs2 (stmt_info->stmt))
+ || zerop (gimple_assign_rhs3 (stmt_info->stmt))
+ || integer_minus_onep
+ (gimple_assign_rhs2 (stmt_info->stmt))
+ || integer_minus_onep
+ (gimple_assign_rhs3 (stmt_info->stmt))))
+ ninsns = 1;
+
+ if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
+ stmt_cost = ninsns * ix86_cost->sse_op;
+ else if (X87_FLOAT_MODE_P (mode))
+ /* x87 requires conditional branch. We don't have cost for
+ that. */
+ ;
+ else if (VECTOR_MODE_P (mode))
+ stmt_cost = ix86_vec_cost (mode, ninsns * ix86_cost->sse_op);
+ else
+ /* compare (accounted separately) + cmov. */
+ stmt_cost = ix86_cost->add;
+ }
break;
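An intrinsics sketch of the three-instruction SSE2 select counted for COND_EXPR above (the compare itself is costed separately); with SSE4.1 the same select is a single blend:

    #include <emmintrin.h>

    static __m128i
    select_sse2 (__m128i mask, __m128i if_true, __m128i if_false)
    {
      return _mm_or_si128 (_mm_and_si128 (mask, if_true),      /* pand */
                           _mm_andnot_si128 (mask, if_false)); /* pandn, por */
    }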
- case BIT_IOR_EXPR:
- case ABS_EXPR:
- case ABSU_EXPR:
case MIN_EXPR:
case MAX_EXPR:
+ if (fp)
+ {
+ if (X87_FLOAT_MODE_P (mode)
+ && !SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
+ /* x87 requires conditional branch. We don't have cost for
+ that. */
+ ;
+ else
+ /* minss */
+ stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op);
+ }
+ else
+ {
+ if (VECTOR_MODE_P (mode))
+ {
+ stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op);
+ /* vpmin was introduced in SSE3.
+ SSE2 needs pcmpgtd + pand + pandn + pxor.
+ If one of the parameters is 0 or -1 the sequence is simplified
+ to pcmpgtd + pand. */
+ if (!TARGET_SSSE3)
+ {
+ if (zerop (gimple_assign_rhs2 (stmt_info->stmt))
+ || integer_minus_onep
+ (gimple_assign_rhs2 (stmt_info->stmt)))
+ stmt_cost *= 2;
+ else
+ stmt_cost *= 4;
+ }
+ }
+ else
+ /* cmp + cmov. */
+ stmt_cost = ix86_cost->add * 2;
+ }
+ break;
+
+ case ABS_EXPR:
+ case ABSU_EXPR:
+ if (fp)
+ {
+ if (X87_FLOAT_MODE_P (mode)
+ && !SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
+ /* fabs. */
+ stmt_cost = ix86_cost->fabs;
+ else
+ /* andss of sign bit. */
+ stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op);
+ }
+ else
+ {
+ if (VECTOR_MODE_P (mode))
+ {
+ stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op);
+ /* vabs was introduced in SSSE3.
+ SSE2 needs psra + pxor + psub. */
+ if (!TARGET_SSSE3)
+ stmt_cost *= 3;
+ }
+ else
+ /* neg + cmov. */
+ stmt_cost = ix86_cost->add * 2;
+ }
+ break;
+
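The factor of 3 applied above without SSSE3 corresponds to the classic sign-mask emulation of pabsd; an illustrative intrinsics sketch:

    #include <emmintrin.h>

    static __m128i
    abs_sse2 (__m128i x)
    {
      __m128i s = _mm_srai_epi32 (x, 31);              /* psrad: 0 or -1 */
      return _mm_sub_epi32 (_mm_xor_si128 (x, s), s);  /* pxor + psubd */
    }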
+ case BIT_IOR_EXPR:
case BIT_XOR_EXPR:
case BIT_AND_EXPR:
case BIT_NOT_EXPR:
- if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
- stmt_cost = ix86_cost->sse_op;
- else if (VECTOR_MODE_P (mode))
+ gcc_assert (!SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode)
+ && !X87_FLOAT_MODE_P (mode));
+ if (VECTOR_MODE_P (mode))
stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op);
else
stmt_cost = ix86_cost->add;
break;
+
+ default:
+ if (truth_value_p (subcode))
+ {
+ if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
+ /* CMPccS? instructions are cheap, so use sse_op. While they
+ produce a mask which may need to be turned to 0/1 by an and,
+ expect that this will be optimized away in the common case. */
+ stmt_cost = ix86_cost->sse_op;
+ else if (X87_FLOAT_MODE_P (mode))
+ /* fcmp + setcc. */
+ stmt_cost = ix86_cost->fadd + ix86_cost->add;
+ else if (VECTOR_MODE_P (mode))
+ stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op);
+ else
+ /* setcc. */
+ stmt_cost = ix86_cost->add;
+ break;
+ }
+ break;
+ }
+ }
+
+ /* Record number of load/store/gather/scatter in vectorized body. */
+ if (where == vect_body && !m_costing_for_scalar)
+ {
+ switch (kind)
+ {
+ /* Emulated gather/scatter or any scalarization. */
+ case scalar_load:
+ case scalar_stmt:
+ case scalar_store:
+ case vector_gather_load:
+ case vector_scatter_store:
+ m_prefer_unroll = false;
+ break;
+
+ case vector_stmt:
+ case vec_to_scalar:
+ /* Count the number of reduction FMA and "real" DOT_PROD_EXPR
+ statements; unrolling in the vectorizer will enable partial sums. */
+ if (stmt_info
+ && vect_is_reduction (stmt_info)
+ && stmt_info->stmt)
+ {
+ /* Handle __builtin_fma. */
+ if (gimple_call_combined_fn (stmt_info->stmt) == CFN_FMA)
+ {
+ m_num_reduc[X86_REDUC_FMA] += count;
+ break;
+ }
+
+ if (!is_gimple_assign (stmt_info->stmt))
+ break;
+
+ tree_code subcode = gimple_assign_rhs_code (stmt_info->stmt);
+ machine_mode inner_mode = GET_MODE_INNER (mode);
+ tree rhs1, rhs2;
+ bool native_vnni_p = true;
+ gimple* def;
+ machine_mode mode_rhs;
+ switch (subcode)
+ {
+ case PLUS_EXPR:
+ case MINUS_EXPR:
+ if (!fp || !flag_associative_math
+ || flag_fp_contract_mode != FP_CONTRACT_FAST)
+ break;
+
+ /* FMA condition for different modes. */
+ if (((inner_mode == DFmode || inner_mode == SFmode)
+ && !TARGET_FMA && !TARGET_AVX512VL)
+ || (inner_mode == HFmode && !TARGET_AVX512FP16)
+ || (inner_mode == BFmode && !TARGET_AVX10_2))
+ break;
+
+ /* MULT_EXPR + PLUS_EXPR/MINUS_EXPR is transformed
+ to FMA/FNMA after vectorization. */
+ rhs1 = gimple_assign_rhs1 (stmt_info->stmt);
+ rhs2 = gimple_assign_rhs2 (stmt_info->stmt);
+ if (subcode == PLUS_EXPR
+ && TREE_CODE (rhs1) == SSA_NAME
+ && (def = SSA_NAME_DEF_STMT (rhs1), true)
+ && is_gimple_assign (def)
+ && gimple_assign_rhs_code (def) == MULT_EXPR)
+ m_num_reduc[X86_REDUC_FMA] += count;
+ else if (TREE_CODE (rhs2) == SSA_NAME
+ && (def = SSA_NAME_DEF_STMT (rhs2), true)
+ && is_gimple_assign (def)
+ && gimple_assign_rhs_code (def) == MULT_EXPR)
+ m_num_reduc[X86_REDUC_FMA] += count;
+ break;
+
+ /* The vectorizer's lane_reducing_op_p supports DOT_PROD_EXPR,
+ WIDEN_SUM_EXPR and SAD_EXPR; the x86 backend only supports
+ SAD_EXPR (usad{v16qi,v32qi,v64qi}) and DOT_PROD_EXPR. */
+ case DOT_PROD_EXPR:
+ rhs1 = gimple_assign_rhs1 (stmt_info->stmt);
+ mode_rhs = TYPE_MODE (TREE_TYPE (rhs1));
+ if (mode_rhs == QImode)
+ {
+ rhs2 = gimple_assign_rhs2 (stmt_info->stmt);
+ signop signop1_p = TYPE_SIGN (TREE_TYPE (rhs1));
+ signop signop2_p = TYPE_SIGN (TREE_TYPE (rhs2));
+
+ /* vpdpbusd. */
+ if (signop1_p != signop2_p)
+ native_vnni_p
+ = (GET_MODE_SIZE (mode) == 64
+ ? TARGET_AVX512VNNI
+ : ((TARGET_AVX512VNNI && TARGET_AVX512VL)
+ || TARGET_AVXVNNI));
+ else
+ /* vpdpbssd. */
+ native_vnni_p
+ = (GET_MODE_SIZE (mode) == 64
+ ? TARGET_AVX10_2
+ : (TARGET_AVXVNNIINT8 || TARGET_AVX10_2));
+ }
+ m_num_reduc[X86_REDUC_DOT_PROD] += count;
+
+ /* Avoid unrolling and partial sums for
+ emulated DOT_PROD_EXPR. */
+ if (!native_vnni_p)
+ m_num_reduc[X86_REDUC_DOT_PROD] += 3 * count;
+ break;
+
+ case SAD_EXPR:
+ m_num_reduc[X86_REDUC_SAD] += count;
+ break;
+
+ default:
+ break;
+ }
+ }
+
default:
break;
}
}
+
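A hedged source-level example of a reduction the code above counts as X86_REDUC_DOT_PROD: a mixed-signedness byte dot product can be recognized as DOT_PROD_EXPR and, when the vpdpbusd forms are available (AVXVNNI / AVX512VNNI, as tested above), is native and therefore a good candidate for unrolling into partial sums:

    int
    dot (const unsigned char *a, const signed char *b, int n)
    {
      int sum = 0;
      for (int i = 0; i < n; i++)
        sum += a[i] * b[i];   /* DOT_PROD_EXPR after vectorization */
      return sum;
    }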
combined_fn cfn;
if ((kind == vector_stmt || kind == scalar_stmt)
&& stmt_info
@@ -25383,37 +26273,59 @@ ix86_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind,
break;
}
+ if (kind == vec_promote_demote)
+ {
+ int outer_size
+ = tree_to_uhwi
+ (TYPE_SIZE
+ (TREE_TYPE (gimple_assign_lhs (stmt_info->stmt))));
+ int inner_size
+ = tree_to_uhwi
+ (TYPE_SIZE
+ (TREE_TYPE (gimple_assign_rhs1 (stmt_info->stmt))));
+ bool inner_fp = FLOAT_TYPE_P
+ (TREE_TYPE (gimple_assign_rhs1 (stmt_info->stmt)));
+
+ if (fp && inner_fp)
+ stmt_cost = vec_fp_conversion_cost
+ (ix86_tune_cost, GET_MODE_BITSIZE (mode));
+ else if (fp && !inner_fp)
+ stmt_cost = ix86_vec_cost (mode, ix86_cost->cvtpi2ps);
+ else if (!fp && inner_fp)
+ stmt_cost = ix86_vec_cost (mode, ix86_cost->cvtps2pi);
+ else
+ stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op);
+ /* VEC_PACK_TRUNC_EXPR and similar demote operations: If the inner
+ (source) size is greater than the outer (result) size we will end up
+ doing two conversions and packing them. We always pack pairs; if the
+ size difference is greater it is split into multiple demote operations. */
+ if (inner_size > outer_size)
+ stmt_cost = stmt_cost * 2
+ + ix86_vec_cost (mode, ix86_cost->sse_op);
+ }
+
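A worked instance of the demote adjustment above: packing V8DF down to V8SF has inner_size 64 > outer_size 32, so the cost becomes two conversions (e.g. two vcvtpd2ps) plus one sse_op for combining the halves, i.e. stmt_cost * 2 + sse_op.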
/* If we do elementwise loads into a vector then we are bound by
latency and execution resources for the many scalar loads
(AGU and load ports). Try to account for this by scaling the
construction cost by the number of elements involved. */
if ((kind == vec_construct || kind == vec_to_scalar)
- && ((stmt_info
- && (STMT_VINFO_TYPE (stmt_info) == load_vec_info_type
- || STMT_VINFO_TYPE (stmt_info) == store_vec_info_type)
- && ((STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) == VMAT_ELEMENTWISE
- && (TREE_CODE (DR_STEP (STMT_VINFO_DATA_REF (stmt_info)))
+ && ((node
+ && (((SLP_TREE_MEMORY_ACCESS_TYPE (node) == VMAT_ELEMENTWISE
+ || (SLP_TREE_MEMORY_ACCESS_TYPE (node) == VMAT_STRIDED_SLP
+ && SLP_TREE_LANES (node) == 1))
+ && (TREE_CODE (DR_STEP (STMT_VINFO_DATA_REF
+ (SLP_TREE_REPRESENTATIVE (node))))
!= INTEGER_CST))
- || (STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info)
- == VMAT_GATHER_SCATTER)))
- || (node
- && (((SLP_TREE_MEMORY_ACCESS_TYPE (node) == VMAT_ELEMENTWISE
- || (SLP_TREE_MEMORY_ACCESS_TYPE (node) == VMAT_STRIDED_SLP
- && SLP_TREE_LANES (node) == 1))
- && (TREE_CODE (DR_STEP (STMT_VINFO_DATA_REF
- (SLP_TREE_REPRESENTATIVE (node))))
- != INTEGER_CST))
- || (SLP_TREE_MEMORY_ACCESS_TYPE (node)
- == VMAT_GATHER_SCATTER)))))
- {
- stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign);
+ || mat_gather_scatter_p (SLP_TREE_MEMORY_ACCESS_TYPE (node))))))
+ {
+ stmt_cost = ix86_default_vector_cost (kind, mode);
stmt_cost *= (TYPE_VECTOR_SUBPARTS (vectype) + 1);
}
else if ((kind == vec_construct || kind == scalar_to_vec)
&& node
&& SLP_TREE_DEF_TYPE (node) == vect_external_def)
{
- stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign);
+ stmt_cost = ix86_default_vector_cost (kind, mode);
unsigned i;
tree op;
FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_OPS (node), i, op)
@@ -25453,7 +26365,22 @@ ix86_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind,
else
{
m_num_gpr_needed[where]++;
- stmt_cost += ix86_cost->sse_to_integer;
+
+ int cost = COSTS_N_INSNS (ix86_cost->integer_to_sse) / 2;
+
+ /* For integer construction, the number of actual GPR -> XMM
+ moves will be somewhere between 0 and n.
+ We do not have a very good idea of the actual number, since
+ the source may be a constant, memory or a chain of
+ instructions that will later be converted by the
+ scalar-to-vector pass. */
+ if (kind == vec_construct
+ && GET_MODE_BITSIZE (mode) == 256)
+ cost *= 2;
+ else if (kind == vec_construct
+ && GET_MODE_BITSIZE (mode) == 512)
+ cost *= 3;
+ stmt_cost += cost;
}
}
}
@@ -25462,7 +26389,7 @@ ix86_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind,
TREE_VISITED (op) = 0;
}
if (stmt_cost == -1)
- stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign);
+ stmt_cost = ix86_default_vector_cost (kind, mode);
if (kind == vec_perm && vectype
&& GET_MODE_SIZE (TYPE_MODE (vectype)) == 32)
@@ -25533,6 +26460,41 @@ ix86_vector_costs::finish_cost (const vector_costs *scalar_costs)
&& (exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo).to_constant ())
> ceil_log2 (LOOP_VINFO_INT_NITERS (loop_vinfo))))
m_costs[vect_body] = INT_MAX;
+
+ bool any_reduc_p = false;
+ for (int i = 0; i != X86_REDUC_LAST; i++)
+ if (m_num_reduc[i])
+ {
+ any_reduc_p = true;
+ break;
+ }
+
+ if (any_reduc_p
+ /* Not much gain for loops with gathers and scatters. */
+ && m_prefer_unroll
+ && !LOOP_VINFO_EPILOGUE_P (loop_vinfo))
+ {
+ unsigned unroll_factor
+ = OPTION_SET_P (ix86_vect_unroll_limit)
+ ? ix86_vect_unroll_limit
+ : ix86_cost->vect_unroll_limit;
+
+ if (unroll_factor > 1)
+ {
+ for (int i = 0 ; i != X86_REDUC_LAST; i++)
+ {
+ if (m_num_reduc[i])
+ {
+ unsigned tmp = CEIL (ix86_cost->reduc_lat_mult_thr[i],
+ m_num_reduc[i]);
+ unroll_factor = MIN (unroll_factor, tmp);
+ }
+ }
+
+ m_suggested_unroll_factor = 1 << ceil_log2 (unroll_factor);
+ }
+ }
+
}
ix86_vect_estimate_reg_pressure ();
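A worked example of the suggested unroll factor computed above: with a hypothetical latency threshold reduc_lat_mult_thr[X86_REDUC_FMA] == 8 and m_num_reduc[X86_REDUC_FMA] == 3 reduction FMAs in the body, and unroll_factor starting at a larger vect_unroll_limit, CEIL (8, 3) == 3 caps the factor, and the final suggestion is rounded up to a power of two: 1 << ceil_log2 (3) == 4.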
@@ -25545,14 +26507,10 @@ ix86_vector_costs::finish_cost (const vector_costs *scalar_costs)
/* When X86_TUNE_AVX512_TWO_EPILOGUES is enabled arrange for both
a AVX2 and a SSE epilogue for AVX512 vectorized loops. */
if (loop_vinfo
+ && LOOP_VINFO_EPILOGUE_P (loop_vinfo)
+ && GET_MODE_SIZE (loop_vinfo->vector_mode) == 32
&& ix86_tune_features[X86_TUNE_AVX512_TWO_EPILOGUES])
- {
- if (GET_MODE_SIZE (loop_vinfo->vector_mode) == 64)
- m_suggested_epilogue_mode = V32QImode;
- else if (LOOP_VINFO_EPILOGUE_P (loop_vinfo)
- && GET_MODE_SIZE (loop_vinfo->vector_mode) == 32)
- m_suggested_epilogue_mode = V16QImode;
- }
+ m_suggested_epilogue_mode = V16QImode;
/* When a 128bit SSE vectorized epilogue still has a VF of 16 or larger
enable a 64bit SSE epilogue. */
if (loop_vinfo
@@ -25561,6 +26519,65 @@ ix86_vector_costs::finish_cost (const vector_costs *scalar_costs)
&& LOOP_VINFO_VECT_FACTOR (loop_vinfo).to_constant () >= 16)
m_suggested_epilogue_mode = V8QImode;
+ /* When X86_TUNE_AVX512_MASKED_EPILOGUES is enabled try to use
+ a masked epilogue if that doesn't seem detrimental. */
+ if (loop_vinfo
+ && !LOOP_VINFO_EPILOGUE_P (loop_vinfo)
+ && LOOP_VINFO_VECT_FACTOR (loop_vinfo).to_constant () > 2
+ && ix86_tune_features[X86_TUNE_AVX512_MASKED_EPILOGUES]
+ && !OPTION_SET_P (param_vect_partial_vector_usage))
+ {
+ bool avoid = false;
+ if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
+ && LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) >= 0)
+ {
+ unsigned int peel_niter
+ = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
+ if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
+ peel_niter += 1;
+ /* When we know the number of scalar iterations of the epilogue,
+ avoid masking when a single vector epilogue iteration handles
+ it in full. */
+ if (pow2p_hwi ((LOOP_VINFO_INT_NITERS (loop_vinfo) - peel_niter)
+ % LOOP_VINFO_VECT_FACTOR (loop_vinfo).to_constant ()))
+ avoid = true;
+ }
+ if (!avoid && loop_outer (loop_outer (LOOP_VINFO_LOOP (loop_vinfo))))
+ for (auto ddr : LOOP_VINFO_DDRS (loop_vinfo))
+ {
+ if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
+ ;
+ else if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
+ ;
+ else
+ {
+ int loop_depth
+ = index_in_loop_nest (LOOP_VINFO_LOOP (loop_vinfo)->num,
+ DDR_LOOP_NEST (ddr));
+ if (DDR_NUM_DIST_VECTS (ddr) == 1
+ && DDR_DIST_VECTS (ddr)[0][loop_depth] == 0)
+ {
+ /* Avoid the case when there's an outer loop that might
+ traverse a multi-dimensional array with the inner
+ loop just executing the masked epilogue with a
+ read-write where the next outer iteration might
+ read from the masked part of the previous write,
+ with 'n' filling only half a vector, as in:
+ for (j = 0; j < m; ++j)
+ for (i = 0; i < n; ++i)
+ a[j][i] = c * a[j][i]; */
+ avoid = true;
+ break;
+ }
+ }
+ }
+ if (!avoid)
+ {
+ m_suggested_epilogue_mode = loop_vinfo->vector_mode;
+ m_masked_epilogue = 1;
+ }
+ }
+
vector_costs::finish_cost (scalar_costs);
}
@@ -25680,7 +26697,7 @@ ix86_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node,
{
/* If the function isn't exported, we can pick up just one ISA
for the clones. */
- if (TARGET_AVX512F && TARGET_EVEX512)
+ if (TARGET_AVX512F)
clonei->vecsize_mangle = 'e';
else if (TARGET_AVX2)
clonei->vecsize_mangle = 'd';
@@ -25772,17 +26789,17 @@ ix86_simd_clone_usable (struct cgraph_node *node, machine_mode)
return -1;
if (!TARGET_AVX)
return 0;
- return (TARGET_AVX512F && TARGET_EVEX512) ? 3 : TARGET_AVX2 ? 2 : 1;
+ return TARGET_AVX512F ? 3 : TARGET_AVX2 ? 2 : 1;
case 'c':
if (!TARGET_AVX)
return -1;
- return (TARGET_AVX512F && TARGET_EVEX512) ? 2 : TARGET_AVX2 ? 1 : 0;
+ return TARGET_AVX512F ? 2 : TARGET_AVX2 ? 1 : 0;
case 'd':
if (!TARGET_AVX2)
return -1;
- return (TARGET_AVX512F && TARGET_EVEX512) ? 1 : 0;
+ return TARGET_AVX512F ? 1 : 0;
case 'e':
- if (!TARGET_AVX512F || !TARGET_EVEX512)
+ if (!TARGET_AVX512F)
return -1;
return 0;
default:
@@ -25997,7 +27014,7 @@ ix86_reloc_rw_mask (void)
static bool
symbolic_base_address_p (rtx addr)
{
- if (GET_CODE (addr) == SYMBOL_REF)
+ if (SYMBOL_REF_P (addr))
return true;
if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_GOTOFF)
@@ -26361,9 +27378,9 @@ ix86_memtag_can_tag_addresses ()
return ix86_lam_type != lam_none && TARGET_LP64;
}
-/* Implement TARGET_MEMTAG_TAG_SIZE. */
+/* Implement TARGET_MEMTAG_TAG_BITSIZE. */
unsigned char
-ix86_memtag_tag_size ()
+ix86_memtag_tag_bitsize ()
{
return IX86_HWASAN_TAG_SIZE;
}
@@ -26934,6 +27951,10 @@ static const scoped_attribute_specs *const ix86_attribute_table[] =
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST ix86_address_cost
+#undef TARGET_USE_BY_PIECES_INFRASTRUCTURE_P
+#define TARGET_USE_BY_PIECES_INFRASTRUCTURE_P \
+ ix86_use_by_pieces_infrastructure_p
+
#undef TARGET_OVERLAP_OP_BY_PIECES_P
#define TARGET_OVERLAP_OP_BY_PIECES_P hook_bool_void_true
@@ -27337,8 +28358,8 @@ ix86_libgcc_floating_mode_supported_p
#undef TARGET_MEMTAG_UNTAGGED_POINTER
#define TARGET_MEMTAG_UNTAGGED_POINTER ix86_memtag_untagged_pointer
-#undef TARGET_MEMTAG_TAG_SIZE
-#define TARGET_MEMTAG_TAG_SIZE ix86_memtag_tag_size
+#undef TARGET_MEMTAG_TAG_BITSIZE
+#define TARGET_MEMTAG_TAG_BITSIZE ix86_memtag_tag_bitsize
#undef TARGET_GEN_CCMP_FIRST
#define TARGET_GEN_CCMP_FIRST ix86_gen_ccmp_first
@@ -27454,6 +28475,195 @@ ix86_cannot_copy_insn_p (rtx_insn *insn)
#undef TARGET_DOCUMENTATION_NAME
#define TARGET_DOCUMENTATION_NAME "x86"
+/* Implement TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS. */
+sbitmap
+ix86_get_separate_components (void)
+{
+ HOST_WIDE_INT offset, to_allocate;
+ sbitmap components = sbitmap_alloc (FIRST_PSEUDO_REGISTER);
+ bitmap_clear (components);
+ struct machine_function *m = cfun->machine;
+
+ offset = m->frame.stack_pointer_offset;
+ to_allocate = offset - m->frame.sse_reg_save_offset;
+
+ /* Shrink wrap separate uses MOV, which means APX PPX cannot be used.
+ Experiments show that APX PPX can speed up the prologue. If the function
+ does not exit early during actual execution, then using APX PPX is faster.
+ If the function always exits early during actual execution, then shrink
+ wrap separate reduces the number of MOV (PUSH/POP) instructions actually
+ executed, thus speeding up execution.
+ foo:
+ movl $1, %eax
+ testq %rdi, %rdi
+ jne .L60
+ ret ---> early return.
+ .L60:
+ subq $88, %rsp ---> belong to prologue.
+ xorl %eax, %eax
+ movq %rbx, 40(%rsp) ---> belong to prologue.
+ movq 8(%rdi), %rbx
+ movq %rbp, 48(%rsp) ---> belong to prologue.
+ movq %rdi, %rbp
+ testq %rbx, %rbx
+ jne .L61
+ movq 40(%rsp), %rbx
+ movq 48(%rsp), %rbp
+ addq $88, %rsp
+ ret
+ .L61:
+ movq %r12, 56(%rsp) ---> belong to prologue.
+ movq %r13, 64(%rsp) ---> belong to prologue.
+ movq %r14, 72(%rsp) ---> belong to prologue.
+ ... ...
+
+ Disable shrink wrap separate when PPX is enabled. */
+ if ((TARGET_APX_PPX && !crtl->calls_eh_return)
+ || cfun->machine->func_type != TYPE_NORMAL
+ || TARGET_SEH
+ || crtl->stack_realign_needed
+ || m->call_ms2sysv)
+ return components;
+
+ /* Since shrink wrapping separate uses MOV instead of PUSH/POP,
+ disable shrink wrap separate when MOV is prohibited. */
+ if (save_regs_using_push_pop (to_allocate))
+ return components;
+
+ for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
+ if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true))
+ {
+ /* Skip registers with large offsets, where a pseudo may be needed. */
+ if (IN_RANGE (offset, -0x8000, 0x7fff))
+ bitmap_set_bit (components, regno);
+ offset += UNITS_PER_WORD;
+ }
+
+ /* Don't mess with the following registers. */
+ if (frame_pointer_needed)
+ bitmap_clear_bit (components, HARD_FRAME_POINTER_REGNUM);
+
+ if (crtl->drap_reg)
+ bitmap_clear_bit (components, REGNO (crtl->drap_reg));
+
+ if (pic_offset_table_rtx)
+ bitmap_clear_bit (components, REAL_PIC_OFFSET_TABLE_REGNUM);
+
+ return components;
+}
+
+/* Implement TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB. */
+sbitmap
+ix86_components_for_bb (basic_block bb)
+{
+ bitmap in = DF_LIVE_IN (bb);
+ bitmap gen = &DF_LIVE_BB_INFO (bb)->gen;
+ bitmap kill = &DF_LIVE_BB_INFO (bb)->kill;
+
+ sbitmap components = sbitmap_alloc (FIRST_PSEUDO_REGISTER);
+ bitmap_clear (components);
+
+ function_abi_aggregator callee_abis;
+ rtx_insn *insn;
+ FOR_BB_INSNS (bb, insn)
+ if (CALL_P (insn))
+ callee_abis.note_callee_abi (insn_callee_abi (insn));
+ HARD_REG_SET extra_caller_saves = callee_abis.caller_save_regs (*crtl->abi);
+
+ /* GPRs are used in a bb if they are in the IN, GEN, or KILL sets. */
+ for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
+ if (!fixed_regs[regno]
+ && (TEST_HARD_REG_BIT (extra_caller_saves, regno)
+ || bitmap_bit_p (in, regno)
+ || bitmap_bit_p (gen, regno)
+ || bitmap_bit_p (kill, regno)))
+ bitmap_set_bit (components, regno);
+
+ return components;
+}
+
+/* Implement TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS. */
+void
+ix86_disqualify_components (sbitmap, edge, sbitmap, bool)
+{
+ /* Nothing to do for x86. */
+}
+
+/* Implement TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS. */
+void
+ix86_emit_prologue_components (sbitmap components)
+{
+ HOST_WIDE_INT cfa_offset;
+ struct machine_function *m = cfun->machine;
+
+ cfa_offset = m->frame.reg_save_offset + m->fs.sp_offset
+ - m->frame.stack_pointer_offset;
+ for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
+ if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true))
+ {
+ if (bitmap_bit_p (components, regno))
+ ix86_emit_save_reg_using_mov (word_mode, regno, cfa_offset);
+ cfa_offset -= UNITS_PER_WORD;
+ }
+}
+
+/* Implement TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS. */
+void
+ix86_emit_epilogue_components (sbitmap components)
+{
+ HOST_WIDE_INT cfa_offset;
+ struct machine_function *m = cfun->machine;
+ cfa_offset = m->frame.reg_save_offset + m->fs.sp_offset
+ - m->frame.stack_pointer_offset;
+
+ for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
+ if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true))
+ {
+ if (bitmap_bit_p (components, regno))
+ {
+ rtx reg = gen_rtx_REG (word_mode, regno);
+ rtx mem;
+ rtx_insn *insn;
+
+ mem = choose_baseaddr (cfa_offset, NULL);
+ mem = gen_frame_mem (word_mode, mem);
+ insn = emit_move_insn (reg, mem);
+
+ RTX_FRAME_RELATED_P (insn) = 1;
+ add_reg_note (insn, REG_CFA_RESTORE, reg);
+ }
+ cfa_offset -= UNITS_PER_WORD;
+ }
+}
+
+/* Implement TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS. */
+void
+ix86_set_handled_components (sbitmap components)
+{
+ for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
+ if (bitmap_bit_p (components, regno))
+ {
+ cfun->machine->reg_is_wrapped_separately[regno] = true;
+ cfun->machine->use_fast_prologue_epilogue = true;
+ cfun->machine->frame.save_regs_using_mov = true;
+ }
+}
+
+#undef TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS
+#define TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS ix86_get_separate_components
+#undef TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB
+#define TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB ix86_components_for_bb
+#undef TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS
+#define TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS ix86_disqualify_components
+#undef TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS
+#define TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS \
+ ix86_emit_prologue_components
+#undef TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS
+#define TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS \
+ ix86_emit_epilogue_components
+#undef TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS
+#define TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS ix86_set_handled_components
+
struct gcc_target targetm = TARGET_INITIALIZER;
#include "gt-i386.h"
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index 8507243..ac0ce68 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -102,6 +102,15 @@ struct stringop_algs
#define COSTS_N_BYTES(N) ((N) * 2)
#endif
+
+enum ix86_reduc_unroll_factor {
+ X86_REDUC_FMA,
+ X86_REDUC_DOT_PROD,
+ X86_REDUC_SAD,
+
+ X86_REDUC_LAST
+};
+
/* Define the specific costs for a given cpu. NB: hard_register is used
by TARGET_REGISTER_MOVE_COST and TARGET_MEMORY_MOVE_COST to compute
hard register move costs by register allocator. Relative costs of
@@ -179,6 +188,7 @@ struct processor_costs {
const int xmm_move, ymm_move, /* cost of moving XMM and YMM register. */
zmm_move;
const int sse_to_integer; /* cost of moving SSE register to integer. */
+ const int integer_to_sse; /* cost of moving integer register to SSE. */
const int gather_static, gather_per_elt; /* Cost of gather load is computed
as static + per_item * nelts. */
const int scatter_static, scatter_per_elt; /* Cost of gather store is
@@ -207,6 +217,16 @@ struct processor_costs {
const int divsd; /* cost of DIVSD instructions. */
const int sqrtss; /* cost of SQRTSS instructions. */
const int sqrtsd; /* cost of SQRTSD instructions. */
+ const int cvtss2sd; /* cost of SSE FP conversions,
+ such as CVTSS2SD. */
+ const int vcvtps2pd256; /* cost of 256-bit packed FP conversions,
+ such as VCVTPD2PS with the larger reg in ymm. */
+ const int vcvtps2pd512; /* cost of 512-bit packed FP conversions,
+ such as VCVTPD2PS with the larger reg in zmm. */
+ const int cvtsi2ss; /* cost of CVTSI2SS instruction. */
+ const int cvtss2si; /* cost of CVT(T)SS2SI instruction. */
+ const int cvtpi2ps; /* cost of CVTPI2PS instruction. */
+ const int cvtps2pi; /* cost of CVT(T)PS2PI instruction. */
const int reassoc_int, reassoc_fp, reassoc_vec_int, reassoc_vec_fp;
/* Specify reassociation width for integer,
fp, vector integer and vector fp
@@ -214,6 +234,13 @@ struct processor_costs {
to number of instructions executed in
parallel. See also
ix86_reassociation_width. */
+ const unsigned reduc_lat_mult_thr[X86_REDUC_LAST];
+ /* Latency times throughput of
+ FMA/DOT_PROD_EXPR/SAD_EXPR;
+ used to determine the unroll
+ factor in the vectorizer. */
+ const unsigned vect_unroll_limit; /* Limit how much the autovectorizer
+ may unroll a loop. */
struct stringop_algs *memcpy, *memset;
const int cond_taken_branch_cost; /* Cost of taken branch for vectorizer
cost model. */
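
[Editor's note: a hypothetical excerpt showing how a tuning table might
fill the new fields; the values are made up, and the real tables live in
x86-tune-costs.h.]

  COSTS_N_INSNS (2),		/* cvtss2sd  */
  COSTS_N_INSNS (2),		/* vcvtps2pd256  */
  COSTS_N_INSNS (3),		/* vcvtps2pd512  */
  COSTS_N_INSNS (2),		/* cvtsi2ss  */
  COSTS_N_INSNS (2),		/* cvtss2si  */
  COSTS_N_INSNS (2),		/* cvtpi2ps  */
  COSTS_N_INSNS (2),		/* cvtps2pi  */
  1, 1, 1, 1,			/* reassoc int, fp, vec_int, vec_fp  */
  {8, 8, 6},			/* reduc_lat_mult_thr: FMA, DOT_PROD, SAD
				   (latency times throughput)  */
  4,				/* vect_unroll_limit  */
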
@@ -479,7 +506,9 @@ extern unsigned char ix86_tune_features[X86_TUNE_LAST];
#define TARGET_SSE_MOVCC_USE_BLENDV \
ix86_tune_features[X86_TUNE_SSE_MOVCC_USE_BLENDV]
#define TARGET_ALIGN_TIGHT_LOOPS \
- ix86_tune_features[X86_TUNE_ALIGN_TIGHT_LOOPS]
+ ix86_tune_features[X86_TUNE_ALIGN_TIGHT_LOOPS]
+#define TARGET_SSE_REDUCTION_PREFER_PSHUF \
+ ix86_tune_features[X86_TUNE_SSE_REDUCTION_PREFER_PSHUF]
/* Feature tests against the various architecture variations. */
@@ -525,6 +554,7 @@ extern unsigned char ix86_prefetch_sse;
#define TARGET_GNU2_TLS (ix86_tls_dialect == TLS_DIALECT_GNU2)
#define TARGET_ANY_GNU_TLS (TARGET_GNU_TLS || TARGET_GNU2_TLS)
#define TARGET_SUN_TLS 0
+#define TARGET_WIN32_TLS 0
#ifndef TARGET_64BIT_DEFAULT
#define TARGET_64BIT_DEFAULT 0
@@ -630,7 +660,8 @@ extern const char *host_detect_local_cpu (int argc, const char **argv);
{"cpu_64", "%{" OPT_ARCH64 ":%{!mtune=*:%{!mcpu=*:%{!march=*:-mtune=%(VALUE)}}}}" }, \
{"arch", "%{!march=*:-march=%(VALUE)}"}, \
{"arch_32", "%{" OPT_ARCH32 ":%{!march=*:-march=%(VALUE)}}"}, \
- {"arch_64", "%{" OPT_ARCH64 ":%{!march=*:-march=%(VALUE)}}"},
+ {"arch_64", "%{" OPT_ARCH64 ":%{!march=*:-march=%(VALUE)}}"}, \
+ {"tls", "%{!mtls-dialect=*:-mtls-dialect=%(VALUE)}"},
/* Specs for the compiler proper */
@@ -804,7 +835,7 @@ extern const char *host_detect_local_cpu (int argc, const char **argv);
TARGET_ABSOLUTE_BIGGEST_ALIGNMENT. */
#define BIGGEST_ALIGNMENT \
- (TARGET_IAMCU ? 32 : ((TARGET_AVX512F && TARGET_EVEX512) \
+ (TARGET_IAMCU ? 32 : (TARGET_AVX512F \
? 512 : (TARGET_AVX ? 256 : 128)))
/* Maximum stack alignment. */
@@ -1682,6 +1713,8 @@ typedef struct ix86_args {
int stdarg; /* Set to 1 if function is stdarg. */
enum calling_abi call_abi; /* Set to SYSV_ABI for sysv abi. Otherwise
MS_ABI for ms abi. */
+ bool preserve_none_abi; /* Set to true if the preserve_none ABI is
+ used. */
tree decl; /* Callee decl. */
} CUMULATIVE_ARGS;
@@ -1826,8 +1859,8 @@ typedef struct ix86_args {
#define STRIP_UNARY(X) (UNARY_P (X) ? XEXP (X, 0) : X)
#define SYMBOLIC_CONST(X) \
- (GET_CODE (X) == SYMBOL_REF \
- || GET_CODE (X) == LABEL_REF \
+ (SYMBOL_REF_P (X) \
+ || LABEL_REF_P (X) \
|| (GET_CODE (X) == CONST && symbolic_reference_mentioned_p (X)))
/* Max number of args passed in registers. If this is more than 3, we will
@@ -1883,7 +1916,7 @@ typedef struct ix86_args {
MOVE_MAX_PIECES defaults to MOVE_MAX. */
#define MOVE_MAX \
- ((TARGET_AVX512F && TARGET_EVEX512\
+ ((TARGET_AVX512F \
&& (ix86_move_max == PVW_AVX512 \
|| ix86_store_max == PVW_AVX512)) \
? 64 \
@@ -1902,7 +1935,7 @@ typedef struct ix86_args {
store_by_pieces of 16/32/64 bytes. */
#define STORE_MAX_PIECES \
(TARGET_INTER_UNIT_MOVES_TO_VEC \
- ? ((TARGET_AVX512F && TARGET_EVEX512 && ix86_store_max == PVW_AVX512) \
+ ? ((TARGET_AVX512F && ix86_store_max == PVW_AVX512) \
? 64 \
: ((TARGET_AVX \
&& ix86_store_max >= PVW_AVX256) \
@@ -2255,6 +2288,13 @@ extern unsigned int const svr4_debugger_register_map[FIRST_PSEUDO_REGISTER];
} while (0)
#endif
+/* In Intel syntax, we have to quote user-defined labels that would
+ match (unprefixed) registers or operators. */
+
+#undef ASM_OUTPUT_LABELREF
+#define ASM_OUTPUT_LABELREF(STREAM, NAME) \
+ ix86_asm_output_labelref ((STREAM), user_label_prefix, (NAME))
+
/* Under some conditions we need jump tables in the text section,
because the assembler cannot handle label differences between
sections. */
@@ -2396,13 +2436,13 @@ constexpr wide_int_bitmask PTA_SKYLAKE = PTA_BROADWELL | PTA_AES
| PTA_CLFLUSHOPT | PTA_XSAVEC | PTA_XSAVES | PTA_SGX;
constexpr wide_int_bitmask PTA_SKYLAKE_AVX512 = PTA_SKYLAKE | PTA_AVX512F
| PTA_AVX512CD | PTA_AVX512VL | PTA_AVX512BW | PTA_AVX512DQ | PTA_PKU
- | PTA_CLWB | PTA_EVEX512;
+ | PTA_CLWB;
constexpr wide_int_bitmask PTA_CASCADELAKE = PTA_SKYLAKE_AVX512
| PTA_AVX512VNNI;
constexpr wide_int_bitmask PTA_COOPERLAKE = PTA_CASCADELAKE | PTA_AVX512BF16;
constexpr wide_int_bitmask PTA_CANNONLAKE = PTA_SKYLAKE | PTA_AVX512F
| PTA_AVX512CD | PTA_AVX512VL | PTA_AVX512BW | PTA_AVX512DQ | PTA_PKU
- | PTA_AVX512VBMI | PTA_AVX512IFMA | PTA_SHA | PTA_EVEX512;
+ | PTA_AVX512VBMI | PTA_AVX512IFMA | PTA_SHA;
constexpr wide_int_bitmask PTA_ICELAKE_CLIENT = PTA_CANNONLAKE | PTA_AVX512VNNI
| PTA_GFNI | PTA_VAES | PTA_AVX512VBMI2 | PTA_VPCLMULQDQ | PTA_AVX512BITALG
| PTA_RDPID | PTA_AVX512VPOPCNTDQ;
@@ -2425,14 +2465,16 @@ constexpr wide_int_bitmask PTA_GOLDMONT_PLUS = PTA_GOLDMONT | PTA_RDPID
| PTA_SGX | PTA_PTWRITE;
constexpr wide_int_bitmask PTA_TREMONT = PTA_GOLDMONT_PLUS | PTA_CLWB
| PTA_GFNI | PTA_MOVDIRI | PTA_MOVDIR64B | PTA_CLDEMOTE | PTA_WAITPKG;
-constexpr wide_int_bitmask PTA_ALDERLAKE = PTA_TREMONT | PTA_ADX | PTA_AVX
+constexpr wide_int_bitmask PTA_ALDERLAKE = PTA_GOLDMONT_PLUS | PTA_CLWB
+ | PTA_GFNI | PTA_MOVDIRI | PTA_MOVDIR64B | PTA_WAITPKG | PTA_ADX | PTA_AVX
| PTA_AVX2 | PTA_BMI | PTA_BMI2 | PTA_F16C | PTA_FMA | PTA_LZCNT
| PTA_PCONFIG | PTA_PKU | PTA_VAES | PTA_VPCLMULQDQ | PTA_SERIALIZE
| PTA_HRESET | PTA_KL | PTA_WIDEKL | PTA_AVXVNNI;
-constexpr wide_int_bitmask PTA_SIERRAFOREST = PTA_ALDERLAKE | PTA_AVXIFMA
- | PTA_AVXVNNIINT8 | PTA_AVXNECONVERT | PTA_CMPCCXADD | PTA_ENQCMD | PTA_UINTR;
+constexpr wide_int_bitmask PTA_SIERRAFOREST = PTA_ALDERLAKE | PTA_CLDEMOTE
+ | PTA_AVXIFMA | PTA_AVXVNNIINT8 | PTA_AVXNECONVERT | PTA_CMPCCXADD
+ | PTA_ENQCMD | PTA_UINTR;
constexpr wide_int_bitmask PTA_GRANITERAPIDS = PTA_SAPPHIRERAPIDS | PTA_AMX_FP16
- | PTA_PREFETCHI;
+ | PTA_PREFETCHI | PTA_AVX10_1;
constexpr wide_int_bitmask PTA_GRANITERAPIDS_D = PTA_GRANITERAPIDS
| PTA_AMX_COMPLEX;
constexpr wide_int_bitmask PTA_GRANDRIDGE = PTA_SIERRAFOREST;
@@ -2440,25 +2482,21 @@ constexpr wide_int_bitmask PTA_ARROWLAKE = PTA_ALDERLAKE | PTA_AVXIFMA
| PTA_AVXVNNIINT8 | PTA_AVXNECONVERT | PTA_CMPCCXADD | PTA_UINTR;
constexpr wide_int_bitmask PTA_ARROWLAKE_S = PTA_ARROWLAKE | PTA_AVXVNNIINT16
| PTA_SHA512 | PTA_SM3 | PTA_SM4;
-constexpr wide_int_bitmask PTA_CLEARWATERFOREST = PTA_SIERRAFOREST
- | PTA_AVXVNNIINT16 | PTA_SHA512 | PTA_SM3 | PTA_SM4 | PTA_USER_MSR
- | PTA_PREFETCHI;
-constexpr wide_int_bitmask PTA_PANTHERLAKE = PTA_ARROWLAKE_S | PTA_PREFETCHI;
-constexpr wide_int_bitmask PTA_DIAMONDRAPIDS = PTA_SKYLAKE | PTA_PKU | PTA_SHA
- | PTA_GFNI | PTA_VAES | PTA_VPCLMULQDQ | PTA_RDPID | PTA_PCONFIG
- | PTA_WBNOINVD | PTA_CLWB | PTA_MOVDIRI | PTA_MOVDIR64B | PTA_ENQCMD
- | PTA_CLDEMOTE | PTA_PTWRITE | PTA_WAITPKG | PTA_SERIALIZE | PTA_TSXLDTRK
- | PTA_AMX_TILE | PTA_AMX_INT8 | PTA_AMX_BF16 | PTA_UINTR | PTA_AVXVNNI
- | PTA_AMX_FP16 | PTA_PREFETCHI | PTA_AMX_COMPLEX | PTA_AVX10_1_256
- | PTA_AVX10_1 | PTA_AVXIFMA | PTA_AVXNECONVERT | PTA_AVXVNNIINT16
- | PTA_AVXVNNIINT8 | PTA_CMPCCXADD | PTA_SHA512 | PTA_SM3 | PTA_SM4
- | PTA_AVX10_2 | PTA_APX_F | PTA_AMX_AVX512 | PTA_AMX_FP8 | PTA_AMX_TF32
- | PTA_AMX_TRANSPOSE | PTA_MOVRS | PTA_AMX_MOVRS | PTA_USER_MSR;
+constexpr wide_int_bitmask PTA_CLEARWATERFOREST =
+ (PTA_SIERRAFOREST & (~(PTA_KL | PTA_WIDEKL))) | PTA_AVXVNNIINT16 | PTA_SHA512
+ | PTA_SM3 | PTA_SM4 | PTA_USER_MSR | PTA_PREFETCHI;
+constexpr wide_int_bitmask PTA_PANTHERLAKE =
+ (PTA_ARROWLAKE_S & (~(PTA_KL | PTA_WIDEKL))) | PTA_PREFETCHI;
+constexpr wide_int_bitmask PTA_DIAMONDRAPIDS = PTA_GRANITERAPIDS_D
+ | PTA_AVXIFMA | PTA_AVXNECONVERT | PTA_AVXVNNIINT16 | PTA_AVXVNNIINT8
+ | PTA_CMPCCXADD | PTA_SHA512 | PTA_SM3 | PTA_SM4 | PTA_AVX10_2
+ | PTA_APX_F | PTA_AMX_AVX512 | PTA_AMX_FP8 | PTA_AMX_TF32 | PTA_AMX_TRANSPOSE
+ | PTA_MOVRS | PTA_AMX_MOVRS | PTA_USER_MSR;
constexpr wide_int_bitmask PTA_BDVER1 = PTA_64BIT | PTA_MMX | PTA_SSE
- | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3
- | PTA_SSE4_1 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
- | PTA_XOP | PTA_LWP | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE;
+ | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_POPCNT | PTA_LZCNT
+ | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL
+ | PTA_AVX | PTA_FMA4 | PTA_XOP | PTA_LWP | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE;
constexpr wide_int_bitmask PTA_BDVER2 = PTA_BDVER1 | PTA_BMI | PTA_TBM
| PTA_F16C | PTA_FMA;
constexpr wide_int_bitmask PTA_BDVER3 = PTA_BDVER2 | PTA_XSAVEOPT
@@ -2466,13 +2504,13 @@ constexpr wide_int_bitmask PTA_BDVER3 = PTA_BDVER2 | PTA_XSAVEOPT
constexpr wide_int_bitmask PTA_BDVER4 = PTA_BDVER3 | PTA_AVX2 | PTA_BMI2
| PTA_RDRND | PTA_MOVBE | PTA_MWAITX;
-constexpr wide_int_bitmask PTA_ZNVER1 = PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2
- | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
- | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_AVX2 | PTA_BMI | PTA_BMI2
- | PTA_F16C | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE | PTA_XSAVEOPT
- | PTA_FSGSBASE | PTA_RDRND | PTA_MOVBE | PTA_MWAITX | PTA_ADX | PTA_RDSEED
- | PTA_CLZERO | PTA_CLFLUSHOPT | PTA_XSAVEC | PTA_XSAVES | PTA_SHA | PTA_LZCNT
- | PTA_POPCNT;
+constexpr wide_int_bitmask PTA_ZNVER1 = PTA_64BIT | PTA_MMX | PTA_SSE
+ | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_POPCNT | PTA_LZCNT
+ | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL
+ | PTA_AVX | PTA_AVX2 | PTA_BMI | PTA_BMI2 | PTA_F16C | PTA_FMA | PTA_PRFCHW
+ | PTA_FXSR | PTA_XSAVE | PTA_XSAVEOPT | PTA_FSGSBASE | PTA_RDRND | PTA_MOVBE
+ | PTA_MWAITX | PTA_ADX | PTA_RDSEED | PTA_CLZERO | PTA_CLFLUSHOPT
+ | PTA_XSAVEC | PTA_XSAVES | PTA_SHA;
constexpr wide_int_bitmask PTA_ZNVER2 = PTA_ZNVER1 | PTA_CLWB | PTA_RDPID
| PTA_WBNOINVD;
constexpr wide_int_bitmask PTA_ZNVER3 = PTA_ZNVER2 | PTA_VAES | PTA_VPCLMULQDQ
@@ -2480,24 +2518,24 @@ constexpr wide_int_bitmask PTA_ZNVER3 = PTA_ZNVER2 | PTA_VAES | PTA_VPCLMULQDQ
constexpr wide_int_bitmask PTA_ZNVER4 = PTA_ZNVER3 | PTA_AVX512F | PTA_AVX512DQ
| PTA_AVX512IFMA | PTA_AVX512CD | PTA_AVX512BW | PTA_AVX512VL
| PTA_AVX512BF16 | PTA_AVX512VBMI | PTA_AVX512VBMI2 | PTA_GFNI
- | PTA_AVX512VNNI | PTA_AVX512BITALG | PTA_AVX512VPOPCNTDQ | PTA_EVEX512;
+ | PTA_AVX512VNNI | PTA_AVX512BITALG | PTA_AVX512VPOPCNTDQ;
constexpr wide_int_bitmask PTA_ZNVER5 = PTA_ZNVER4 | PTA_AVXVNNI
| PTA_MOVDIRI | PTA_MOVDIR64B | PTA_AVX512VP2INTERSECT | PTA_PREFETCHI;
constexpr wide_int_bitmask PTA_BTVER1 = PTA_64BIT | PTA_MMX | PTA_SSE
- | PTA_SSE2 | PTA_SSE3 | PTA_SSSE3 | PTA_SSE4A | PTA_ABM | PTA_CX16
- | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE;
+ | PTA_SSE2 | PTA_SSE3 | PTA_SSSE3 | PTA_SSE4A | PTA_LZCNT | PTA_POPCNT
+ | PTA_ABM | PTA_CX16 | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE;
constexpr wide_int_bitmask PTA_BTVER2 = PTA_BTVER1 | PTA_SSE4_1 | PTA_SSE4_2
| PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_BMI | PTA_F16C | PTA_MOVBE
| PTA_XSAVEOPT;
constexpr wide_int_bitmask PTA_LUJIAZUI = PTA_64BIT | PTA_MMX | PTA_SSE
- | PTA_SSE2 | PTA_SSE3 | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
- | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_BMI | PTA_BMI2 | PTA_PRFCHW
- | PTA_FXSR | PTA_XSAVE | PTA_XSAVEOPT | PTA_FSGSBASE | PTA_RDRND | PTA_MOVBE
- | PTA_ADX | PTA_RDSEED | PTA_POPCNT;
+ | PTA_SSE2 | PTA_SSE3 | PTA_CX16 | PTA_LZCNT | PTA_POPCNT | PTA_ABM
+ | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_BMI
+ | PTA_BMI2 | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE | PTA_XSAVEOPT | PTA_FSGSBASE
+ | PTA_RDRND | PTA_MOVBE | PTA_ADX | PTA_RDSEED;
constexpr wide_int_bitmask PTA_YONGFENG = PTA_LUJIAZUI | PTA_AVX | PTA_AVX2
- | PTA_F16C | PTA_FMA | PTA_SHA | PTA_LZCNT;
+ | PTA_F16C | PTA_FMA | PTA_SHA;
#ifndef GENERATOR_FILE
@@ -2782,11 +2820,13 @@ enum call_saved_registers_type
or "no_caller_saved_registers" attribute. */
TYPE_NO_CALLER_SAVED_REGISTERS,
/* The current function is a function specified with the
- "no_callee_saved_registers" attribute. */
+ "no_callee_saved_registers" attribute or a function specified with
+ the "noreturn" attribute when compiled with
+ "-mnoreturn-no-callee-saved-registers". */
TYPE_NO_CALLEE_SAVED_REGISTERS,
- /* The current function is a function specified with the "noreturn"
- attribute. */
- TYPE_NO_CALLEE_SAVED_REGISTERS_EXCEPT_BP,
+ /* The current function is a function specified with the
+ "preserve_none" attribute. */
+ TYPE_PRESERVE_NONE,
};
enum queued_insn_type
@@ -2805,6 +2845,10 @@ struct GTY(()) machine_function {
/* Cached initial frame layout for the current function. */
struct ix86_frame frame;
+ /* The components already handled by separate shrink-wrapping, which should
+ not be considered by the prologue and epilogue. */
+ bool reg_is_wrapped_separately[FIRST_PSEUDO_REGISTER];
+
/* For -fsplit-stack support: A stack local which holds a pointer to
the stack arguments for a function with a variable number of
arguments. This is set at the start of the function and is used
@@ -2838,6 +2882,9 @@ struct GTY(()) machine_function {
approximation. */
BOOL_BITFIELD tls_descriptor_call_expanded_p : 1;
+ /* True if TLS descriptor is called more than once. */
+ BOOL_BITFIELD tls_descriptor_call_multiple_p : 1;
+
/* If true, the current function has a STATIC_CHAIN is placed on the
stack below the return address. */
BOOL_BITFIELD static_chain_on_stack : 1;
@@ -2859,7 +2906,7 @@ struct GTY(()) machine_function {
ENUM_BITFIELD(indirect_branch) function_return_type : 3;
/* Call saved registers type. */
- ENUM_BITFIELD(call_saved_registers_type) call_saved_registers : 2;
+ ENUM_BITFIELD(call_saved_registers_type) call_saved_registers : 3;
/* If true, there is register available for argument passing. This
is used only in ix86_function_ok_for_sibcall by 32-bit to determine
@@ -2904,6 +2951,12 @@ struct GTY(()) machine_function {
/* True if inline asm with redzone clobber has been seen. */
BOOL_BITFIELD asm_redzone_clobber_seen : 1;
+ /* True if this is a recursive function. */
+ BOOL_BITFIELD recursive_function : 1;
+
+ /* True if by_pieces op is currently in use. */
+ BOOL_BITFIELD by_pieces_in_use : 1;
+
/* The largest alignment, in bytes, of stack slot actually used. */
unsigned int max_used_stack_alignment;
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index d6b2f29..cea6c15 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -58,10 +58,11 @@
;; H -- print a memory address offset by 8; used for sse high-parts
;; K -- print HLE lock prefix
;; Y -- print condition for XOP pcom* instruction.
+;; v -- print segment override prefix
;; + -- print a branch hint as 'cs' or 'ds' prefix
;; ; -- print a semicolon (after prefixes due to bug in older gas).
;; ~ -- print "i" if TARGET_AVX2, "f" otherwise.
-;; ^ -- print addr32 prefix if TARGET_64BIT and Pmode != word_mode
+;; ^ -- print addr32 prefix if Pmode != word_mode
;; ! -- print NOTRACK prefix for jxx/call/ret instructions if required.
(define_c_enum "unspec" [
@@ -79,6 +80,7 @@
UNSPEC_MACHOPIC_OFFSET
UNSPEC_PCREL
UNSPEC_SIZEOF
+ UNSPEC_SECREL32
;; Prologue support
UNSPEC_STACK_ALLOC
@@ -579,12 +581,11 @@
(define_attr "isa" "base,x64,nox64,x64_sse2,x64_sse4,x64_sse4_noavx,
x64_avx,x64_avx512bw,x64_avx512dq,apx_ndd,apx_ndd_64,
sse_noavx,sse2,sse2_noavx,sse3,sse3_noavx,sse4,sse4_noavx,
- avx,noavx,avx2,noavx2,bmi,bmi2,fma4,fma,avx512f,avx512f_512,
- noavx512f,avx512bw,avx512bw_512,noavx512bw,avx512dq,
- noavx512dq,fma_or_avx512vl,avx512vl,noavx512vl,avxvnni,
- avx512vnnivl,avx512fp16,avxifma,avx512ifmavl,avxneconvert,
- avx512bf16vl,vpclmulqdqvl,avx_noavx512f,avx_noavx512vl,
- vaes_avx512vl,noapx_nf,avx10_2"
+ avx,noavx,avx2,noavx2,bmi,bmi2,fma4,fma,avx512f,noavx512f,
+ avx512bw,noavx512bw,avx512dq,noavx512dq,fma_or_avx512vl,
+ avx512vl,noavx512vl,avxvnni,avx512vnnivl,avx512fp16,avxifma,
+ avx512ifmavl,avxneconvert,avx512bf16vl,vpclmulqdqvl,
+ avx_noavx512f,avx_noavx512vl,vaes_avx512vl,noapx_nf,avx10_2"
(const_string "base"))
;; The (bounding maximum) length of an instruction immediate.
@@ -900,6 +901,10 @@
(define_attr "avx_partial_xmm_update" "false,true"
(const_string "false"))
+;; Define attribute to indicate 64-bit TLS insns.
+(define_attr "tls64" "gd,ld_base,call,combine,lea,none"
+ (const_string "none"))
+
;; Define attribute to classify add/sub insns that consumes carry flag (CF)
(define_attr "use_carry" "0,1" (const_string "0"))
@@ -954,12 +959,8 @@
(eq_attr "isa" "fma_or_avx512vl")
(symbol_ref "TARGET_FMA || TARGET_AVX512VL")
(eq_attr "isa" "avx512f") (symbol_ref "TARGET_AVX512F")
- (eq_attr "isa" "avx512f_512")
- (symbol_ref "TARGET_AVX512F && TARGET_EVEX512")
(eq_attr "isa" "noavx512f") (symbol_ref "!TARGET_AVX512F")
(eq_attr "isa" "avx512bw") (symbol_ref "TARGET_AVX512BW")
- (eq_attr "isa" "avx512bw_512")
- (symbol_ref "TARGET_AVX512BW && TARGET_EVEX512")
(eq_attr "isa" "noavx512bw") (symbol_ref "!TARGET_AVX512BW")
(eq_attr "isa" "avx512dq") (symbol_ref "TARGET_AVX512DQ")
(eq_attr "isa" "noavx512dq") (symbol_ref "!TARGET_AVX512DQ")
@@ -1495,7 +1496,7 @@
[(reg:CC FLAGS_REG) (const_int 0)])
(label_ref (match_operand 3))
(pc)))]
- "TARGET_AVX512F && TARGET_EVEX512 && !TARGET_PREFER_AVX256"
+ "TARGET_AVX512F && !TARGET_PREFER_AVX256"
{
ix86_expand_branch (GET_CODE (operands[0]),
operands[1], operands[2], operands[3]);
@@ -1602,15 +1603,27 @@
[(set_attr "type" "icmp")
(set_attr "mode" "<MODE>")])
+(define_insn "*cmp<mode>_plus_1"
+ [(set (reg FLAGS_REG)
+ (compare
+ (plus:SWI (match_operand:SWI 0 "nonimmediate_operand" "<r>m")
+ (match_operand:SWI 1 "x86_64_neg_const_int_operand" "n"))
+ (const_int 0)))]
+ "ix86_match_ccmode (insn, CCGOCmode)"
+{
+ operands[1] = gen_int_mode (-INTVAL (operands[1]), <MODE>mode);
+ return "cmp{<imodesuffix>}\t{%1, %0|%0, %1}";
+}
+ [(set_attr "type" "icmp")
+ (set_attr "mode" "<MODE>")])
+
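
[Editor's note: a hedged illustration of what the new *cmp<mode>_plus_1
pattern matches; the asm is illustrative, not from the patch.]

  /* A compare of 'x + (-C)' against zero, e.g. combine's form of
       if (x - 5 >= 0) ...
     now matches directly as
       cmpl $5, %eax
     instead of needing an add that clobbers x; the output template
     negates the constant back via gen_int_mode.  CCGOCmode limits this
     to conditions insensitive to the carry/overflow difference between
     'add $-C' and 'cmp $C'.  */
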
(define_insn "*cmpqi_ext<mode>_1"
[(set (reg FLAGS_REG)
(compare
(match_operand:QI 0 "nonimmediate_operand" "QBn")
(subreg:QI
- (match_operator:SWI248 2 "extract_operator"
- [(match_operand 1 "int248_register_operand" "Q")
- (const_int 8)
- (const_int 8)]) 0)))]
+ (match_operator:SWI248 2 "extract_high_operator"
+ [(match_operand 1 "int248_register_operand" "Q")]) 0)))]
"ix86_match_ccmode (insn, CCmode)"
"cmp{b}\t{%h1, %0|%0, %h1}"
[(set_attr "addr" "gpr8")
@@ -1621,10 +1634,8 @@
[(set (reg FLAGS_REG)
(compare
(subreg:QI
- (match_operator:SWI248 2 "extract_operator"
- [(match_operand 0 "int248_register_operand" "Q")
- (const_int 8)
- (const_int 8)]) 0)
+ (match_operator:SWI248 2 "extract_high_operator"
+ [(match_operand 0 "int248_register_operand" "Q")]) 0)
(match_operand:QI 1 "const0_operand")))]
"ix86_match_ccmode (insn, CCNOmode)"
"test{b}\t%h0, %h0"
@@ -1646,10 +1657,8 @@
[(set (reg FLAGS_REG)
(compare
(subreg:QI
- (match_operator:SWI248 2 "extract_operator"
- [(match_operand 0 "int248_register_operand" "Q")
- (const_int 8)
- (const_int 8)]) 0)
+ (match_operator:SWI248 2 "extract_high_operator"
+ [(match_operand 0 "int248_register_operand" "Q")]) 0)
(match_operand:QI 1 "general_operand" "QnBn")))]
"ix86_match_ccmode (insn, CCmode)"
"cmp{b}\t{%1, %h0|%h0, %1}"
@@ -1661,15 +1670,11 @@
[(set (reg FLAGS_REG)
(compare
(subreg:QI
- (match_operator:SWI248 2 "extract_operator"
- [(match_operand 0 "int248_register_operand" "Q")
- (const_int 8)
- (const_int 8)]) 0)
+ (match_operator:SWI248 2 "extract_high_operator"
+ [(match_operand 0 "int248_register_operand" "Q")]) 0)
(subreg:QI
- (match_operator:SWI248 3 "extract_operator"
- [(match_operand 1 "int248_register_operand" "Q")
- (const_int 8)
- (const_int 8)]) 0)))]
+ (match_operator:SWI248 3 "extract_high_operator"
+ [(match_operand 1 "int248_register_operand" "Q")]) 0)))]
"ix86_match_ccmode (insn, CCmode)"
"cmp{b}\t{%h1, %h0|%h0, %h1}"
[(set_attr "type" "icmp")
@@ -2374,7 +2379,7 @@
(define_expand "movxi"
[(set (match_operand:XI 0 "nonimmediate_operand")
(match_operand:XI 1 "general_operand"))]
- "TARGET_AVX512F && TARGET_EVEX512"
+ "TARGET_AVX512F"
"ix86_expand_vector_move (XImode, operands); DONE;")
(define_expand "movoi"
@@ -2427,22 +2432,32 @@
(set_attr "mode" "SI")
(set_attr "length_immediate" "0")])
-(define_insn "*mov<mode>_and"
+;; Generate shorter "and $0,mem" for -Oz. Split it to "mov $0,mem"
+;; otherwise.
+(define_insn_and_split "*mov<mode>_and"
[(set (match_operand:SWI248 0 "memory_operand" "=m")
(match_operand:SWI248 1 "const0_operand"))
(clobber (reg:CC FLAGS_REG))]
"reload_completed"
"and{<imodesuffix>}\t{%1, %0|%0, %1}"
+ "&& !(optimize_insn_for_size_p () && optimize_size > 1)"
+ [(set (match_dup 0) (match_dup 1))]
+ ""
[(set_attr "type" "alu1")
(set_attr "mode" "<MODE>")
(set_attr "length_immediate" "1")])
-(define_insn "*mov<mode>_or"
+;; Generate shorter "or $-1,mem" for -Oz. Split it to "mov $-1,mem"
+;; otherwise.
+(define_insn_and_split "*mov<mode>_or"
[(set (match_operand:SWI248 0 "nonimmediate_operand" "=rm")
(match_operand:SWI248 1 "constm1_operand"))
(clobber (reg:CC FLAGS_REG))]
"reload_completed"
"or{<imodesuffix>}\t{%1, %0|%0, %1}"
+ "&& !(optimize_insn_for_size_p () && optimize_size > 1)"
+ [(set (match_dup 0) (match_dup 1))]
+ ""
[(set_attr "type" "alu1")
(set_attr "mode" "<MODE>")
(set_attr "length_immediate" "1")])
@@ -2450,7 +2465,7 @@
(define_insn "*movxi_internal_avx512f"
[(set (match_operand:XI 0 "nonimmediate_operand" "=v,v ,v ,m")
(match_operand:XI 1 "nonimmediate_or_sse_const_operand" " C,BC,vm,v"))]
- "TARGET_AVX512F && TARGET_EVEX512
+ "TARGET_AVX512F
&& (register_operand (operands[0], XImode)
|| register_operand (operands[1], XImode))"
{
@@ -2947,6 +2962,8 @@
(match_operand:SWI248 1 "const_int_operand"))]
"optimize_insn_for_size_p () && optimize_size > 1
&& operands[1] != const0_rtx
+ && (operands[1] != constm1_rtx
+ || (<MODE>mode == DImode && LEGACY_INT_REG_P (operands[0])))
&& IN_RANGE (INTVAL (operands[1]), -128, 127)
&& !ix86_red_zone_used
&& REGNO (operands[0]) != SP_REG"
@@ -3457,10 +3474,8 @@
[(set (strict_low_part
(match_operand:QI 0 "register_operand" "+Q"))
(subreg:QI
- (match_operator:SWI248 2 "extract_operator"
- [(match_operand 1 "int248_register_operand" "Q")
- (const_int 8)
- (const_int 8)]) 0))]
+ (match_operator:SWI248 2 "extract_high_operator"
+ [(match_operand 1 "int248_register_operand" "Q")]) 0))]
"!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
"mov{b}\t{%h1, %0|%0, %h1}"
[(set_attr "type" "imov")
@@ -3543,10 +3558,8 @@
(define_insn "*extzvqi"
[(set (match_operand:QI 0 "nonimmediate_operand" "=QBn,?R")
(subreg:QI
- (match_operator:SWI248 2 "extract_operator"
- [(match_operand 1 "int248_register_operand" "Q,Q")
- (const_int 8)
- (const_int 8)]) 0))]
+ (match_operator:SWI248 2 "extract_high_operator"
+ [(match_operand 1 "int248_register_operand" "Q,Q")]) 0))]
""
{
switch (get_attr_type (insn))
@@ -3667,10 +3680,8 @@
(match_operand 0 "int248_register_operand" "+Q")
(const_int 8)
(const_int 8))
- (match_operator:SWI248 2 "extract_operator"
- [(match_operand 1 "int248_register_operand" "Q")
- (const_int 8)
- (const_int 8)]))]
+ (match_operator:SWI248 2 "extract_high_operator"
+ [(match_operand 1 "int248_register_operand" "Q")]))]
""
"mov{b}\t{%h1, %h0|%h0, %h1}"
[(set_attr "type" "imov")
@@ -4414,7 +4425,7 @@
(eq_attr "alternative" "11")
(const_string "DI")
(eq_attr "alternative" "5")
- (cond [(and (match_test "TARGET_AVX512F && TARGET_EVEX512")
+ (cond [(and (match_test "TARGET_AVX512F")
(not (match_test "TARGET_PREFER_AVX256")))
(const_string "V16SF")
(match_test "TARGET_AVX")
@@ -5237,10 +5248,8 @@
[(set (match_operand:SWI24 0 "register_operand" "=R")
(sign_extend:SWI24
(subreg:QI
- (match_operator:SWI248 2 "extract_operator"
- [(match_operand 1 "int248_register_operand" "Q")
- (const_int 8)
- (const_int 8)]) 0)))]
+ (match_operator:SWI248 2 "extract_high_operator"
+ [(match_operand 1 "int248_register_operand" "Q")]) 0)))]
""
"movs{b<SWI24:imodesuffix>|x}\t{%h1, %0|%0, %h1}"
[(set_attr "type" "imovx")
@@ -5482,7 +5491,7 @@
(set_attr "memory" "none")
(set (attr "enabled")
(if_then_else (eq_attr "alternative" "2")
- (symbol_ref "TARGET_AVX512F && TARGET_EVEX512
+ (symbol_ref "TARGET_AVX512F
&& !TARGET_AVX512VL && !TARGET_PREFER_AVX256")
(const_string "*")))])
@@ -5704,7 +5713,7 @@
/* vcvtneps2bf16 doesn't honor SNAN, and turn sNAN into qNAN quietly,
and it always round to even.
- flag_unsafte_math_optimization is needed for psrld.
+ flag_unsafe_math_optimization is needed for psrld.
If we don't expect qNaNs nor sNaNs and can assume rounding
to nearest, we can expand the conversion inline as
(fromi + 0x7fff + ((fromi >> 16) & 1)) >> 16. */
@@ -6986,10 +6995,8 @@
[(set (strict_low_part (match_operand:QI 0 "register_operand" "+Q,&Q"))
(plus:QI
(subreg:QI
- (match_operator:SWI248 3 "extract_operator"
- [(match_operand 2 "int248_register_operand" "Q,Q")
- (const_int 8)
- (const_int 8)]) 0)
+ (match_operator:SWI248 3 "extract_high_operator"
+ [(match_operand 2 "int248_register_operand" "Q,Q")]) 0)
(match_operand:QI 1 "nonimmediate_operand" "0,!qm")))
(clobber (reg:CC FLAGS_REG))]
"!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
@@ -7003,8 +7010,8 @@
[(set (strict_low_part (match_dup 0))
(plus:QI
(subreg:QI
- (match_op_dup 3
- [(match_dup 2) (const_int 8) (const_int 8)]) 0)
+ (zero_extract:SWI248
+ (match_dup 2) (const_int 8) (const_int 8)) 0)
(match_dup 0)))
(clobber (reg:CC FLAGS_REG))])]
""
@@ -7015,29 +7022,25 @@
[(set (strict_low_part (match_operand:QI 0 "register_operand" "+&Q"))
(plus:QI
(subreg:QI
- (match_operator:SWI248 3 "extract_operator"
- [(match_operand 1 "int248_register_operand" "Q")
- (const_int 8)
- (const_int 8)]) 0)
+ (match_operator:SWI248 3 "extract_high_operator"
+ [(match_operand 1 "int248_register_operand" "Q")]) 0)
(subreg:QI
- (match_operator:SWI248 4 "extract_operator"
- [(match_operand 2 "int248_register_operand" "Q")
- (const_int 8)
- (const_int 8)]) 0)))
+ (match_operator:SWI248 4 "extract_high_operator"
+ [(match_operand 2 "int248_register_operand" "Q")]) 0)))
(clobber (reg:CC FLAGS_REG))]
"!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
"#"
"&& reload_completed"
[(set (strict_low_part (match_dup 0))
(subreg:QI
- (match_op_dup 4
- [(match_dup 2) (const_int 8) (const_int 8)]) 0))
+ (zero_extract:SWI248
+ (match_dup 2) (const_int 8) (const_int 8)) 0))
(parallel
[(set (strict_low_part (match_dup 0))
(plus:QI
(subreg:QI
- (match_op_dup 3
- [(match_dup 1) (const_int 8) (const_int 8)]) 0)
+ (zero_extract:SWI248
+ (match_dup 1) (const_int 8) (const_int 8)) 0)
(match_dup 0)))
(clobber (reg:CC FLAGS_REG))])]
""
@@ -7452,10 +7455,8 @@
[(set (match_operand:QI 0 "nonimmediate_operand" "=QBn")
(plus:QI
(subreg:QI
- (match_operator:SWI248 3 "extract_operator"
- [(match_operand 2 "int248_register_operand" "Q")
- (const_int 8)
- (const_int 8)]) 0)
+ (match_operator:SWI248 3 "extract_high_operator"
+ [(match_operand 2 "int248_register_operand" "Q")]) 0)
(match_operand:QI 1 "nonimmediate_operand" "0")))
(clobber (reg:CC FLAGS_REG))]
""
@@ -7468,29 +7469,25 @@
[(set (match_operand:QI 0 "register_operand" "=&Q")
(plus:QI
(subreg:QI
- (match_operator:SWI248 3 "extract_operator"
- [(match_operand 1 "int248_register_operand" "Q")
- (const_int 8)
- (const_int 8)]) 0)
+ (match_operator:SWI248 3 "extract_high_operator"
+ [(match_operand 1 "int248_register_operand" "Q")]) 0)
(subreg:QI
- (match_operator:SWI248 4 "extract_operator"
- [(match_operand 2 "int248_register_operand" "Q")
- (const_int 8)
- (const_int 8)]) 0)))
+ (match_operator:SWI248 4 "extract_high_operator"
+ [(match_operand 2 "int248_register_operand" "Q")]) 0)))
(clobber (reg:CC FLAGS_REG))]
""
"#"
"&& reload_completed"
[(set (match_dup 0)
(subreg:QI
- (match_op_dup 4
- [(match_dup 2) (const_int 8) (const_int 8)]) 0))
+ (zero_extract:SWI248
+ (match_dup 2) (const_int 8) (const_int 8)) 0))
(parallel
[(set (match_dup 0)
(plus:QI
(subreg:QI
- (match_op_dup 3
- [(match_dup 1) (const_int 8) (const_int 8)]) 0)
+ (zero_extract:SWI248
+ (match_dup 1) (const_int 8) (const_int 8)) 0)
(match_dup 0)))
(clobber (reg:CC FLAGS_REG))])]
""
@@ -7520,10 +7517,8 @@
(subreg:SWI248
(plus:QI
(subreg:QI
- (match_operator:SWI248 3 "extract_operator"
- [(match_operand 1 "int248_register_operand" "0,!Q")
- (const_int 8)
- (const_int 8)]) 0)
+ (match_operator:SWI248 3 "extract_high_operator"
+ [(match_operand 1 "int248_register_operand" "0,!Q")]) 0)
(match_operand:QI 2 "general_operand" "QnBn,QnBn")) 0))
(clobber (reg:CC FLAGS_REG))]
""
@@ -7558,8 +7553,8 @@
(subreg:SWI248
(plus:QI
(subreg:QI
- (match_op_dup 3
- [(match_dup 0) (const_int 8) (const_int 8)]) 0)
+ (zero_extract:SWI248
+ (match_dup 0) (const_int 8) (const_int 8)) 0)
(match_dup 2)) 0))
(clobber (reg:CC FLAGS_REG))])]
""
@@ -7579,15 +7574,11 @@
(subreg:SWI248
(plusminus:QI
(subreg:QI
- (match_operator:SWI248 3 "extract_operator"
- [(match_operand 1 "int248_register_operand" "<comm>0,!Q")
- (const_int 8)
- (const_int 8)]) 0)
+ (match_operator:SWI248 3 "extract_high_operator"
+ [(match_operand 1 "int248_register_operand" "<comm>0,!Q")]) 0)
(subreg:QI
- (match_operator:SWI248 4 "extract_operator"
- [(match_operand 2 "int248_register_operand" "Q,Q")
- (const_int 8)
- (const_int 8)]) 0)) 0))
+ (match_operator:SWI248 4 "extract_high_operator"
+ [(match_operand 2 "int248_register_operand" "Q,Q")]) 0)) 0))
(clobber (reg:CC FLAGS_REG))]
""
"@
@@ -7606,11 +7597,11 @@
(subreg:SWI248
(plusminus:QI
(subreg:QI
- (match_op_dup 3
- [(match_dup 0) (const_int 8) (const_int 8)]) 0)
+ (zero_extract:SWI248
+ (match_dup 0) (const_int 8) (const_int 8)) 0)
(subreg:QI
- (match_op_dup 4
- [(match_dup 2) (const_int 8) (const_int 8)]) 0)) 0))
+ (zero_extract:SWI248
+ (match_dup 2) (const_int 8) (const_int 8)) 0)) 0))
(clobber (reg:CC FLAGS_REG))])]
""
[(set_attr "type" "alu")
@@ -8207,10 +8198,8 @@
(minus:QI
(match_operand:QI 1 "nonimmediate_operand" "0,!qm")
(subreg:QI
- (match_operator:SWI248 3 "extract_operator"
- [(match_operand 2 "int248_register_operand" "Q,Q")
- (const_int 8)
- (const_int 8)]) 0)))
+ (match_operator:SWI248 3 "extract_high_operator"
+ [(match_operand 2 "int248_register_operand" "Q,Q")]) 0)))
(clobber (reg:CC FLAGS_REG))]
"!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
"@
@@ -8224,8 +8213,8 @@
(minus:QI
(match_dup 0)
(subreg:QI
- (match_op_dup 3
- [(match_dup 2) (const_int 8) (const_int 8)]) 0)))
+ (zero_extract:SWI248
+ (match_dup 2) (const_int 8) (const_int 8)) 0)))
(clobber (reg:CC FLAGS_REG))])]
""
[(set_attr "type" "alu")
@@ -8235,30 +8224,26 @@
[(set (strict_low_part (match_operand:QI 0 "register_operand" "+&Q"))
(minus:QI
(subreg:QI
- (match_operator:SWI248 3 "extract_operator"
- [(match_operand 1 "int248_register_operand" "Q")
- (const_int 8)
- (const_int 8)]) 0)
+ (match_operator:SWI248 3 "extract_high_operator"
+ [(match_operand 1 "int248_register_operand" "Q")]) 0)
(subreg:QI
- (match_operator:SWI248 4 "extract_operator"
- [(match_operand 2 "int248_register_operand" "Q")
- (const_int 8)
- (const_int 8)]) 0)))
+ (match_operator:SWI248 4 "extract_high_operator"
+ [(match_operand 2 "int248_register_operand" "Q")]) 0)))
(clobber (reg:CC FLAGS_REG))]
"!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
"#"
"&& reload_completed"
[(set (strict_low_part (match_dup 0))
(subreg:QI
- (match_op_dup 3
- [(match_dup 1) (const_int 8) (const_int 8)]) 0))
+ (zero_extract:SWI248
+ (match_dup 1) (const_int 8) (const_int 8)) 0))
(parallel
[(set (strict_low_part (match_dup 0))
(minus:QI
(match_dup 0)
(subreg:QI
- (match_op_dup 4
- [(match_dup 2) (const_int 8) (const_int 8)]) 0)))
+ (zero_extract:SWI248
+ (match_dup 2) (const_int 8) (const_int 8)) 0)))
(clobber (reg:CC FLAGS_REG))])]
""
[(set_attr "type" "alu")
@@ -8309,10 +8294,8 @@
(minus:QI
(match_operand:QI 1 "nonimmediate_operand" "0")
(subreg:QI
- (match_operator:SWI248 3 "extract_operator"
- [(match_operand 2 "int248_register_operand" "Q")
- (const_int 8)
- (const_int 8)]) 0)))
+ (match_operator:SWI248 3 "extract_high_operator"
+ [(match_operand 2 "int248_register_operand" "Q")]) 0)))
(clobber (reg:CC FLAGS_REG))]
""
"sub{b}\t{%h2, %0|%0, %h2}"
@@ -8324,30 +8307,26 @@
[(set (match_operand:QI 0 "register_operand" "=&Q")
(minus:QI
(subreg:QI
- (match_operator:SWI248 3 "extract_operator"
- [(match_operand 1 "int248_register_operand" "Q")
- (const_int 8)
- (const_int 8)]) 0)
+ (match_operator:SWI248 3 "extract_high_operator"
+ [(match_operand 1 "int248_register_operand" "Q")]) 0)
(subreg:QI
- (match_operator:SWI248 4 "extract_operator"
- [(match_operand 2 "int248_register_operand" "Q")
- (const_int 8)
- (const_int 8)]) 0)))
+ (match_operator:SWI248 4 "extract_high_operator"
+ [(match_operand 2 "int248_register_operand" "Q")]) 0)))
(clobber (reg:CC FLAGS_REG))]
""
"#"
"&& reload_completed"
[(set (match_dup 0)
(subreg:QI
- (match_op_dup 3
- [(match_dup 1) (const_int 8) (const_int 8)]) 0))
+ (zero_extract:SWI248
+ (match_dup 1) (const_int 8) (const_int 8)) 0))
(parallel
[(set (match_dup 0)
(minus:QI
(match_dup 0)
(subreg:QI
- (match_op_dup 4
- [(match_dup 2) (const_int 8) (const_int 8)]) 0)))
+ (zero_extract:SWI248
+ (match_dup 2) (const_int 8) (const_int 8)) 0)))
(clobber (reg:CC FLAGS_REG))])]
""
[(set_attr "type" "alu")
@@ -8362,10 +8341,8 @@
(subreg:SWI248
(minus:QI
(subreg:QI
- (match_operator:SWI248 3 "extract_operator"
- [(match_operand 1 "int248_register_operand" "0,!Q")
- (const_int 8)
- (const_int 8)]) 0)
+ (match_operator:SWI248 3 "extract_high_operator"
+ [(match_operand 1 "int248_register_operand" "0,!Q")]) 0)
(match_operand:QI 2 "general_operand" "QnBn,QnBn")) 0))
(clobber (reg:CC FLAGS_REG))]
""
@@ -8384,8 +8361,8 @@
(subreg:SWI248
(minus:QI
(subreg:QI
- (match_op_dup 3
- [(match_dup 0) (const_int 8) (const_int 8)]) 0)
+ (zero_extract:SWI248
+ (match_dup 0) (const_int 8) (const_int 8)) 0)
(match_dup 2)) 0))
(clobber (reg:CC FLAGS_REG))])]
""
@@ -8708,6 +8685,34 @@
(set (match_dup 1)
(minus:SWI (match_dup 1) (match_dup 0)))])])
+;; Under APX NDD, 'sub reg, mem, reg' is valid.
+;; New format for
+;; mov reg0, mem1
+;; sub reg0, mem2, reg0
+;; mov mem2, reg0
+;; to
+;; mov reg0, mem1
+;; sub mem2, reg0
+(define_peephole2
+ [(set (match_operand:SWI 0 "general_reg_operand")
+ (match_operand:SWI 1 "memory_operand"))
+ (parallel [(set (reg:CC FLAGS_REG)
+ (compare:CC (match_operand:SWI 2 "memory_operand")
+ (match_dup 0)))
+ (set (match_dup 0)
+ (minus:SWI (match_dup 2) (match_dup 0)))])
+ (set (match_dup 2) (match_dup 0))]
+ "TARGET_APX_NDD
+ && (TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
+ && peep2_reg_dead_p (3, operands[0])
+ && !reg_overlap_mentioned_p (operands[0], operands[1])
+ && !reg_overlap_mentioned_p (operands[0], operands[2])"
+ [(set (match_dup 0) (match_dup 1))
+ (parallel [(set (reg:CC FLAGS_REG)
+ (compare:CC (match_dup 2) (match_dup 0)))
+ (set (match_dup 2)
+ (minus:SWI (match_dup 2) (match_dup 0)))])])
+
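
[Editor's note: a hypothetical C shape that produces the sequence the
peephole above matches when compiled with -mapxf.]

  void
  f (long *a, long *b)
  {
    *b -= *a;	/* load *a, NDD sub against *b, store *b -- the peephole
		   folds this back into one memory-destination SUB.  */
  }
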
;; decl %eax; cmpl $-1, %eax; jne .Lxx; can be optimized into
;; subl $1, %eax; jnc .Lxx;
(define_peephole2
@@ -9155,6 +9160,118 @@
(match_dup 1))
(match_dup 0)))])])
+;; Under APX NDD, 'adc reg, mem, reg' is valid.
+;;
+;; New format for
+;; mov reg0, mem1
+;; adc reg0, mem2, reg0
+;; mov mem1, reg0
+;; to
+;; mov reg0, mem2
+;; adc mem1, reg0
+(define_peephole2
+ [(set (match_operand:SWI48 0 "general_reg_operand")
+ (match_operand:SWI48 1 "memory_operand"))
+ (parallel [(set (reg:CCC FLAGS_REG)
+ (compare:CCC
+ (zero_extend:<DWI>
+ (plus:SWI48
+ (plus:SWI48
+ (match_operator:SWI48 5 "ix86_carry_flag_operator"
+ [(match_operand 3 "flags_reg_operand")
+ (const_int 0)])
+ (match_operand:SWI48 2 "memory_operand"))
+ (match_dup 0)))
+ (plus:<DWI>
+ (match_operator:<DWI> 4 "ix86_carry_flag_operator"
+ [(match_dup 3) (const_int 0)])
+ (zero_extend:<DWI> (match_dup 0)))))
+ (set (match_dup 0)
+ (plus:SWI48 (plus:SWI48 (match_op_dup 5
+ [(match_dup 3) (const_int 0)])
+ (match_dup 2))
+ (match_dup 0)))])
+ (set (match_dup 1) (match_dup 0))]
+ "TARGET_APX_NDD
+ && (TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
+ && peep2_reg_dead_p (3, operands[0])
+ && !reg_overlap_mentioned_p (operands[0], operands[1])
+ && !reg_overlap_mentioned_p (operands[0], operands[2])"
+ [(set (match_dup 0) (match_dup 2))
+ (parallel [(set (reg:CCC FLAGS_REG)
+ (compare:CCC
+ (zero_extend:<DWI>
+ (plus:SWI48
+ (plus:SWI48
+ (match_op_dup 5
+ [(match_dup 3) (const_int 0)])
+ (match_dup 1))
+ (match_dup 0)))
+ (plus:<DWI>
+ (match_op_dup 4
+ [(match_dup 3) (const_int 0)])
+ (zero_extend:<DWI> (match_dup 0)))))
+ (set (match_dup 1)
+ (plus:SWI48 (plus:SWI48 (match_op_dup 5
+ [(match_dup 3) (const_int 0)])
+ (match_dup 1))
+ (match_dup 0)))])])
+
+;; New format for
+;; mov reg0, mem1
+;; adc reg0, mem2, reg0
+;; mov mem2, reg0
+;; to
+;; mov reg0, mem1
+;; adc mem2, reg0
+(define_peephole2
+ [(set (match_operand:SWI48 0 "general_reg_operand")
+ (match_operand:SWI48 1 "memory_operand"))
+ (parallel [(set (reg:CCC FLAGS_REG)
+ (compare:CCC
+ (zero_extend:<DWI>
+ (plus:SWI48
+ (plus:SWI48
+ (match_operator:SWI48 5 "ix86_carry_flag_operator"
+ [(match_operand 3 "flags_reg_operand")
+ (const_int 0)])
+ (match_operand:SWI48 2 "memory_operand"))
+ (match_dup 0)))
+ (plus:<DWI>
+ (match_operator:<DWI> 4 "ix86_carry_flag_operator"
+ [(match_dup 3) (const_int 0)])
+ (zero_extend:<DWI> (match_dup 0)))))
+ (set (match_dup 0)
+ (plus:SWI48 (plus:SWI48 (match_op_dup 5
+ [(match_dup 3) (const_int 0)])
+ (match_dup 2))
+ (match_dup 0)))])
+ (set (match_dup 2) (match_dup 0))]
+ "TARGET_APX_NDD
+ && (TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
+ && peep2_reg_dead_p (3, operands[0])
+ && !reg_overlap_mentioned_p (operands[0], operands[1])
+ && !reg_overlap_mentioned_p (operands[0], operands[2])"
+ [(set (match_dup 0) (match_dup 1))
+ (parallel [(set (reg:CCC FLAGS_REG)
+ (compare:CCC
+ (zero_extend:<DWI>
+ (plus:SWI48
+ (plus:SWI48
+ (match_op_dup 5
+ [(match_dup 3) (const_int 0)])
+ (match_dup 2))
+ (match_dup 0)))
+ (plus:<DWI>
+ (match_op_dup 4
+ [(match_dup 3) (const_int 0)])
+ (zero_extend:<DWI> (match_dup 0)))))
+ (set (match_dup 2)
+ (plus:SWI48 (plus:SWI48 (match_op_dup 5
+ [(match_dup 3) (const_int 0)])
+ (match_dup 2))
+ (match_dup 0)))])])
+
(define_peephole2
[(parallel [(set (reg:CCC FLAGS_REG)
(compare:CCC
@@ -9635,6 +9752,52 @@
[(match_dup 3) (const_int 0)]))
(match_dup 0)))])])
+;; Under APX NDD, 'sbb reg, mem, reg' is valid.
+;;
+;; New format for
+;; mov reg0, mem1
+;; sbb reg0, mem2, reg0
+;; mov mem2, reg0
+;; to
+;; mov reg0, mem1
+;; sbb mem2, reg0
+(define_peephole2
+ [(set (match_operand:SWI48 0 "general_reg_operand")
+ (match_operand:SWI48 1 "memory_operand"))
+ (parallel [(set (reg:CCC FLAGS_REG)
+ (compare:CCC
+ (zero_extend:<DWI> (match_operand:SWI48 2 "memory_operand"))
+ (plus:<DWI>
+ (match_operator:<DWI> 4 "ix86_carry_flag_operator"
+ [(match_operand 3 "flags_reg_operand") (const_int 0)])
+ (zero_extend:<DWI>
+ (match_dup 0)))))
+ (set (match_dup 0)
+ (minus:SWI48
+ (minus:SWI48
+ (match_dup 2)
+ (match_operator:SWI48 5 "ix86_carry_flag_operator"
+ [(match_dup 3) (const_int 0)]))
+ (match_dup 0)))])
+ (set (match_dup 2) (match_dup 0))]
+ "TARGET_APX_NDD
+ && (TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
+ && peep2_reg_dead_p (3, operands[0])
+ && !reg_overlap_mentioned_p (operands[0], operands[1])
+ && !reg_overlap_mentioned_p (operands[0], operands[2])"
+ [(set (match_dup 0) (match_dup 1))
+ (parallel [(set (reg:CCC FLAGS_REG)
+ (compare:CCC
+ (zero_extend:<DWI> (match_dup 2))
+ (plus:<DWI> (match_op_dup 4
+ [(match_dup 3) (const_int 0)])
+ (zero_extend:<DWI> (match_dup 0)))))
+ (set (match_dup 2)
+ (minus:SWI48 (minus:SWI48 (match_dup 2)
+ (match_op_dup 5
+ [(match_dup 3) (const_int 0)]))
+ (match_dup 0)))])])
+
(define_peephole2
[(set (match_operand:SWI48 6 "general_reg_operand")
(match_operand:SWI48 7 "memory_operand"))
@@ -12147,10 +12310,8 @@
(compare
(and:QI
(subreg:QI
- (match_operator:SWI248 2 "extract_operator"
- [(match_operand 0 "int248_register_operand" "Q")
- (const_int 8)
- (const_int 8)]) 0)
+ (match_operator:SWI248 2 "extract_high_operator"
+ [(match_operand 0 "int248_register_operand" "Q")]) 0)
(match_operand:QI 1 "general_operand" "QnBn"))
(const_int 0)))]
"ix86_match_ccmode (insn, CCNOmode)"
@@ -12164,15 +12325,11 @@
(compare
(and:QI
(subreg:QI
- (match_operator:SWI248 2 "extract_operator"
- [(match_operand 0 "int248_register_operand" "Q")
- (const_int 8)
- (const_int 8)]) 0)
+ (match_operator:SWI248 2 "extract_high_operator"
+ [(match_operand 0 "int248_register_operand" "Q")]) 0)
(subreg:QI
- (match_operator:SWI248 3 "extract_operator"
- [(match_operand 1 "int248_register_operand" "Q")
- (const_int 8)
- (const_int 8)]) 0))
+ (match_operator:SWI248 3 "extract_high_operator"
+ [(match_operand 1 "int248_register_operand" "Q")]) 0))
(const_int 0)))]
"ix86_match_ccmode (insn, CCNOmode)"
"test{b}\t{%h1, %h0|%h0, %h1}"
@@ -12602,8 +12759,8 @@
(zero_extend:DI (and:SI (match_dup 1) (match_dup 2))))
(clobber (reg:CC FLAGS_REG))])]
{
- if (GET_CODE (operands[2]) == SYMBOL_REF
- || GET_CODE (operands[2]) == LABEL_REF)
+ if (SYMBOL_REF_P (operands[2])
+ || LABEL_REF_P (operands[2]))
{
operands[2] = shallow_copy_rtx (operands[2]);
PUT_MODE (operands[2], SImode);
@@ -12761,10 +12918,8 @@
[(set (strict_low_part (match_operand:QI 0 "register_operand" "+Q,&Q"))
(any_logic:QI
(subreg:QI
- (match_operator:SWI248 3 "extract_operator"
- [(match_operand 2 "int248_register_operand" "Q,Q")
- (const_int 8)
- (const_int 8)]) 0)
+ (match_operator:SWI248 3 "extract_high_operator"
+ [(match_operand 2 "int248_register_operand" "Q,Q")]) 0)
(match_operand:QI 1 "nonimmediate_operand" "0,!qm")))
(clobber (reg:CC FLAGS_REG))]
"!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
@@ -12778,8 +12933,8 @@
[(set (strict_low_part (match_dup 0))
(any_logic:QI
(subreg:QI
- (match_op_dup 3
- [(match_dup 2) (const_int 8) (const_int 8)]) 0)
+ (zero_extract:SWI248
+ (match_dup 2) (const_int 8) (const_int 8)) 0)
(match_dup 0)))
(clobber (reg:CC FLAGS_REG))])]
""
@@ -12790,29 +12945,25 @@
[(set (strict_low_part (match_operand:QI 0 "register_operand" "+&Q"))
(any_logic:QI
(subreg:QI
- (match_operator:SWI248 3 "extract_operator"
- [(match_operand 1 "int248_register_operand" "Q")
- (const_int 8)
- (const_int 8)]) 0)
+ (match_operator:SWI248 3 "extract_high_operator"
+ [(match_operand 1 "int248_register_operand" "Q")]) 0)
(subreg:QI
- (match_operator:SWI248 4 "extract_operator"
- [(match_operand 2 "int248_register_operand" "Q")
- (const_int 8)
- (const_int 8)]) 0)))
+ (match_operator:SWI248 4 "extract_high_operator"
+ [(match_operand 2 "int248_register_operand" "Q")]) 0)))
(clobber (reg:CC FLAGS_REG))]
"!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
"#"
"&& reload_completed"
[(set (strict_low_part (match_dup 0))
(subreg:QI
- (match_op_dup 4
- [(match_dup 2) (const_int 8) (const_int 8)]) 0))
+ (zero_extract:SWI248
+ (match_dup 2) (const_int 8) (const_int 8)) 0))
(parallel
[(set (strict_low_part (match_dup 0))
(any_logic:QI
(subreg:QI
- (match_op_dup 3
- [(match_dup 1) (const_int 8) (const_int 8)]) 0)
+ (zero_extract:SWI248
+ (match_dup 1) (const_int 8) (const_int 8)) 0)
(match_dup 0)))
(clobber (reg:CC FLAGS_REG))])]
""
@@ -13015,10 +13166,8 @@
[(set (match_operand:QI 0 "nonimmediate_operand" "=QBn")
(any_logic:QI
(subreg:QI
- (match_operator:SWI248 3 "extract_operator"
- [(match_operand 2 "int248_register_operand" "Q")
- (const_int 8)
- (const_int 8)]) 0)
+ (match_operator:SWI248 3 "extract_high_operator"
+ [(match_operand 2 "int248_register_operand" "Q")]) 0)
(match_operand:QI 1 "nonimmediate_operand" "0")))
(clobber (reg:CC FLAGS_REG))]
""
@@ -13031,29 +13180,25 @@
[(set (match_operand:QI 0 "register_operand" "=&Q")
(any_logic:QI
(subreg:QI
- (match_operator:SWI248 3 "extract_operator"
- [(match_operand 1 "int248_register_operand" "Q")
- (const_int 8)
- (const_int 8)]) 0)
+ (match_operator:SWI248 3 "extract_high_operator"
+ [(match_operand 1 "int248_register_operand" "Q")]) 0)
(subreg:QI
- (match_operator:SWI248 4 "extract_operator"
- [(match_operand 2 "int248_register_operand" "Q")
- (const_int 8)
- (const_int 8)]) 0)))
+ (match_operator:SWI248 4 "extract_high_operator"
+ [(match_operand 2 "int248_register_operand" "Q")]) 0)))
(clobber (reg:CC FLAGS_REG))]
""
"#"
"&& reload_completed"
[(set (match_dup 0)
(subreg:QI
- (match_op_dup 4
- [(match_dup 2) (const_int 8) (const_int 8)]) 0))
+ (zero_extract:SWI248
+ (match_dup 2) (const_int 8) (const_int 8)) 0))
(parallel
[(set (match_dup 0)
(any_logic:QI
(subreg:QI
- (match_op_dup 3
- [(match_dup 1) (const_int 8) (const_int 8)]) 0)
+ (zero_extract:SWI248
+ (match_dup 1) (const_int 8) (const_int 8)) 0)
(match_dup 0)))
(clobber (reg:CC FLAGS_REG))])]
""
@@ -13083,10 +13228,8 @@
(subreg:SWI248
(any_logic:QI
(subreg:QI
- (match_operator:SWI248 3 "extract_operator"
- [(match_operand 1 "int248_register_operand" "0,!Q")
- (const_int 8)
- (const_int 8)]) 0)
+ (match_operator:SWI248 3 "extract_high_operator"
+ [(match_operand 1 "int248_register_operand" "0,!Q")]) 0)
(match_operand:QI 2 "general_operand" "QnBn,QnBn")) 0))
(clobber (reg:CC FLAGS_REG))]
""
@@ -13105,8 +13248,8 @@
(subreg:SWI248
(any_logic:QI
(subreg:QI
- (match_op_dup 3
- [(match_dup 0) (const_int 8) (const_int 8)]) 0)
+ (zero_extract:SWI248
+ (match_dup 0) (const_int 8) (const_int 8)) 0)
(match_dup 2)) 0))
(clobber (reg:CC FLAGS_REG))])]
""
@@ -13120,10 +13263,8 @@
(match_operator 5 "compare_operator"
[(any_logic:QI
(subreg:QI
- (match_operator:SWI248 3 "extract_operator"
- [(match_operand 1 "int248_register_operand" "0,!Q")
- (const_int 8)
- (const_int 8)]) 0)
+ (match_operator:SWI248 3 "extract_high_operator"
+ [(match_operand 1 "int248_register_operand" "0,!Q")]) 0)
(match_operand:QI 2 "general_operand" "QnBn,QnBn"))
(const_int 0)]))
(set (zero_extract:SWI248
@@ -13133,8 +13274,8 @@
(subreg:SWI248
(any_logic:QI
(subreg:QI
- (match_op_dup 3
- [(match_dup 0) (const_int 8) (const_int 8)]) 0)
+ (zero_extract:SWI248
+ (match_dup 0) (const_int 8) (const_int 8)) 0)
(match_dup 2)) 0))]
"ix86_match_ccmode (insn, CCNOmode)"
"@
@@ -13150,9 +13291,9 @@
[(set (match_dup 4)
(match_op_dup 5
[(any_logic:QI
- (subreg:QI
- (match_op_dup 3
- [(match_dup 0) (const_int 8) (const_int 8)]) 0)
+ (subreg:QI
+ (zero_extract:SWI248
+ (match_dup 0) (const_int 8) (const_int 8)) 0)
(match_dup 2))
(const_int 0)]))
(set (zero_extract:SWI248
@@ -13160,8 +13301,8 @@
(subreg:SWI248
(any_logic:QI
(subreg:QI
- (match_op_dup 3
- [(match_dup 1) (const_int 8) (const_int 8)]) 0)
+ (zero_extract:SWI248
+ (match_dup 1) (const_int 8) (const_int 8)) 0)
(match_dup 2)) 0))])]
""
[(set_attr "addr" "gpr8")
@@ -13177,15 +13318,11 @@
(subreg:SWI248
(any_logic:QI
(subreg:QI
- (match_operator:SWI248 3 "extract_operator"
- [(match_operand 1 "int248_register_operand" "%0,!Q")
- (const_int 8)
- (const_int 8)]) 0)
+ (match_operator:SWI248 3 "extract_high_operator"
+ [(match_operand 1 "int248_register_operand" "%0,!Q")]) 0)
(subreg:QI
- (match_operator:SWI248 4 "extract_operator"
- [(match_operand 2 "int248_register_operand" "Q,Q")
- (const_int 8)
- (const_int 8)]) 0)) 0))
+ (match_operator:SWI248 4 "extract_high_operator"
+ [(match_operand 2 "int248_register_operand" "Q,Q")]) 0)) 0))
(clobber (reg:CC FLAGS_REG))]
""
"@
@@ -13204,11 +13341,11 @@
(subreg:SWI248
(any_logic:QI
(subreg:QI
- (match_op_dup 3
- [(match_dup 0) (const_int 8) (const_int 8)]) 0)
+ (zero_extract:SWI248
+ (match_dup 0) (const_int 8) (const_int 8)) 0)
(subreg:QI
- (match_op_dup 4
- [(match_dup 2) (const_int 8) (const_int 8)]) 0)) 0))
+ (zero_extract:SWI248
+ (match_dup 2) (const_int 8) (const_int 8)) 0)) 0))
(clobber (reg:CC FLAGS_REG))])]
""
[(set_attr "type" "alu")
@@ -13220,12 +13357,10 @@
(match_operand 0 "int248_register_operand" "+Q,&Q")
(const_int 8)
(const_int 8))
- (match_operator:SWI248 3 "extract_operator"
+ (match_operator:SWI248 3 "extract_high_operator"
[(any_logic
(match_operand 1 "int248_register_operand" "%0,!Q")
- (match_operand 2 "int248_register_operand" "Q,Q"))
- (const_int 8)
- (const_int 8)]))
+ (match_operand 2 "int248_register_operand" "Q,Q"))]))
(clobber (reg:CC FLAGS_REG))]
"GET_MODE (operands[1]) == GET_MODE (operands[2])"
"@
@@ -13241,9 +13376,9 @@
(parallel
[(set (zero_extract:SWI248
(match_dup 0) (const_int 8) (const_int 8))
- (match_op_dup 3
- [(any_logic (match_dup 4) (match_dup 2))
- (const_int 8) (const_int 8)]))
+ (zero_extract:SWI248
+ (any_logic (match_dup 4) (match_dup 2))
+ (const_int 8) (const_int 8)))
(clobber (reg:CC FLAGS_REG))])]
"operands[4] = gen_lowpart (GET_MODE (operands[1]), operands[0]);"
[(set_attr "type" "alu")
@@ -14488,10 +14623,8 @@
(subreg:SWI248
(neg:QI
(subreg:QI
- (match_operator:SWI248 2 "extract_operator"
- [(match_operand 1 "int248_register_operand" "0,!Q")
- (const_int 8)
- (const_int 8)]) 0)) 0))
+ (match_operator:SWI248 2 "extract_high_operator"
+ [(match_operand 1 "int248_register_operand" "0,!Q")]) 0)) 0))
(clobber (reg:CC FLAGS_REG))]
""
"@
@@ -14509,8 +14642,8 @@
(subreg:SWI248
(neg:QI
(subreg:QI
- (match_op_dup 2
- [(match_dup 0) (const_int 8) (const_int 8)]) 0)) 0))
+ (zero_extract:SWI248
+ (match_dup 0) (const_int 8) (const_int 8)) 0)) 0))
(clobber (reg:CC FLAGS_REG))])]
""
[(set_attr "type" "negnot")
@@ -14561,6 +14694,17 @@
(compare:CCZ (neg:SWI (match_dup 0)) (const_int 0)))
(set (match_dup 0) (neg:SWI (match_dup 0)))])])
+;; Optimize *negsi_1 followed by *cmpsi_ccno_1 (PR target/91384) with APX_F
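+;; e.g. (registers are illustrative) transform
+;;	negl	%esi, %eax
+;;	testl	%esi, %esi
+;; into
+;;	negl	%esi, %eax
+;; since neg sets ZF exactly when its input is zero.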
+(define_peephole2
+ [(parallel [(set (match_operand:SWI 0 "general_reg_operand")
+ (neg:SWI (match_operand:SWI 1 "general_reg_operand")))
+ (clobber (reg:CC FLAGS_REG))])
+ (set (reg:CCZ FLAGS_REG) (compare:CCZ (match_dup 1) (const_int 0)))]
+ "TARGET_APX_NDD"
+ [(parallel [(set (reg:CCZ FLAGS_REG)
+ (compare:CCZ (neg:SWI (match_dup 1)) (const_int 0)))
+ (set (match_dup 0) (neg:SWI (match_dup 1)))])])
+
;; Special expand pattern to handle integer mode abs
(define_expand "abs<mode>2"
@@ -15131,13 +15275,9 @@
(match_operand 0 "int248_register_operand" "+Q,&Q")
(const_int 8)
(const_int 8))
- (subreg:SWI248
- (not:QI
- (subreg:QI
- (match_operator:SWI248 2 "extract_operator"
- [(match_operand 1 "int248_register_operand" "0,!Q")
- (const_int 8)
- (const_int 8)]) 0)) 0))]
+ (not:SWI248
+ (match_operator:SWI248 2 "extract_high_operator"
+ [(match_operand 1 "int248_register_operand" "0,!Q")])))]
""
"@
not{b}\t%h0
@@ -15150,11 +15290,8 @@
(match_dup 1) (const_int 8) (const_int 8)))
(set (zero_extract:SWI248
(match_dup 0) (const_int 8) (const_int 8))
- (subreg:SWI248
- (not:QI
- (subreg:QI
- (match_op_dup 2
- [(match_dup 0) (const_int 8) (const_int 8)]) 0)) 0))]
+ (not:SWI248
+ (zero_extract:SWI248 (match_dup 0) (const_int 8) (const_int 8))))]
""
[(set_attr "type" "negnot")
(set_attr "mode" "QI")])
@@ -16501,10 +16638,8 @@
(subreg:SWI248
(ashift:QI
(subreg:QI
- (match_operator:SWI248 3 "extract_operator"
- [(match_operand 1 "int248_register_operand" "0,!Q")
- (const_int 8)
- (const_int 8)]) 0)
+ (match_operator:SWI248 3 "extract_high_operator"
+ [(match_operand 1 "int248_register_operand" "0,!Q")]) 0)
(match_operand:QI 2 "nonmemory_operand" "cI,cI")) 0))
(clobber (reg:CC FLAGS_REG))]
""
@@ -16538,8 +16673,8 @@
(subreg:SWI248
(ashift:QI
(subreg:QI
- (match_op_dup 3
- [(match_dup 0) (const_int 8) (const_int 8)]) 0)
+ (zero_extract:SWI248
+ (match_dup 0) (const_int 8) (const_int 8)) 0)
(match_dup 2)) 0))
(clobber (reg:CC FLAGS_REG))])]
""
@@ -17785,10 +17920,8 @@
(subreg:SWI248
(any_shiftrt:QI
(subreg:QI
- (match_operator:SWI248 3 "extract_operator"
- [(match_operand 1 "int248_register_operand" "0,!Q")
- (const_int 8)
- (const_int 8)]) 0)
+ (match_operator:SWI248 3 "extract_high_operator"
+ [(match_operand 1 "int248_register_operand" "0,!Q")]) 0)
(match_operand:QI 2 "nonmemory_operand" "cI,cI")) 0))
(clobber (reg:CC FLAGS_REG))]
""
@@ -17814,8 +17947,8 @@
(subreg:SWI248
(any_shiftrt:QI
(subreg:QI
- (match_op_dup 3
- [(match_dup 0) (const_int 8) (const_int 8)]) 0)
+ (zero_extract:SWI248
+ (match_dup 0) (const_int 8) (const_int 8)) 0)
(match_dup 2)) 0))
(clobber (reg:CC FLAGS_REG))])]
""
@@ -18169,17 +18302,17 @@
(any_rotate:SWI
(match_operand:SWI 1 "const_int_operand")
(subreg:QI
- (and
- (match_operand 2 "int248_register_operand")
- (match_operand 3 "const_int_operand")) 0)))]
+ (match_operator 4 "and_operator"
+ [(match_operand 2 "int248_register_operand")
+ (match_operand 3 "const_int_operand")]) 0)))]
"(INTVAL (operands[3]) & (GET_MODE_BITSIZE (<MODE>mode) - 1))
== GET_MODE_BITSIZE (<MODE>mode) - 1"
- [(set (match_dup 4) (match_dup 1))
+ [(set (match_dup 5) (match_dup 1))
(set (match_dup 0)
- (any_rotate:SWI (match_dup 4)
+ (any_rotate:SWI (match_dup 5)
(subreg:QI
- (and:SI (match_dup 2) (match_dup 3)) 0)))]
- "operands[4] = gen_reg_rtx (<MODE>mode);")
+ (match_op_dup 4 [(match_dup 2) (match_dup 3)]) 0)))]
+ "operands[5] = gen_reg_rtx (<MODE>mode);")
(define_insn_and_split "*<insn><mode>3_mask_1"
[(set (match_operand:SWI 0 "nonimmediate_operand")
@@ -19894,7 +20027,7 @@
/* We can't use @GOTOFF for text labels on VxWorks;
see gotoff_operand. */
- if (TARGET_64BIT || TARGET_VXWORKS_RTP)
+ if (TARGET_64BIT || TARGET_VXWORKS_VAROFF)
{
code = PLUS;
op0 = operands[0];
@@ -20762,7 +20895,7 @@
(clobber (reg:CC FLAGS_REG))])]
"!TARGET_64BIT"
{
- if (flag_pic && !TARGET_VXWORKS_RTP)
+ if (flag_pic && !TARGET_VXWORKS_GOTTPIC)
ix86_pc_thunk_call_expanded = true;
})
@@ -20783,7 +20916,7 @@
(clobber (reg:CC FLAGS_REG))])]
"!TARGET_64BIT"
{
- if (flag_pic && !TARGET_VXWORKS_RTP)
+ if (flag_pic && !TARGET_VXWORKS_GOTTPIC)
ix86_pc_thunk_call_expanded = true;
})
@@ -21315,11 +21448,12 @@
(set_attr "mode" "SI")])
; As bsr is undefined behavior on zero and for other input
-; values it is in range 0 to 63, we can optimize away sign-extends.
-(define_insn_and_split "*bsr_rex64_2"
+; values its result is in the range 0 to 63, we can optimize away
+; sign-extends and zero-extends.
+(define_insn_and_split "*bsr_rex64<u>_2"
[(set (match_operand:DI 0 "register_operand")
(xor:DI
- (sign_extend:DI
+ (any_extend:DI
(minus:SI
(const_int 63)
(subreg:SI (clz:DI (match_operand:DI 1 "nonimmediate_operand"))
@@ -21341,9 +21475,9 @@
operands[3] = lowpart_subreg (SImode, operands[2], DImode);
})
-(define_insn_and_split "*bsr_2"
+(define_insn_and_split "*bsr<u>_2"
[(set (match_operand:DI 0 "register_operand")
- (sign_extend:DI
+ (any_extend:DI
(xor:SI
(minus:SI
(const_int 31)
@@ -21420,7 +21554,7 @@
(minus:DI
(match_operand:DI 2 "const_int_operand")
(xor:DI
- (sign_extend:DI
+ (any_extend:DI
(minus:SI (const_int 63)
(subreg:SI
(clz:DI (match_operand:DI 1 "nonimmediate_operand"))
@@ -21450,7 +21584,7 @@
[(set (match_operand:DI 0 "register_operand")
(minus:DI
(match_operand:DI 2 "const_int_operand")
- (sign_extend:DI
+ (any_extend:DI
(xor:SI
(minus:SI (const_int 31)
(clz:SI (match_operand:SI 1 "nonimmediate_operand")))
@@ -22992,7 +23126,8 @@
(match_operand 3)))
(unspec:P [(match_operand 1 "tls_symbolic_operand")
(reg:P SP_REG)]
- UNSPEC_TLS_GD)]
+ UNSPEC_TLS_GD)
+ (clobber (match_operand:P 4 "register_operand" "=D"))]
"TARGET_64BIT"
{
if (!TARGET_X32)
@@ -23009,7 +23144,7 @@
Use data16 prefix instead, which doesn't have this problem. */
fputs ("\tdata16", asm_out_file);
output_asm_insn
- ("lea{q}\t{%E1@tlsgd(%%rip), %%rdi|rdi, %E1@tlsgd[rip]}", operands);
+ ("lea{q}\t{%E1@tlsgd(%%rip), %q4|%q4, %E1@tlsgd[rip]}", operands);
if (TARGET_SUN_TLS || flag_plt || !HAVE_AS_IX86_TLS_GET_ADDR_GOT)
fputs (ASM_SHORT "0x6666\n", asm_out_file);
else
@@ -23022,6 +23157,7 @@
return "call\t{*%p2@GOTPCREL(%%rip)|[QWORD PTR %p2@GOTPCREL[rip]]}";
}
[(set_attr "type" "multi")
+ (set_attr "tls64" "gd")
(set (attr "length")
(symbol_ref "TARGET_X32 ? 15 : 16"))])
@@ -23033,14 +23169,15 @@
(match_operand 4)))
(unspec:DI [(match_operand 1 "tls_symbolic_operand")
(reg:DI SP_REG)]
- UNSPEC_TLS_GD)]
+ UNSPEC_TLS_GD)
+ (clobber (match_operand:DI 5 "register_operand" "=D"))]
"TARGET_64BIT && ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF
&& GET_CODE (operands[3]) == CONST
&& GET_CODE (XEXP (operands[3], 0)) == UNSPEC
&& XINT (XEXP (operands[3], 0), 1) == UNSPEC_PLTOFF"
{
output_asm_insn
- ("lea{q}\t{%E1@tlsgd(%%rip), %%rdi|rdi, %E1@tlsgd[rip]}", operands);
+ ("lea{q}\t{%E1@tlsgd(%%rip), %5|%5, %E1@tlsgd[rip]}", operands);
output_asm_insn ("movabs{q}\t{%3, %%rax|rax, %3}", operands);
output_asm_insn ("add{q}\t{%2, %%rax|rax, %2}", operands);
return "call\t{*%%rax|rax}";
@@ -23056,9 +23193,14 @@
(const_int 0)))
(unspec:P [(match_operand 1 "tls_symbolic_operand")
(reg:P SP_REG)]
- UNSPEC_TLS_GD)])]
+ UNSPEC_TLS_GD)
+ (clobber (match_operand:P 3 "register_operand"))])]
"TARGET_64BIT"
- "ix86_tls_descriptor_calls_expanded_in_cfun = true;")
+{
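+  /* Record when this function expands more than one TLS call.  */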
+ if (ix86_tls_descriptor_calls_expanded_in_cfun)
+ cfun->machine->tls_descriptor_call_multiple_p = true;
+ ix86_tls_descriptor_calls_expanded_in_cfun = true;
+})
(define_insn "*tls_local_dynamic_base_32_gnu"
[(set (match_operand:SI 0 "register_operand" "=a")
@@ -23107,11 +23249,12 @@
(call:P
(mem:QI (match_operand 1 "constant_call_address_operand" "Bz"))
(match_operand 2)))
- (unspec:P [(reg:P SP_REG)] UNSPEC_TLS_LD_BASE)]
+ (unspec:P [(reg:P SP_REG)] UNSPEC_TLS_LD_BASE)
+ (clobber (match_operand:P 3 "register_operand" "=D"))]
"TARGET_64BIT"
{
output_asm_insn
- ("lea{q}\t{%&@tlsld(%%rip), %%rdi|rdi, %&@tlsld[rip]}", operands);
+ ("lea{q}\t{%&@tlsld(%%rip), %q3|%q3, %&@tlsld[rip]}", operands);
if (TARGET_SUN_TLS)
return "call\t%p1@plt";
if (flag_plt || !HAVE_AS_IX86_TLS_GET_ADDR_GOT)
@@ -23119,6 +23262,7 @@
return "call\t{*%p1@GOTPCREL(%%rip)|[QWORD PTR %p1@GOTPCREL[rip]]}";
}
[(set_attr "type" "multi")
+ (set_attr "tls64" "ld_base")
(set_attr "length" "12")])
(define_insn "*tls_local_dynamic_base_64_largepic"
@@ -23127,14 +23271,15 @@
(mem:QI (plus:DI (match_operand:DI 1 "register_operand" "b")
(match_operand:DI 2 "immediate_operand" "i")))
(match_operand 3)))
- (unspec:DI [(reg:DI SP_REG)] UNSPEC_TLS_LD_BASE)]
+ (unspec:DI [(reg:DI SP_REG)] UNSPEC_TLS_LD_BASE)
+ (clobber (match_operand:DI 4 "register_operand" "=D"))]
"TARGET_64BIT && ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF
&& GET_CODE (operands[2]) == CONST
&& GET_CODE (XEXP (operands[2], 0)) == UNSPEC
&& XINT (XEXP (operands[2], 0), 1) == UNSPEC_PLTOFF"
{
output_asm_insn
- ("lea{q}\t{%&@tlsld(%%rip), %%rdi|rdi, %&@tlsld[rip]}", operands);
+ ("lea{q}\t{%&@tlsld(%%rip), %4|%4, %&@tlsld[rip]}", operands);
output_asm_insn ("movabs{q}\t{%2, %%rax|rax, %2}", operands);
output_asm_insn ("add{q}\t{%1, %%rax|rax, %1}", operands);
return "call\t{*%%rax|rax}";
@@ -23148,9 +23293,14 @@
(call:P
(mem:QI (match_operand 1))
(const_int 0)))
- (unspec:P [(reg:P SP_REG)] UNSPEC_TLS_LD_BASE)])]
+ (unspec:P [(reg:P SP_REG)] UNSPEC_TLS_LD_BASE)
+ (clobber (match_operand:P 2 "register_operand"))])]
"TARGET_64BIT"
- "ix86_tls_descriptor_calls_expanded_in_cfun = true;")
+{
+ if (ix86_tls_descriptor_calls_expanded_in_cfun)
+ cfun->machine->tls_descriptor_call_multiple_p = true;
+ ix86_tls_descriptor_calls_expanded_in_cfun = true;
+})
;; Local dynamic of a single variable is a lose. Show combine how
;; to convert that back to global dynamic.
@@ -23344,6 +23494,8 @@
"TARGET_64BIT && TARGET_GNU2_TLS"
{
operands[2] = can_create_pseudo_p () ? gen_reg_rtx (ptr_mode) : operands[0];
+ if (ix86_tls_descriptor_calls_expanded_in_cfun)
+ cfun->machine->tls_descriptor_call_multiple_p = true;
ix86_tls_descriptor_calls_expanded_in_cfun = true;
})
@@ -23355,6 +23507,7 @@
"lea%z0\t{%E1@TLSDESC(%%rip), %0|%0, %E1@TLSDESC[rip]}"
[(set_attr "type" "lea")
(set_attr "mode" "<MODE>")
+ (set_attr "tls64" "lea")
(set_attr "length" "7")
(set_attr "length_address" "4")])
@@ -23368,6 +23521,7 @@
"TARGET_64BIT && TARGET_GNU2_TLS"
"call\t{*%a1@TLSCALL(%2)|[QWORD PTR [%2+%a1@TLSCALL]]}"
[(set_attr "type" "call")
+ (set_attr "tls64" "call")
(set_attr "length" "2")
(set_attr "length_address" "0")])
@@ -23389,7 +23543,8 @@
{
operands[4] = can_create_pseudo_p () ? gen_reg_rtx (ptr_mode) : operands[0];
emit_insn (gen_tls_dynamic_gnu2_64 (ptr_mode, operands[4], operands[1]));
-})
+}
+ [(set_attr "tls64" "combine")])
(define_split
[(match_operand 0 "tls_address_pattern")]
@@ -25587,10 +25742,6 @@
(clobber (reg:CC FLAGS_REG))])]
""
{
- /* Can't use this for non-default address spaces. */
- if (!ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (operands[3])))
- FAIL;
-
int piece_size = GET_MODE_SIZE (GET_MODE (operands[1]));
/* If .md ever supports :P for Pmode, these can be directly
@@ -25598,9 +25749,14 @@
operands[5] = plus_constant (Pmode, operands[0], piece_size);
operands[6] = plus_constant (Pmode, operands[2], piece_size);
- /* Can't use this if the user has appropriated esi or edi. */
+  /* Can't use this if the user has appropriated esi or edi,
+     or if the destination is in a non-default address space,
+     since string insns cannot override the destination segment.  */
if ((TARGET_SINGLE_STRINGOP || optimize_insn_for_size_p ())
- && !(fixed_regs[SI_REG] || fixed_regs[DI_REG]))
+ && !(fixed_regs[SI_REG] || fixed_regs[DI_REG])
+ && ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (operands[1]))
+ && (ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (operands[3]))
+ || Pmode == word_mode))
{
emit_insn (gen_strmov_singleop (operands[0], operands[1],
operands[2], operands[3],
@@ -25635,8 +25791,15 @@
(const_int 8)))]
"TARGET_64BIT
&& !(fixed_regs[SI_REG] || fixed_regs[DI_REG])
- && ix86_check_no_addr_space (insn)"
- "%^movsq"
+ && ix86_check_movs (insn, 0)"
+{
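+  /* Pull the source and destination out of the parallel; %v1 is then
+     assumed to print any segment override needed for a non-default
+     source address space.  */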
+ rtx exp = XVECEXP (PATTERN (insn), 0, 0);
+
+ operands[0] = SET_DEST (exp);
+ operands[1] = SET_SRC (exp);
+
+ return "%^%v1movsq";
+}
[(set_attr "type" "str")
(set_attr "memory" "both")
(set_attr "mode" "DI")])
@@ -25651,8 +25814,15 @@
(plus:P (match_dup 3)
(const_int 4)))]
"!(fixed_regs[SI_REG] || fixed_regs[DI_REG])
- && ix86_check_no_addr_space (insn)"
- "%^movs{l|d}"
+ && ix86_check_movs (insn, 0)"
+{
+ rtx exp = XVECEXP (PATTERN (insn), 0, 0);
+
+ operands[0] = SET_DEST (exp);
+ operands[1] = SET_SRC (exp);
+
+ return "%^%v1movs{l|d}";
+}
[(set_attr "type" "str")
(set_attr "memory" "both")
(set_attr "mode" "SI")])
@@ -25667,8 +25837,15 @@
(plus:P (match_dup 3)
(const_int 2)))]
"!(fixed_regs[SI_REG] || fixed_regs[DI_REG])
- && ix86_check_no_addr_space (insn)"
- "%^movsw"
+ && ix86_check_movs (insn, 0)"
+{
+ rtx exp = XVECEXP (PATTERN (insn), 0, 0);
+
+ operands[0] = SET_DEST (exp);
+ operands[1] = SET_SRC (exp);
+
+ return "%^%v1movsw";
+}
[(set_attr "type" "str")
(set_attr "memory" "both")
(set_attr "mode" "HI")])
@@ -25683,8 +25860,15 @@
(plus:P (match_dup 3)
(const_int 1)))]
"!(fixed_regs[SI_REG] || fixed_regs[DI_REG])
- && ix86_check_no_addr_space (insn)"
- "%^movsb"
+ && ix86_check_movs (insn, 0)"
+{
+ rtx exp = XVECEXP (PATTERN (insn), 0, 0);
+
+ operands[0] = SET_DEST (exp);
+ operands[1] = SET_SRC (exp);
+
+ return "%^%v1movsb";
+}
[(set_attr "type" "str")
(set_attr "memory" "both")
(set (attr "prefix_rex")
@@ -25723,8 +25907,15 @@
(use (match_dup 5))]
"TARGET_64BIT
&& !(fixed_regs[CX_REG] || fixed_regs[SI_REG] || fixed_regs[DI_REG])
- && ix86_check_no_addr_space (insn)"
- "%^rep{%;} movsq"
+ && ix86_check_movs (insn, 3)"
+{
+ rtx exp = XVECEXP (PATTERN (insn), 0, 3);
+
+ operands[0] = SET_DEST (exp);
+ operands[1] = SET_SRC (exp);
+
+ return "%^%v1rep{%;} movsq";
+}
[(set_attr "type" "str")
(set_attr "prefix_rep" "1")
(set_attr "memory" "both")
@@ -25743,8 +25934,15 @@
(mem:BLK (match_dup 4)))
(use (match_dup 5))]
"!(fixed_regs[CX_REG] || fixed_regs[SI_REG] || fixed_regs[DI_REG])
- && ix86_check_no_addr_space (insn)"
- "%^rep{%;} movs{l|d}"
+ && ix86_check_movs (insn, 3)"
+{
+ rtx exp = XVECEXP (PATTERN (insn), 0, 3);
+
+ operands[0] = SET_DEST (exp);
+ operands[1] = SET_SRC (exp);
+
+ return "%^%v1rep{%;} movs{l|d}";
+}
[(set_attr "type" "str")
(set_attr "prefix_rep" "1")
(set_attr "memory" "both")
@@ -25761,8 +25959,15 @@
(mem:BLK (match_dup 4)))
(use (match_dup 5))]
"!(fixed_regs[CX_REG] || fixed_regs[SI_REG] || fixed_regs[DI_REG])
- && ix86_check_no_addr_space (insn)"
- "%^rep{%;} movsb"
+ && ix86_check_movs (insn, 3)"
+{
+ rtx exp = XVECEXP (PATTERN (insn), 0, 3);
+
+ operands[0] = SET_DEST (exp);
+ operands[1] = SET_SRC (exp);
+
+ return "%^%v1rep{%;} movsb";
+}
[(set_attr "type" "str")
(set_attr "prefix_rep" "1")
(set_attr "memory" "both")
@@ -25844,7 +26049,8 @@
(unspec [(const_int 0)] UNSPEC_STOS)]
"TARGET_64BIT
&& !(fixed_regs[AX_REG] || fixed_regs[DI_REG])
- && ix86_check_no_addr_space (insn)"
+ && ADDR_SPACE_GENERIC_P
+ (MEM_ADDR_SPACE (SET_DEST (XVECEXP (PATTERN (insn), 0, 0))))"
"%^stosq"
[(set_attr "type" "str")
(set_attr "memory" "store")
@@ -25858,7 +26064,8 @@
(const_int 4)))
(unspec [(const_int 0)] UNSPEC_STOS)]
"!(fixed_regs[AX_REG] || fixed_regs[DI_REG])
- && ix86_check_no_addr_space (insn)"
+ && ADDR_SPACE_GENERIC_P
+ (MEM_ADDR_SPACE (SET_DEST (XVECEXP (PATTERN (insn), 0, 0))))"
"%^stos{l|d}"
[(set_attr "type" "str")
(set_attr "memory" "store")
@@ -25872,7 +26079,8 @@
(const_int 2)))
(unspec [(const_int 0)] UNSPEC_STOS)]
"!(fixed_regs[AX_REG] || fixed_regs[DI_REG])
- && ix86_check_no_addr_space (insn)"
+ && ADDR_SPACE_GENERIC_P
+ (MEM_ADDR_SPACE (SET_DEST (XVECEXP (PATTERN (insn), 0, 0))))"
"%^stosw"
[(set_attr "type" "str")
(set_attr "memory" "store")
@@ -25886,7 +26094,8 @@
(const_int 1)))
(unspec [(const_int 0)] UNSPEC_STOS)]
"!(fixed_regs[AX_REG] || fixed_regs[DI_REG])
- && ix86_check_no_addr_space (insn)"
+ && ADDR_SPACE_GENERIC_P
+ (MEM_ADDR_SPACE (SET_DEST (XVECEXP (PATTERN (insn), 0, 0))))"
"%^stosb"
[(set_attr "type" "str")
(set_attr "memory" "store")
@@ -25922,7 +26131,8 @@
(use (match_dup 4))]
"TARGET_64BIT
&& !(fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
- && ix86_check_no_addr_space (insn)"
+ && ADDR_SPACE_GENERIC_P
+ (MEM_ADDR_SPACE (SET_DEST (XVECEXP (PATTERN (insn), 0, 2))))"
"%^rep{%;} stosq"
[(set_attr "type" "str")
(set_attr "prefix_rep" "1")
@@ -25940,7 +26150,8 @@
(use (match_operand:SI 2 "register_operand" "a"))
(use (match_dup 4))]
"!(fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
- && ix86_check_no_addr_space (insn)"
+ && ADDR_SPACE_GENERIC_P
+ (MEM_ADDR_SPACE (SET_DEST (XVECEXP (PATTERN (insn), 0, 2))))"
"%^rep{%;} stos{l|d}"
[(set_attr "type" "str")
(set_attr "prefix_rep" "1")
@@ -25957,7 +26168,8 @@
(use (match_operand:QI 2 "register_operand" "a"))
(use (match_dup 4))]
"!(fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
- && ix86_check_no_addr_space (insn)"
+ && ADDR_SPACE_GENERIC_P
+ (MEM_ADDR_SPACE (SET_DEST (XVECEXP (PATTERN (insn), 0, 2))))"
"%^rep{%;} stosb"
[(set_attr "type" "str")
(set_attr "prefix_rep" "1")
@@ -26224,8 +26436,8 @@
(define_expand "mov<mode>cc"
[(set (match_operand:SWIM 0 "register_operand")
(if_then_else:SWIM (match_operand 1 "comparison_operator")
- (match_operand:SWIM 2 "<general_operand>")
- (match_operand:SWIM 3 "<general_operand>")))]
+ (match_operand:SWIM 2 "general_operand")
+ (match_operand:SWIM 3 "general_operand")))]
""
"if (ix86_expand_int_movcc (operands)) DONE; else FAIL;")
@@ -26592,8 +26804,8 @@
[(set (match_operand:X87MODEF 0 "register_operand")
(if_then_else:X87MODEF
(match_operand 1 "comparison_operator")
- (match_operand:X87MODEF 2 "register_operand")
- (match_operand:X87MODEF 3 "register_operand")))]
+ (match_operand:X87MODEF 2 "nonimm_or_0_or_1s_operand")
+ (match_operand:X87MODEF 3 "nonimm_or_0_operand")))]
"(TARGET_80387 && TARGET_CMOVE)
|| (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)"
"if (ix86_expand_fp_movcc (operands)) DONE; else FAIL;")
@@ -27183,7 +27395,7 @@
(cond [(and (eq_attr "alternative" "0")
(not (match_test "TARGET_OPT_AGU")))
(const_string "alu")
- (match_operand:<MODE> 2 "const0_operand")
+ (match_operand 2 "const0_operand")
(const_string "imov")
]
(const_string "lea")))
@@ -27197,6 +27409,46 @@
(const_string "*")))
(set_attr "mode" "<MODE>")])
+(define_insn "@pro_epilogue_adjust_stack_add_nocc<mode>"
+ [(set (match_operand:P 0 "register_operand" "=r")
+ (plus:P (match_operand:P 1 "register_operand" "r")
+ (match_operand:P 2 "<nonmemory_operand>" "l<i>")))
+ (clobber (mem:BLK (scratch)))]
+ ""
+{
+ if (get_attr_type (insn) == TYPE_IMOV)
+ return "mov{<imodesuffix>}\t{%1, %0|%0, %1}";
+ else
+ {
+ operands[2] = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
+ return "lea{<imodesuffix>}\t{%E2, %0|%0, %E2}";
+ }
+}
+ [(set (attr "type")
+ (cond [(match_operand 2 "const0_operand")
+ (const_string "imov")
+ ]
+ (const_string "lea")))
+ (set (attr "length_immediate")
+ (cond [(eq_attr "type" "imov")
+ (const_string "0")
+ ]
+ (const_string "*")))
+ (set_attr "mode" "<MODE>")])
+
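+;; Once the flags register is known to be dead, rewrite the no-clobber
+;; form into an ordinary flags-clobbering add.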
+(define_peephole2
+ [(parallel
+ [(set (match_operand:P 0 "register_operand")
+ (plus:P (match_dup 0)
+ (match_operand:P 1 "<nonmemory_operand>")))
+ (clobber (mem:BLK (scratch)))])]
+ "peep2_regno_dead_p (0, FLAGS_REG)"
+ [(parallel
+ [(set (match_dup 0)
+ (plus:P (match_dup 0) (match_dup 1)))
+ (clobber (reg:CC FLAGS_REG))
+ (clobber (mem:BLK (scratch)))])])
+
(define_insn "@pro_epilogue_adjust_stack_sub_<mode>"
[(set (match_operand:P 0 "register_operand" "=r")
(minus:P (match_operand:P 1 "register_operand" "0")
@@ -27928,10 +28180,8 @@
(match_operator 1 "compare_operator"
[(and:QI
(subreg:QI
- (match_operator:SWI248 4 "extract_operator"
- [(match_operand 2 "int248_register_operand")
- (const_int 8)
- (const_int 8)]) 0)
+ (match_operator:SWI248 4 "extract_high_operator"
+ [(match_operand 2 "int248_register_operand")]) 0)
(match_operand 3 "const_int_operand"))
(const_int 0)]))]
"! TARGET_PARTIAL_REG_STALL
@@ -27943,9 +28193,9 @@
(match_op_dup 1
[(and:QI
(subreg:QI
- (match_op_dup 4 [(match_dup 2)
- (const_int 8)
- (const_int 8)]) 0)
+ (zero_extract:SWI248 (match_dup 2)
+ (const_int 8)
+ (const_int 8)) 0)
(match_dup 3))
(const_int 0)]))
(set (zero_extract:SWI248 (match_dup 2)
@@ -27954,9 +28204,9 @@
(subreg:SWI248
(and:QI
(subreg:QI
- (match_op_dup 4 [(match_dup 2)
- (const_int 8)
- (const_int 8)]) 0)
+ (zero_extract:SWI248 (match_dup 2)
+ (const_int 8)
+ (const_int 8)) 0)
(match_dup 3)) 0))])])
;; Don't do logical operations with memory inputs.
@@ -28144,6 +28394,41 @@
const0_rtx);
})
+;; For APX NDD PLUS/MINUS/LOGIC
+;; Like the cmpelim optimized pattern, remove an extra mov instruction,
+;; from
+;; decl (%rdi), %eax
+;; movl %eax, (%rdi)
+;; to
+;; decl (%rdi)
+(define_peephole2
+ [(parallel [(set (reg FLAGS_REG)
+ (compare (match_operator:SWI 2 "plusminuslogic_operator"
+ [(match_operand:SWI 0 "memory_operand")
+ (match_operand:SWI 1 "<nonmemory_operand>")])
+ (const_int 0)))
+ (set (match_operand:SWI 3 "register_operand") (match_dup 2))])
+ (set (match_dup 0) (match_dup 3))]
+ "TARGET_APX_NDD
+ && (TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
+ && peep2_reg_dead_p (2, operands[3])
+ && !reg_overlap_mentioned_p (operands[3], operands[0])
+ && ix86_match_ccmode (peep2_next_insn (0),
+ (GET_CODE (operands[2]) == PLUS
+ || GET_CODE (operands[2]) == MINUS)
+ ? CCGOCmode : CCNOmode)"
+ [(parallel [(set (match_dup 4) (match_dup 6))
+ (set (match_dup 0) (match_dup 5))])]
+{
+ operands[4] = SET_DEST (XVECEXP (PATTERN (peep2_next_insn (0)), 0, 0));
+ operands[5]
+ = gen_rtx_fmt_ee (GET_CODE (operands[2]), GET_MODE (operands[2]),
+ copy_rtx (operands[0]), operands[1]);
+ operands[6]
+ = gen_rtx_COMPARE (GET_MODE (operands[4]), copy_rtx (operands[5]),
+ const0_rtx);
+})
+
;; Likewise for instances where we have a lea pattern.
(define_peephole2
[(set (match_operand:SWI 0 "register_operand")
@@ -28237,6 +28522,54 @@
const0_rtx);
})
+;; For APX NDD XOR
+;; Eliminate 2 mov instructions and 1 cmp instruction,
+;; from
+;; movq (%rdi), %rax
+;; xorq %rsi, %rax, %rdx
+;; movq %rdx, (%rdi)
+;; cmpq %rsi, %rax
+;; jne
+;; to
+;; xorq %rsi, (%rdi)
+;; jne
+(define_peephole2
+ [(set (match_operand:SWI 0 "register_operand")
+ (match_operand:SWI 1 "memory_operand"))
+ (parallel [(set (match_operand:SWI 4 "register_operand")
+ (xor:SWI (match_operand:SWI 3 "register_operand")
+ (match_operand:SWI 2 "<nonmemory_operand>")))
+ (clobber (reg:CC FLAGS_REG))])
+ (set (match_dup 1) (match_dup 4))
+ (set (reg:CCZ FLAGS_REG)
+ (compare:CCZ (match_operand:SWI 5 "register_operand")
+ (match_operand:SWI 6 "<nonmemory_operand>")))]
+ "TARGET_APX_NDD
+ && (TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
+ && REGNO (operands[3]) == REGNO (operands[0])
+ && (rtx_equal_p (operands[0], operands[5])
+ ? rtx_equal_p (operands[2], operands[6])
+ : rtx_equal_p (operands[2], operands[5])
+ && rtx_equal_p (operands[0], operands[6]))
+ && peep2_reg_dead_p (3, operands[4])
+ && peep2_reg_dead_p (4, operands[0])
+ && !reg_overlap_mentioned_p (operands[0], operands[1])
+ && !reg_overlap_mentioned_p (operands[0], operands[2])
+ && (<MODE>mode != QImode
+ || immediate_operand (operands[2], QImode)
+ || any_QIreg_operand (operands[2], QImode))"
+ [(parallel [(set (match_dup 7) (match_dup 9))
+ (set (match_dup 1) (match_dup 8))])]
+{
+ operands[7] = SET_DEST (PATTERN (peep2_next_insn (3)));
+ operands[8] = gen_rtx_XOR (<MODE>mode, copy_rtx (operands[1]),
+ operands[2]);
+ operands[9]
+ = gen_rtx_COMPARE (GET_MODE (operands[7]),
+ copy_rtx (operands[8]),
+ const0_rtx);
+})
+
(define_peephole2
[(set (match_operand:SWI12 0 "register_operand")
(match_operand:SWI12 1 "memory_operand"))
@@ -28480,6 +28813,58 @@
const0_rtx);
})
+;; For APX NDD XOR
+;; Eliminate 2 mov instructions and 1 cmp instruction,
+;; from
+;; movb (%rdi), %al
+;; xorl %esi, %eax, %edx
+;; movb %dl, (%rdi)
+;; cmpb %sil, %al
+;; jne
+;; to
+;; xorb %sil, (%rdi)
+;; jne
+(define_peephole2
+ [(set (match_operand:SWI12 0 "register_operand")
+ (match_operand:SWI12 1 "memory_operand"))
+ (parallel [(set (match_operand:SI 4 "register_operand")
+ (xor:SI (match_operand:SI 3 "register_operand")
+ (match_operand:SI 2 "<nonmemory_operand>")))
+ (clobber (reg:CC FLAGS_REG))])
+ (set (match_dup 1) (match_operand:SWI12 5 "register_operand"))
+ (set (reg:CCZ FLAGS_REG)
+ (compare:CCZ (match_operand:SWI12 6 "register_operand")
+ (match_operand:SWI12 7 "<nonmemory_operand>")))]
+ "TARGET_APX_NDD
+ && (TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
+ && REGNO (operands[3]) == REGNO (operands[0])
+ && REGNO (operands[5]) == REGNO (operands[4])
+ && (rtx_equal_p (operands[0], operands[6])
+ ? (REG_P (operands[2])
+ ? REG_P (operands[7]) && REGNO (operands[2]) == REGNO (operands[7])
+ : rtx_equal_p (operands[2], operands[7]))
+ : (rtx_equal_p (operands[0], operands[7])
+ && REG_P (operands[2])
+ && REGNO (operands[2]) == REGNO (operands[6])))
+ && peep2_reg_dead_p (3, operands[5])
+ && peep2_reg_dead_p (4, operands[0])
+ && !reg_overlap_mentioned_p (operands[0], operands[1])
+ && !reg_overlap_mentioned_p (operands[0], operands[2])
+ && (<MODE>mode != QImode
+ || immediate_operand (operands[2], SImode)
+ || any_QIreg_operand (operands[2], SImode))"
+ [(parallel [(set (match_dup 8) (match_dup 10))
+ (set (match_dup 1) (match_dup 9))])]
+{
+ operands[8] = SET_DEST (PATTERN (peep2_next_insn (3)));
+ operands[9] = gen_rtx_XOR (<MODE>mode, copy_rtx (operands[1]),
+ gen_lowpart (<MODE>mode, operands[2]));
+ operands[10]
+ = gen_rtx_COMPARE (GET_MODE (operands[8]),
+ copy_rtx (operands[9]),
+ const0_rtx);
+})
+
;; Attempt to optimize away memory stores of values the memory already
;; has. See PR79593.
(define_peephole2
@@ -29082,6 +29467,23 @@
(set_attr "prefix_extra" "1")
(set_attr "mode" "DI")])
+(define_expand "crc_rev<SWI124:mode>si4"
+ [(match_operand:SI 0 "register_operand")
+ (match_operand:SI 1 "register_operand")
+ (match_operand:SWI124 2 "nonimmediate_operand")
+ (match_operand:SI 3)]
+ "TARGET_CRC32"
+{
+  /* crc32 uses the iSCSI (CRC-32C) polynomial.  */
+ if (INTVAL (operands[3]) == 0x1EDC6F41)
+ emit_insn (gen_sse4_2_crc32<mode> (operands[0], operands[1], operands[2]));
+ else
+ expand_reversed_crc_table_based (operands[0], operands[1], operands[2],
+ operands[3], <SWI124:MODE>mode,
+ generate_reflecting_code_standard);
+ DONE;
+})
+
(define_insn "rdpmc"
[(set (match_operand:DI 0 "register_operand" "=A")
(unspec_volatile:DI [(match_operand:SI 1 "register_operand" "c")]
diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt
index 27d34bd..6bda22f 100644
--- a/gcc/config/i386/i386.opt
+++ b/gcc/config/i386/i386.opt
@@ -36,13 +36,6 @@ HOST_WIDE_INT ix86_isa_flags_explicit
Variable
HOST_WIDE_INT ix86_isa_flags2_explicit
-; Indicate if AVX512 and AVX10.1 are explicitly set no.
-Variable
-int ix86_no_avx512_explicit = 0
-
-Variable
-int ix86_no_avx10_1_explicit = 0
-
; Additional target flags
Variable
int ix86_target_flags
@@ -103,14 +96,6 @@ HOST_WIDE_INT x_ix86_isa_flags2_explicit
TargetSave
HOST_WIDE_INT x_ix86_isa_flags_explicit
-;; which flags were passed by the user
-TargetSave
-HOST_WIDE_INT x_ix86_no_avx512_explicit
-
-;; which flags were passed by the user
-TargetSave
-HOST_WIDE_INT x_ix86_no_avx10_1_explicit
-
;; whether -mtune was not specified
TargetSave
unsigned char tune_defaulted
@@ -721,13 +706,9 @@ Target Mask(ISA_SSE4_2) Var(ix86_isa_flags) Save
Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1 and SSE4.2 built-in functions and code generation.
msse4
-Target RejectNegative Mask(ISA_SSE4_2) Var(ix86_isa_flags) Save
+Target Mask(ISA_SSE4_2) Var(ix86_isa_flags) Save
Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1 and SSE4.2 built-in functions and code generation.
-mno-sse4
-Target RejectNegative InverseMask(ISA_SSE4_1) Var(ix86_isa_flags) Save
-Do not support SSE4.1 and SSE4.2 built-in functions and code generation.
-
msse5
Target Undocumented Alias(mavx) Warn(%<-msse5%> was removed)
;; Deprecated
@@ -1265,6 +1246,10 @@ munroll-only-small-loops
Target Var(ix86_unroll_only_small_loops) Init(0) Optimization
Enable conservative small loop unrolling.
+-param=ix86-vect-unroll-limit=
+Target Joined UInteger Var(ix86_vect_unroll_limit) Init(4) Param
+Limit how much the autovectorizer may unroll a loop.
+
mlam=
Target RejectNegative Joined Enum(lam_type) Var(ix86_lam_type) Init(lam_none)
-mlam=[none|u48|u57] Instrument meta data position in user data pointers.
@@ -1355,38 +1340,24 @@ mapx-inline-asm-use-gpr32
Target Var(ix86_apx_inline_asm_use_gpr32) Init(0)
Enable GPR32 in inline asm when APX_F enabled.
-mevex512
-Target Mask(ISA2_EVEX512) Var(ix86_isa_flags2) Save Warn(%<-mevex512%> will be deprecated in GCC 16 due to all machines 512 bit vector size supported)
-Support 512 bit vector built-in functions and code generation.
-
musermsr
Target Mask(ISA2_USER_MSR) Var(ix86_isa_flags2) Save
Support USER_MSR built-in functions and code generation.
-mavx10.1-256
-Target Mask(ISA2_AVX10_1_256) Var(ix86_isa_flags2) Save Warn(%<-mavx10.1%> is aliased to 512 bit since GCC14.3 and GCC15.1 while %<-mavx10.1-256%> and %<-mavx10.1-512%> will be deprecated in GCC 16 due to all machines 512 bit vector size supported)
-Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2,
-and AVX10.1-256 built-in functions and code generation.
-
mavx10.1
-Target Mask(ISA2_AVX10_1) Var(ix86_isa_flags2) Save Warn(%<-mavx10.1%> is aliased to 512 bit since GCC14.3 and GCC15.1 while %<-mavx10.1-256%> and %<-mavx10.1-512%> will be deprecated in GCC 16 due to all machines 512 bit vector size supported)
-Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2,
-and AVX10.1-512 built-in functions and code generation.
-
-mavx10.1-512
-Target Alias(mavx10.1)
+Target Mask(ISA2_AVX10_1) Var(ix86_isa_flags2) Save
Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2,
-and AVX10.1-512 built-in functions and code generation.
+and AVX10.1 built-in functions and code generation.
mavx10.2
Target Mask(ISA2_AVX10_2) Var(ix86_isa_flags2) Save
Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2,
-AVX10.1-512 and AVX10.2 built-in functions and code generation.
+AVX10.1 and AVX10.2 built-in functions and code generation.
mamx-avx512
Target Mask(ISA2_AMX_AVX512) Var(ix86_isa_flags2) Save
-Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2, AVX10.1-512,
-AVX10.2 and AMX-AVX512 built-in functions and code generation.
+Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2,
+AVX10.1, AVX10.2 and AMX-AVX512 built-in functions and code generation.
mamx-tf32
Target Mask(ISA2_AMX_TF32) Var(ix86_isa_flags2) Save
diff --git a/gcc/config/i386/i386.opt.urls b/gcc/config/i386/i386.opt.urls
index 0d5a5a1..cce524c 100644
--- a/gcc/config/i386/i386.opt.urls
+++ b/gcc/config/i386/i386.opt.urls
@@ -590,21 +590,12 @@ UrlSuffix(gcc/x86-Options.html#index-mapxf)
mapx-inline-asm-use-gpr32
UrlSuffix(gcc/x86-Options.html#index-mapx-inline-asm-use-gpr32)
-mevex512
-UrlSuffix(gcc/x86-Options.html#index-mevex512)
-
musermsr
UrlSuffix(gcc/x86-Options.html#index-musermsr)
-mavx10.1-256
-UrlSuffix(gcc/x86-Options.html#index-mavx10_002e1-256)
-
mavx10.1
UrlSuffix(gcc/x86-Options.html#index-mavx10_002e1)
-mavx10.1-512
-UrlSuffix(gcc/x86-Options.html#index-mavx10_002e1-512)
-
mavx10.2
UrlSuffix(gcc/x86-Options.html#index-mavx10_002e2)
diff --git a/gcc/config/i386/immintrin.h b/gcc/config/i386/immintrin.h
index c30a4e0..b195fe5 100644
--- a/gcc/config/i386/immintrin.h
+++ b/gcc/config/i386/immintrin.h
@@ -148,24 +148,14 @@
#include <avx10_2mediaintrin.h>
-#include <avx10_2-512mediaintrin.h>
-
#include <avx10_2convertintrin.h>
-#include <avx10_2-512convertintrin.h>
-
#include <avx10_2bf16intrin.h>
-#include <avx10_2-512bf16intrin.h>
-
#include <avx10_2satcvtintrin.h>
-#include <avx10_2-512satcvtintrin.h>
-
#include <avx10_2minmaxintrin.h>
-#include <avx10_2-512minmaxintrin.h>
-
#include <avx10_2copyintrin.h>
#include <movrsintrin.h>
diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 7920232..1f97993 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -81,12 +81,13 @@
;; 4-byte and 2-byte QImode vector modes
(define_mode_iterator VI1_16_32 [V4QI V2QI])
-;; All 2-byte, 4-byte and 8-byte vector modes with more than 1 element
+;; All 2-byte, 4-byte and 8-byte vector modes.
(define_mode_iterator V_16_32_64
- [V2QI V4QI V2HI V2HF
+ [V2QI V4QI V2HI V1SI V2HF V2BF
(V8QI "TARGET_64BIT") (V4HI "TARGET_64BIT")
(V4HF "TARGET_64BIT") (V4BF "TARGET_64BIT")
- (V2SI "TARGET_64BIT") (V2SF "TARGET_64BIT")])
+ (V2SI "TARGET_64BIT") (V2SF "TARGET_64BIT")
+ (V1DI "TARGET_64BIT")])
;; V2S* modes
(define_mode_iterator V2FI [V2SF V2SI])
@@ -107,6 +108,7 @@
[(V8QI "DI") (V4QI "SI") (V2QI "HI")
(V4HI "DI") (V2HI "SI")
(V2SI "DI")
+ (V1DI "DI") (V1SI "SI")
(V4HF "DI") (V2HF "SI")
(V4BF "DI") (V2BF "SI")
(V2SF "DI")])
@@ -329,7 +331,7 @@
(define_expand "mov<mode>"
[(set (match_operand:V_32 0 "nonimmediate_operand")
- (match_operand:V_32 1 "nonimmediate_operand"))]
+ (match_operand:V_32 1 "nonimm_or_0_operand"))]
""
{
ix86_expand_vector_move (<MODE>mode, operands);
@@ -339,7 +341,7 @@
(define_insn "*mov<mode>_internal"
[(set (match_operand:V_32 0 "nonimmediate_operand"
"=r ,m ,v,v,v,m,r,v")
- (match_operand:V_32 1 "general_operand"
+ (match_operand:V_32 1 "nonimm_or_0_operand"
"rmC,rC,C,v,m,v,v,r"))]
"!(MEM_P (operands[0]) && MEM_P (operands[1]))
&& ix86_hardreg_mov_ok (operands[0], operands[1])"
@@ -407,22 +409,6 @@
]
(symbol_ref "true")))])
-;; 16-bit, 32-bit and 64-bit constant vector stores. After reload,
-;; convert them to immediate integer stores.
-(define_insn_and_split "*mov<mode>_imm"
- [(set (match_operand:V_16_32_64 0 "memory_operand" "=m")
- (match_operand:V_16_32_64 1 "x86_64_const_vector_operand" "i"))]
- ""
- "#"
- "&& reload_completed"
- [(set (match_dup 0) (match_dup 1))]
-{
- HOST_WIDE_INT val = ix86_convert_const_vector_to_integer (operands[1],
- <MODE>mode);
- operands[1] = GEN_INT (val);
- operands[0] = lowpart_subreg (<mmxinsnmode>mode, operands[0], <MODE>mode);
-})
-
;; For TARGET_64BIT we always round up to 8 bytes.
(define_insn "*push<mode>2_rex64"
[(set (match_operand:V_32 0 "push_operand" "=X,X")
@@ -457,7 +443,7 @@
(define_expand "movv2qi"
[(set (match_operand:V2QI 0 "nonimmediate_operand")
- (match_operand:V2QI 1 "nonimmediate_operand"))]
+ (match_operand:V2QI 1 "nonimm_or_0_operand"))]
""
{
ix86_expand_vector_move (V2QImode, operands);
@@ -467,9 +453,10 @@
(define_insn "*movv2qi_internal"
[(set (match_operand:V2QI 0 "nonimmediate_operand"
"=r,r,r,m ,v,v,v,jm,m,r,v")
- (match_operand:V2QI 1 "general_operand"
+ (match_operand:V2QI 1 "nonimm_or_0_operand"
"r ,C,m,rC,C,v,m,x,v,v,r"))]
- "!(MEM_P (operands[0]) && MEM_P (operands[1]))"
+ "!(MEM_P (operands[0]) && MEM_P (operands[1]))
+ && ix86_hardreg_mov_ok (operands[0], operands[1])"
{
switch (get_attr_type (insn))
{
@@ -587,6 +574,42 @@
]
(symbol_ref "true")))])
+(define_split
+ [(set (match_operand:V_16_32_64 0 "general_reg_operand")
+ (match_operand:V_16_32_64 1 "memory_operand"))]
+ "reload_completed
+ && SYMBOL_REF_P (XEXP (operands[1], 0))
+ && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0))"
+ [(set (match_dup 0) (match_dup 1))]
+{
+ rtx op1 = avoid_constant_pool_reference (operands[1]);
+
+ if (!CONST_VECTOR_P (op1))
+ FAIL;
+
+ HOST_WIDE_INT val = ix86_convert_const_vector_to_integer (op1, <MODE>mode);
+
+ operands[0] = lowpart_subreg (<mmxinsnmode>mode, operands[0], <MODE>mode);
+ operands[1] = GEN_INT (val);
+})
+
+;; 16-bit, 32-bit and 64-bit constant vector stores. After reload,
+;; convert them to immediate integer stores.
+(define_insn_and_split "*mov<mode>_imm"
+ [(set (match_operand:V_16_32_64 0 "memory_operand" "=m")
+ (match_operand:V_16_32_64 1 "x86_64_const_vector_operand" "i"))]
+ ""
+ "#"
+ "&& reload_completed"
+ [(set (match_dup 0) (match_dup 1))]
+{
+ rtx op1 = operands[1];
+ HOST_WIDE_INT val = ix86_convert_const_vector_to_integer (op1, <MODE>mode);
+
+ operands[0] = adjust_address (operands[0], <mmxinsnmode>mode, 0);
+ operands[1] = GEN_INT (val);
+})
+
;; We always round up to UNITS_PER_WORD bytes.
(define_insn "*pushv2qi2"
[(set (match_operand:V2QI 0 "push_operand" "=X,X")
diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md
index 3d3848c..5dbe444 100644
--- a/gcc/config/i386/predicates.md
+++ b/gcc/config/i386/predicates.md
@@ -218,6 +218,7 @@
case UNSPEC_DTPOFF:
case UNSPEC_GOTNTPOFF:
case UNSPEC_NTPOFF:
+ case UNSPEC_SECREL32:
return true;
default:
break;
@@ -392,6 +393,23 @@
return false;
})
+;; Return true if VALUE is a constant integer whose negation satisfies
+;; x86_64_immediate_operand.
+(define_predicate "x86_64_neg_const_int_operand"
+ (match_code "const_int")
+{
+ HOST_WIDE_INT val = -UINTVAL (op);
+ if (mode == DImode && trunc_int_for_mode (val, SImode) != val)
+ return false;
+ if (flag_cf_protection & CF_BRANCH)
+ {
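+      /* Don't let the negated constant's low 32 bits encode an ENDBR
+         instruction, which could serve as a CET gadget.  */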
+ unsigned HOST_WIDE_INT endbr = TARGET_64BIT ? 0xfa1e0ff3 : 0xfb1e0ff3;
+ if ((val & HOST_WIDE_INT_C (0xffffffff)) == endbr)
+ return false;
+ }
+ return true;
+})
+
;; Return true if VALUE is a constant integer whose low and high words satisfy
;; x86_64_immediate_operand.
(define_predicate "x86_64_hilo_int_operand"
@@ -555,8 +573,8 @@
case CONST:
op = XEXP (op, 0);
- if (GET_CODE (op) == SYMBOL_REF
- || GET_CODE (op) == LABEL_REF
+ if (SYMBOL_REF_P (op)
+ || LABEL_REF_P (op)
|| (GET_CODE (op) == UNSPEC
&& (XINT (op, 1) == UNSPEC_GOT
|| XINT (op, 1) == UNSPEC_GOTOFF
@@ -568,8 +586,8 @@
return false;
op = XEXP (op, 0);
- if (GET_CODE (op) == SYMBOL_REF
- || GET_CODE (op) == LABEL_REF)
+ if (SYMBOL_REF_P (op)
+ || LABEL_REF_P (op))
return true;
/* Only @GOTOFF gets offsets. */
if (GET_CODE (op) != UNSPEC
@@ -577,8 +595,8 @@
return false;
op = XVECEXP (op, 0, 0);
- if (GET_CODE (op) == SYMBOL_REF
- || GET_CODE (op) == LABEL_REF)
+ if (SYMBOL_REF_P (op)
+ || LABEL_REF_P (op))
return true;
return false;
@@ -596,10 +614,10 @@
&& CONST_INT_P (XEXP (XEXP (op, 0), 1)))
op = XEXP (XEXP (op, 0), 0);
- if (GET_CODE (op) == LABEL_REF)
+ if (LABEL_REF_P (op))
return true;
- if (GET_CODE (op) != SYMBOL_REF)
+ if (!SYMBOL_REF_P (op))
return false;
if (SYMBOL_REF_TLS_MODEL (op))
@@ -631,7 +649,7 @@
&& CONST_INT_P (XEXP (XEXP (op, 0), 1)))
op = XEXP (XEXP (op, 0), 0);
- if (GET_CODE (op) == SYMBOL_REF
+ if (SYMBOL_REF_P (op)
&& !SYMBOL_REF_FUNCTION_P (op))
return false;
@@ -646,8 +664,9 @@
;; same segment as the GOT. Unfortunately, the flexibility of linker
;; scripts means that we can't be sure of that in general, so assume
;; @GOTOFF is not valid on VxWorks, except with the large code model.
+;; The comments above seem to apply only to VxWorks releases before 7.
(define_predicate "gotoff_operand"
- (and (ior (not (match_test "TARGET_VXWORKS_RTP"))
+ (and (ior (not (match_test "TARGET_VXWORKS_VAROFF"))
(match_test "ix86_cmodel == CM_LARGE")
(match_test "ix86_cmodel == CM_LARGE_PIC"))
(match_operand 0 "local_symbolic_operand")))
@@ -1126,7 +1145,7 @@
unsigned n_elts;
op = avoid_constant_pool_reference (op);
- if (GET_CODE (op) != CONST_VECTOR)
+ if (!CONST_VECTOR_P (op))
return false;
n_elts = CONST_VECTOR_NUNITS (op);
@@ -1154,7 +1173,7 @@
if (MEM_P (op))
{
op = get_pool_constant (XEXP (op, 0));
- if (GET_CODE (op) != CONST_VECTOR)
+ if (!CONST_VECTOR_P (op))
return false;
if (GET_MODE (op) != mode
@@ -1267,12 +1286,19 @@
(match_operand 0 "vector_memory_operand")
(match_code "const_vector")))
+; Return true when OP is register_operand, vector_memory_operand,
+; const_vector zero or const_vector all ones.
+(define_predicate "vector_or_0_or_1s_operand"
+ (ior (match_operand 0 "register_operand")
+ (match_operand 0 "vector_memory_operand")
+ (match_operand 0 "const0_operand")
+ (match_operand 0 "int_float_vector_all_ones_operand")))
+
(define_predicate "bcst_mem_operand"
(and (match_code "vec_duplicate")
(and (match_test "TARGET_AVX512F")
(ior (match_test "TARGET_AVX512VL")
- (and (match_test "GET_MODE_SIZE (GET_MODE (op)) == 64")
- (match_test "TARGET_EVEX512"))))
+ (match_test "GET_MODE_SIZE (GET_MODE (op)) == 64")))
(match_test "VALID_BCST_MODE_P (GET_MODE_INNER (GET_MODE (op)))")
(match_test "GET_MODE (XEXP (op, 0))
== GET_MODE_INNER (GET_MODE (op))")
@@ -1293,6 +1319,9 @@
(ior (match_operand 0 "nonimmediate_operand")
(match_test "const_vec_duplicate_p (op)")))
+(define_predicate "const_vec_dup_operand"
+ (match_test "const_vec_duplicate_p (op)"))
+
;; Return true when OP is either register operand, or any
;; CONST_VECTOR.
(define_predicate "reg_or_const_vector_operand"
@@ -1333,6 +1362,12 @@
(ior (match_operand 0 "nonimmediate_operand")
(match_operand 0 "const0_operand")))
+; Return true when OP is a nonimmediate operand, zero or all ones.
+(define_predicate "nonimm_or_0_or_1s_operand"
+ (ior (match_operand 0 "nonimmediate_operand")
+ (match_operand 0 "const0_operand")
+ (match_operand 0 "int_float_vector_all_ones_operand")))
+
;; Return true for RTX codes that force SImode address.
(define_predicate "SImode_address_operand"
(match_code "subreg,zero_extend,and"))
@@ -1390,8 +1425,8 @@
}
if (TARGET_64BIT
&& flag_pic
- && (GET_CODE (disp) == SYMBOL_REF
- || GET_CODE (disp) == LABEL_REF))
+ && (SYMBOL_REF_P (disp)
+ || LABEL_REF_P (disp)))
return false;
}
@@ -1682,10 +1717,14 @@
(define_predicate "div_operator"
(match_code "div"))
-;; Return true if this is a and, ior or xor operation.
+;; Return true if this is an and, ior or xor operation.
(define_predicate "logic_operator"
(match_code "and,ior,xor"))
+;; Return true if this is an and operation.
+(define_predicate "and_operator"
+ (match_code "and"))
+
;; Return true if this is a plus, minus, and, ior or xor operation.
(define_predicate "plusminuslogic_operator"
(match_code "plus,minus,and,ior,xor"))
@@ -1708,8 +1747,12 @@
(define_predicate "compare_operator"
(match_code "compare"))
-(define_predicate "extract_operator"
- (match_code "zero_extract,sign_extract"))
+(define_predicate "extract_high_operator"
+ (match_code "zero_extract,sign_extract,ashiftrt,lshiftrt")
+{
+ return (const8_operand (XEXP (op, 1), VOIDmode)
+ && (BINARY_P (op) || const8_operand (XEXP (op, 2), VOIDmode)));
+})
;; Return true if OP is a memory operand, aligned to
;; less than its natural alignment.
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index b280676..73906b8 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -279,63 +279,66 @@
;; All vector modes including V?TImode, used in move patterns.
(define_mode_iterator VMOVE
- [(V64QI "TARGET_AVX512F && TARGET_EVEX512") (V32QI "TARGET_AVX") V16QI
- (V32HI "TARGET_AVX512F && TARGET_EVEX512") (V16HI "TARGET_AVX") V8HI
- (V16SI "TARGET_AVX512F && TARGET_EVEX512") (V8SI "TARGET_AVX") V4SI
- (V8DI "TARGET_AVX512F && TARGET_EVEX512") (V4DI "TARGET_AVX") V2DI
- (V4TI "TARGET_AVX512F && TARGET_EVEX512") (V2TI "TARGET_AVX") V1TI
- (V32HF "TARGET_AVX512F && TARGET_EVEX512") (V16HF "TARGET_AVX") V8HF
- (V32BF "TARGET_AVX512F && TARGET_EVEX512") (V16BF "TARGET_AVX") V8BF
- (V16SF "TARGET_AVX512F && TARGET_EVEX512") (V8SF "TARGET_AVX") V4SF
- (V8DF "TARGET_AVX512F && TARGET_EVEX512") (V4DF "TARGET_AVX") V2DF])
+ [(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI
+ (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI
+ (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
+ (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI
+ (V4TI "TARGET_AVX512F") (V2TI "TARGET_AVX") V1TI
+ (V32HF "TARGET_AVX512F") (V16HF "TARGET_AVX") V8HF
+ (V32BF "TARGET_AVX512F") (V16BF "TARGET_AVX") V8BF
+ (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
+ (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") V2DF])
;; All AVX-512{F,VL} vector modes without HF. Supposed TARGET_AVX512F baseline.
(define_mode_iterator V48_AVX512VL
- [(V16SI "TARGET_EVEX512") (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
- (V8DI "TARGET_EVEX512") (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")
- (V16SF "TARGET_EVEX512") (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
- (V8DF "TARGET_EVEX512") (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
+ [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
+ V8DI (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")
+ V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
+ V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
(define_mode_iterator V48_256_512_AVX512VL
- [(V16SI "TARGET_EVEX512") (V8SI "TARGET_AVX512VL")
- (V8DI "TARGET_EVEX512") (V4DI "TARGET_AVX512VL")
- (V16SF "TARGET_EVEX512") (V8SF "TARGET_AVX512VL")
- (V8DF "TARGET_EVEX512") (V4DF "TARGET_AVX512VL")])
+ [V16SI (V8SI "TARGET_AVX512VL")
+ V8DI (V4DI "TARGET_AVX512VL")
+ V16SF (V8SF "TARGET_AVX512VL")
+ V8DF (V4DF "TARGET_AVX512VL")])
;; All AVX-512{F,VL} vector modes. Supposed TARGET_AVX512F baseline.
(define_mode_iterator V48H_AVX512VL
- [(V16SI "TARGET_EVEX512") (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
- (V8DI "TARGET_EVEX512") (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")
- (V32HF "TARGET_AVX512FP16 && TARGET_EVEX512")
+ [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
+ V8DI (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")
+ (V32HF "TARGET_AVX512FP16")
(V16HF "TARGET_AVX512FP16 && TARGET_AVX512VL")
(V8HF "TARGET_AVX512FP16 && TARGET_AVX512VL")
- (V16SF "TARGET_EVEX512") (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
- (V8DF "TARGET_EVEX512") (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
+ V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
+ V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
;; 1,2 byte AVX-512{BW,VL} vector modes. Supposed TARGET_AVX512BW baseline.
(define_mode_iterator VI12_AVX512VL
- [(V64QI "TARGET_EVEX512") (V16QI "TARGET_AVX512VL") (V32QI "TARGET_AVX512VL")
- (V32HI "TARGET_EVEX512") (V16HI "TARGET_AVX512VL") (V8HI "TARGET_AVX512VL")])
+ [V64QI (V16QI "TARGET_AVX512VL") (V32QI "TARGET_AVX512VL")
+ V32HI (V16HI "TARGET_AVX512VL") (V8HI "TARGET_AVX512VL")])
(define_mode_iterator VI12HFBF_AVX512VL
- [(V64QI "TARGET_EVEX512") (V16QI "TARGET_AVX512VL") (V32QI "TARGET_AVX512VL")
- (V32HI "TARGET_EVEX512") (V16HI "TARGET_AVX512VL") (V8HI "TARGET_AVX512VL")
- (V32HF "TARGET_EVEX512") (V16HF "TARGET_AVX512VL") (V8HF "TARGET_AVX512VL")
- (V32BF "TARGET_EVEX512") (V16BF "TARGET_AVX512VL") (V8BF "TARGET_AVX512VL")])
+ [V64QI (V16QI "TARGET_AVX512VL") (V32QI "TARGET_AVX512VL")
+ V32HI (V16HI "TARGET_AVX512VL") (V8HI "TARGET_AVX512VL")
+ V32HF (V16HF "TARGET_AVX512VL") (V8HF "TARGET_AVX512VL")
+ V32BF (V16BF "TARGET_AVX512VL") (V8BF "TARGET_AVX512VL")])
(define_mode_iterator VI1_AVX512VL
- [(V64QI "TARGET_EVEX512") (V16QI "TARGET_AVX512VL") (V32QI "TARGET_AVX512VL")])
+ [V64QI (V16QI "TARGET_AVX512VL") (V32QI "TARGET_AVX512VL")])
+
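+;; 32-byte and 64-byte QImode vector modes.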
+(define_mode_iterator VI1_AVX512_3264
+ [(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX")])
;; All vector modes
(define_mode_iterator V
- [(V64QI "TARGET_AVX512F && TARGET_EVEX512") (V32QI "TARGET_AVX") V16QI
- (V32HI "TARGET_AVX512F && TARGET_EVEX512") (V16HI "TARGET_AVX") V8HI
- (V16SI "TARGET_AVX512F && TARGET_EVEX512") (V8SI "TARGET_AVX") V4SI
- (V8DI "TARGET_AVX512F && TARGET_EVEX512") (V4DI "TARGET_AVX") V2DI
- (V32HF "TARGET_AVX512F && TARGET_EVEX512") (V16HF "TARGET_AVX") V8HF
- (V32BF "TARGET_AVX512F && TARGET_EVEX512") (V16BF "TARGET_AVX") V8BF
- (V16SF "TARGET_AVX512F && TARGET_EVEX512") (V8SF "TARGET_AVX") V4SF
- (V8DF "TARGET_AVX512F && TARGET_EVEX512") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
+ [(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI
+ (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI
+ (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
+ (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI
+ (V32HF "TARGET_AVX512F") (V16HF "TARGET_AVX") V8HF
+ (V32BF "TARGET_AVX512F") (V16BF "TARGET_AVX") V8BF
+ (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
+ (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
;; All 128bit vector modes
(define_mode_iterator V_128
@@ -352,54 +355,44 @@
;; All 512bit vector modes
(define_mode_iterator V_512
- [(V64QI "TARGET_EVEX512") (V32HI "TARGET_EVEX512")
- (V16SI "TARGET_EVEX512") (V8DI "TARGET_EVEX512")
- (V16SF "TARGET_EVEX512") (V8DF "TARGET_EVEX512")
- (V32HF "TARGET_EVEX512") (V32BF "TARGET_EVEX512")])
+ [V64QI V32HI V16SI V8DI
+ V16SF V8DF V32HF V32BF])
;; All 256bit and 512bit vector modes
(define_mode_iterator V_256_512
[V32QI V16HI V16HF V16BF V8SI V4DI V8SF V4DF
- (V64QI "TARGET_AVX512F && TARGET_EVEX512")
- (V32HI "TARGET_AVX512F && TARGET_EVEX512")
- (V32HF "TARGET_AVX512F && TARGET_EVEX512")
- (V32BF "TARGET_AVX512F && TARGET_EVEX512")
- (V16SI "TARGET_AVX512F && TARGET_EVEX512")
- (V8DI "TARGET_AVX512F && TARGET_EVEX512")
- (V16SF "TARGET_AVX512F && TARGET_EVEX512")
- (V8DF "TARGET_AVX512F && TARGET_EVEX512")])
+ (V64QI "TARGET_AVX512F") (V32HI "TARGET_AVX512F")
+ (V32HF "TARGET_AVX512F") (V32BF "TARGET_AVX512F")
+ (V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F")
+ (V16SF "TARGET_AVX512F") (V8DF "TARGET_AVX512F")])
;; All vector float modes
(define_mode_iterator VF
- [(V16SF "TARGET_AVX512F && TARGET_EVEX512") (V8SF "TARGET_AVX") V4SF
- (V8DF "TARGET_AVX512F && TARGET_EVEX512") (V4DF "TARGET_AVX")
+ [(V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
+ (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX")
(V2DF "TARGET_SSE2")])
(define_mode_iterator VF1_VF2_AVX512DQ
- [(V16SF "TARGET_AVX512F && TARGET_EVEX512") (V8SF "TARGET_AVX") V4SF
- (V8DF "TARGET_AVX512DQ && TARGET_EVEX512")
+ [(V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
+ (V8DF "TARGET_AVX512DQ")
(V4DF "TARGET_AVX512DQ && TARGET_AVX512VL")
(V2DF "TARGET_AVX512DQ && TARGET_AVX512VL")])
-(define_mode_iterator VF1_VF2_AVX10_2
- [(V16SF "TARGET_AVX10_2") V8SF V4SF
- (V8DF "TARGET_AVX10_2") V4DF V2DF])
-
(define_mode_iterator VFH
- [(V32HF "TARGET_AVX512FP16 && TARGET_EVEX512")
+ [(V32HF "TARGET_AVX512FP16")
(V16HF "TARGET_AVX512FP16 && TARGET_AVX512VL")
(V8HF "TARGET_AVX512FP16 && TARGET_AVX512VL")
- (V16SF "TARGET_AVX512F && TARGET_EVEX512") (V8SF "TARGET_AVX") V4SF
- (V8DF "TARGET_AVX512F && TARGET_EVEX512") (V4DF "TARGET_AVX")
+ (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
+ (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX")
(V2DF "TARGET_SSE2")])
(define_mode_iterator VF_BHSD
- [(V32HF "TARGET_AVX512FP16 && TARGET_EVEX512")
+ [(V32HF "TARGET_AVX512FP16")
(V16HF "TARGET_AVX512FP16 && TARGET_AVX512VL")
(V8HF "TARGET_AVX512FP16 && TARGET_AVX512VL")
- (V16SF "TARGET_AVX512F && TARGET_EVEX512")
+ (V16SF "TARGET_AVX512F")
(V8SF "TARGET_AVX") V4SF
- (V8DF "TARGET_AVX512F && TARGET_EVEX512")
+ (V8DF "TARGET_AVX512F")
(V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")
(V32BF "TARGET_AVX10_2")
(V16BF "TARGET_AVX10_2")
@@ -408,12 +401,12 @@
;; 128-, 256- and 512-bit float vector modes for bitwise operations
(define_mode_iterator VFB
- [(V32BF "TARGET_AVX512F && TARGET_EVEX512")
+ [(V32BF "TARGET_AVX512F")
(V16BF "TARGET_AVX") (V8BF "TARGET_SSE2")
- (V32HF "TARGET_AVX512F && TARGET_EVEX512")
+ (V32HF "TARGET_AVX512F")
(V16HF "TARGET_AVX") (V8HF "TARGET_SSE2")
- (V16SF "TARGET_AVX512F && TARGET_EVEX512") (V8SF "TARGET_AVX") V4SF
- (V8DF "TARGET_AVX512F && TARGET_EVEX512")
+ (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
+ (V8DF "TARGET_AVX512F")
(V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
;; 128- and 256-bit float vector modes
@@ -430,44 +423,39 @@
;; All SFmode vector float modes
(define_mode_iterator VF1
- [(V16SF "TARGET_AVX512F && TARGET_EVEX512") (V8SF "TARGET_AVX") V4SF])
+ [(V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF])
(define_mode_iterator VF1_AVX2
- [(V16SF "TARGET_AVX512F && TARGET_EVEX512") (V8SF "TARGET_AVX2") V4SF])
+ [(V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX2") V4SF])
;; 128- and 256-bit SF vector modes
(define_mode_iterator VF1_128_256
[(V8SF "TARGET_AVX") V4SF])
(define_mode_iterator VF1_128_256VL
- [(V8SF "TARGET_EVEX512") (V4SF "TARGET_AVX512VL")])
+ [V8SF (V4SF "TARGET_AVX512VL")])
;; All DFmode vector float modes
(define_mode_iterator VF2
- [(V8DF "TARGET_AVX512F && TARGET_EVEX512") (V4DF "TARGET_AVX") V2DF])
-
-(define_mode_iterator VF2_AVX10_2
- [(V8DF "TARGET_AVX10_2") V4DF V2DF])
+ [(V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") V2DF])
;; All DFmode & HFmode & BFmode vector float modes
(define_mode_iterator VF2HB
- [(V32BF "TARGET_AVX10_2")
- (V16BF "TARGET_AVX10_2")
- (V8BF "TARGET_AVX10_2")
- (V32HF "TARGET_AVX512FP16 && TARGET_EVEX512")
+ [(V32BF "TARGET_AVX10_2") (V16BF "TARGET_AVX10_2")
+ (V8BF "TARGET_AVX10_2") (V32HF "TARGET_AVX512FP16")
(V16HF "TARGET_AVX512FP16 && TARGET_AVX512VL")
(V8HF "TARGET_AVX512FP16 && TARGET_AVX512VL")
- (V8DF "TARGET_AVX512F && TARGET_EVEX512") (V4DF "TARGET_AVX") V2DF])
+ (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") V2DF])
;; 128- and 256-bit DF vector modes
(define_mode_iterator VF2_128_256
[(V4DF "TARGET_AVX") V2DF])
(define_mode_iterator VF2_512_256
- [(V8DF "TARGET_AVX512F && TARGET_EVEX512") V4DF])
+ [(V8DF "TARGET_AVX512F") V4DF])
(define_mode_iterator VF2_512_256VL
- [(V8DF "TARGET_EVEX512") (V4DF "TARGET_AVX512VL")])
+ [V8DF (V4DF "TARGET_AVX512VL")])
;; All 128bit vector SF/DF modes
(define_mode_iterator VF_128
@@ -484,116 +472,102 @@
;; All 512bit vector float modes
(define_mode_iterator VF_512
- [(V16SF "TARGET_EVEX512") (V8DF "TARGET_EVEX512")])
+ [V16SF V8DF])
;; All 512bit vector float modes for bitwise operations
(define_mode_iterator VFB_512
- [(V32BF "TARGET_EVEX512")
- (V32HF "TARGET_EVEX512")
- (V16SF "TARGET_EVEX512")
- (V8DF "TARGET_EVEX512")])
+ [V32BF V32HF V16SF V8DF])
(define_mode_iterator V24F_128
[V4SF V8HF V8BF])
(define_mode_iterator VI48_AVX512VL
- [(V16SI "TARGET_EVEX512") (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
- (V8DI "TARGET_EVEX512") (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
+ [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
+ V8DI (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
(define_mode_iterator VI1248_AVX512VLBW
- [(V64QI "TARGET_AVX512BW && TARGET_EVEX512")
+ [(V64QI "TARGET_AVX512BW")
(V32QI "TARGET_AVX512VL && TARGET_AVX512BW")
(V16QI "TARGET_AVX512VL && TARGET_AVX512BW")
- (V32HI "TARGET_AVX512BW && TARGET_EVEX512")
+ (V32HI "TARGET_AVX512BW")
(V16HI "TARGET_AVX512VL && TARGET_AVX512BW")
(V8HI "TARGET_AVX512VL && TARGET_AVX512BW")
- (V16SI "TARGET_EVEX512") (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
- (V8DI "TARGET_EVEX512") (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
-
-(define_mode_iterator VI1248_AVX10_2
- [(V64QI "TARGET_AVX10_2") V32QI V16QI
- (V32HI "TARGET_AVX10_2") V16HI V8HI
- (V16SI "TARGET_AVX10_2") V8SI V4SI
- (V8DI "TARGET_AVX10_2") V4DI V2DI])
+ V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
+ V8DI (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
(define_mode_iterator VF_AVX512VL
- [(V16SF "TARGET_EVEX512") (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
- (V8DF "TARGET_EVEX512") (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
+ [V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
+ V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
(define_mode_iterator VFH_AVX512VL
- [(V32HF "TARGET_AVX512FP16 && TARGET_EVEX512")
+ [(V32HF "TARGET_AVX512FP16")
(V16HF "TARGET_AVX512FP16 && TARGET_AVX512VL")
(V8HF "TARGET_AVX512FP16 && TARGET_AVX512VL")
- (V16SF "TARGET_EVEX512") (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
- (V8DF "TARGET_EVEX512") (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
+ V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
+ V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
+
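+;; 4-element vector modes that require AVX512VL.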
+(define_mode_iterator V48_AVX512VL_4
+ [(V4SF "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL")
+ (V4SI "TARGET_AVX512VL") (V4DI "TARGET_AVX512VL")])
+
+(define_mode_iterator VI48_AVX512VL_4
+ [(V4SI "TARGET_AVX512VL") (V4DI "TARGET_AVX512VL")])
-(define_mode_iterator VFH_AVX10_2
- [(V32HF "TARGET_AVX10_2") V16HF V8HF
- (V16SF "TARGET_AVX10_2") V8SF V4SF
- (V8DF "TARGET_AVX10_2") V4DF V2DF])
+(define_mode_iterator V8_AVX512VL_2
+ [(V2DF "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
(define_mode_iterator VF2_AVX512VL
- [(V8DF "TARGET_EVEX512") (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
+ [V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
(define_mode_iterator VF1_AVX512VL
- [(V16SF "TARGET_EVEX512") (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")])
+ [V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")])
(define_mode_iterator VF1_AVX512BW
- [(V16SF "TARGET_AVX512BW && TARGET_EVEX512") (V8SF "TARGET_AVX2") V4SF])
-
-(define_mode_iterator VF1_AVX10_2
- [(V16SF "TARGET_AVX10_2") V8SF V4SF])
+ [(V16SF "TARGET_AVX512BW") (V8SF "TARGET_AVX2") V4SF])
(define_mode_iterator VHFBF
- [(V32HF "TARGET_EVEX512") V16HF V8HF
- (V32BF "TARGET_EVEX512") V16BF V8BF])
+ [V32HF V16HF V8HF V32BF V16BF V8BF])
(define_mode_iterator VHFBF_256 [V16HF V16BF])
(define_mode_iterator VHFBF_128 [V8HF V8BF])
(define_mode_iterator VHF_AVX512VL
- [(V32HF "TARGET_EVEX512") (V16HF "TARGET_AVX512VL") (V8HF "TARGET_AVX512VL")])
+ [V32HF (V16HF "TARGET_AVX512VL") (V8HF "TARGET_AVX512VL")])
(define_mode_iterator VHFBF_AVX512VL
- [(V32HF "TARGET_EVEX512") (V16HF "TARGET_AVX512VL") (V8HF "TARGET_AVX512VL")
- (V32BF "TARGET_EVEX512") (V16BF "TARGET_AVX512VL") (V8BF "TARGET_AVX512VL")])
-
-(define_mode_iterator VHF_AVX10_2
- [(V32HF "TARGET_AVX10_2") V16HF V8HF])
+ [V32HF (V16HF "TARGET_AVX512VL") (V8HF "TARGET_AVX512VL")
+ V32BF (V16BF "TARGET_AVX512VL") (V8BF "TARGET_AVX512VL")])
-(define_mode_iterator VBF_AVX10_2
- [(V32BF "TARGET_AVX10_2") V16BF V8BF])
+(define_mode_iterator VBF
+ [V32BF V16BF V8BF])
;; All vector integer modes
(define_mode_iterator VI
- [(V16SI "TARGET_AVX512F && TARGET_EVEX512")
- (V8DI "TARGET_AVX512F && TARGET_EVEX512")
- (V64QI "TARGET_AVX512BW && TARGET_EVEX512") (V32QI "TARGET_AVX") V16QI
- (V32HI "TARGET_AVX512BW && TARGET_EVEX512") (V16HI "TARGET_AVX") V8HI
+ [(V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F")
+ (V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX") V16QI
+ (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX") V8HI
(V8SI "TARGET_AVX") V4SI
(V4DI "TARGET_AVX") V2DI])
;; All vector integer, HF and BF modes
(define_mode_iterator VIHFBF
- [(V16SI "TARGET_AVX512F && TARGET_EVEX512")
- (V8DI "TARGET_AVX512F && TARGET_EVEX512")
- (V64QI "TARGET_AVX512BW && TARGET_EVEX512") (V32QI "TARGET_AVX") V16QI
- (V32HI "TARGET_AVX512BW && TARGET_EVEX512") (V16HI "TARGET_AVX") V8HI
- (V8SI "TARGET_AVX") V4SI
- (V4DI "TARGET_AVX") V2DI
- (V32HF "TARGET_AVX512BW && TARGET_EVEX512") (V16HF "TARGET_AVX") V8HF
- (V32BF "TARGET_AVX512BW && TARGET_EVEX512") (V16BF "TARGET_AVX") V8BF])
+ [(V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F")
+ (V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX") V16QI
+ (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX") V8HI
+ (V8SI "TARGET_AVX") V4SI (V4DI "TARGET_AVX") V2DI
+ (V32HF "TARGET_AVX512BW") (V16HF "TARGET_AVX") V8HF
+ (V32BF "TARGET_AVX512BW") (V16BF "TARGET_AVX") V8BF])
(define_mode_iterator VI_AVX2
- [(V64QI "TARGET_AVX512BW && TARGET_EVEX512") (V32QI "TARGET_AVX2") V16QI
- (V32HI "TARGET_AVX512BW && TARGET_EVEX512") (V16HI "TARGET_AVX2") V8HI
- (V16SI "TARGET_AVX512F && TARGET_EVEX512") (V8SI "TARGET_AVX2") V4SI
- (V8DI "TARGET_AVX512F && TARGET_EVEX512") (V4DI "TARGET_AVX2") V2DI])
+ [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI
+ (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI
+ (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI
+ (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX2") V2DI])
(define_mode_iterator VI_AVX_AVX512F
- [(V64QI "TARGET_AVX512F && TARGET_EVEX512") (V32QI "TARGET_AVX") V16QI
- (V32HI "TARGET_AVX512F && TARGET_EVEX512") (V16HI "TARGET_AVX") V8HI
- (V16SI "TARGET_AVX512F && TARGET_EVEX512") (V8SI "TARGET_AVX") V4SI
- (V8DI "TARGET_AVX512F && TARGET_EVEX512") (V4DI "TARGET_AVX") V2DI])
+ [(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI
+ (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI
+ (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
+ (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI])
;; All QImode vector integer modes
(define_mode_iterator VI1
@@ -611,56 +585,50 @@
(V8SI "TARGET_AVX") (V4DI "TARGET_AVX")])
(define_mode_iterator VI8
- [(V8DI "TARGET_AVX512F && TARGET_EVEX512") (V4DI "TARGET_AVX") V2DI])
-
-(define_mode_iterator VI8_AVX10_2
- [(V8DI "TARGET_AVX10_2") V4DI V2DI])
+ [(V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI])
(define_mode_iterator VI8_FVL
- [(V8DI "TARGET_AVX512F && TARGET_EVEX512") V4DI (V2DI "TARGET_AVX512VL")])
+ [(V8DI "TARGET_AVX512F") V4DI (V2DI "TARGET_AVX512VL")])
(define_mode_iterator VI8_AVX512VL
- [(V8DI "TARGET_EVEX512") (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
+ [V8DI (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
(define_mode_iterator VI8_256_512
- [(V8DI "TARGET_EVEX512") (V4DI "TARGET_AVX512VL")])
+ [V8DI (V4DI "TARGET_AVX512VL")])
(define_mode_iterator VI1_AVX2
[(V32QI "TARGET_AVX2") V16QI])
(define_mode_iterator VI1_AVX512
- [(V64QI "TARGET_AVX512BW && TARGET_EVEX512") (V32QI "TARGET_AVX2") V16QI])
+ [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI])
(define_mode_iterator VI1_AVX512F
- [(V64QI "TARGET_AVX512F && TARGET_EVEX512") (V32QI "TARGET_AVX") V16QI])
+ [(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI])
(define_mode_iterator VI1_AVX512VNNI
- [(V64QI "TARGET_AVX512VNNI && TARGET_EVEX512") (V32QI "TARGET_AVX2") V16QI])
+ [(V64QI "TARGET_AVX512VNNI") (V32QI "TARGET_AVX2") V16QI])
(define_mode_iterator VI1_AVX512VNNIBW
- [(V64QI "(TARGET_AVX512BW || TARGET_AVX512VNNI) && TARGET_EVEX512")
+ [(V64QI "TARGET_AVX512BW || TARGET_AVX512VNNI")
(V32QI "TARGET_AVX2") V16QI])
(define_mode_iterator VI12_256_512_AVX512VL
- [(V64QI "TARGET_EVEX512") (V32QI "TARGET_AVX512VL")
- (V32HI "TARGET_EVEX512") (V16HI "TARGET_AVX512VL")])
+ [V64QI (V32QI "TARGET_AVX512VL")
+ V32HI (V16HI "TARGET_AVX512VL")])
(define_mode_iterator VI2_AVX2
[(V16HI "TARGET_AVX2") V8HI])
(define_mode_iterator VI2_AVX2_AVX512BW
- [(V32HI "TARGET_AVX512BW && TARGET_EVEX512") (V16HI "TARGET_AVX2") V8HI])
+ [(V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI])
(define_mode_iterator VI2_AVX512F
- [(V32HI "TARGET_AVX512F && TARGET_EVEX512") (V16HI "TARGET_AVX2") V8HI])
+ [(V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX2") V8HI])
(define_mode_iterator VI2_AVX512VNNIBW
- [(V32HI "(TARGET_AVX512BW || TARGET_AVX512VNNI) && TARGET_EVEX512")
+ [(V32HI "TARGET_AVX512BW || TARGET_AVX512VNNI")
(V16HI "TARGET_AVX2") V8HI])
-(define_mode_iterator VI2_AVX10_2
- [(V32HI "TARGET_AVX10_2") V16HI V8HI])
-
(define_mode_iterator VI4_AVX
[(V8SI "TARGET_AVX") V4SI])
@@ -668,65 +636,64 @@
[(V8SI "TARGET_AVX2") V4SI])
(define_mode_iterator VI4_AVX512F
- [(V16SI "TARGET_AVX512F && TARGET_EVEX512") (V8SI "TARGET_AVX2") V4SI])
+ [(V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI])
(define_mode_iterator VI4_AVX512VL
- [(V16SI "TARGET_EVEX512") (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")])
+ [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")])
(define_mode_iterator VI4_AVX10_2
[(V16SI "TARGET_AVX10_2") V8SI V4SI])
(define_mode_iterator VI48_AVX512F_AVX512VL
- [V4SI V8SI (V16SI "TARGET_AVX512F && TARGET_EVEX512")
+ [V4SI V8SI (V16SI "TARGET_AVX512F")
(V2DI "TARGET_AVX512VL") (V4DI "TARGET_AVX512VL")
- (V8DI "TARGET_AVX512F && TARGET_EVEX512")])
+ (V8DI "TARGET_AVX512F")])
(define_mode_iterator VI2_AVX512VL
- [(V8HI "TARGET_AVX512VL") (V16HI "TARGET_AVX512VL") (V32HI "TARGET_EVEX512")])
+ [(V8HI "TARGET_AVX512VL") (V16HI "TARGET_AVX512VL") V32HI])
(define_mode_iterator VI2HFBF_AVX512VL
- [(V8HI "TARGET_AVX512VL") (V16HI "TARGET_AVX512VL") (V32HI "TARGET_EVEX512")
- (V8HF "TARGET_AVX512VL") (V16HF "TARGET_AVX512VL") (V32HF "TARGET_EVEX512")
- (V8BF "TARGET_AVX512VL") (V16BF "TARGET_AVX512VL") (V32BF "TARGET_EVEX512")])
+ [(V8HI "TARGET_AVX512VL") (V16HI "TARGET_AVX512VL") V32HI
+ (V8HF "TARGET_AVX512VL") (V16HF "TARGET_AVX512VL") V32HF
+ (V8BF "TARGET_AVX512VL") (V16BF "TARGET_AVX512VL") V32BF])
(define_mode_iterator VI2H_AVX512VL
- [(V8HI "TARGET_AVX512VL") (V16HI "TARGET_AVX512VL") (V32HI "TARGET_EVEX512")
- (V8SI "TARGET_AVX512VL") (V16SI "TARGET_EVEX512")
- (V8DI "TARGET_EVEX512")])
+ [(V8HI "TARGET_AVX512VL") (V16HI "TARGET_AVX512VL") V32HI
+ (V8SI "TARGET_AVX512VL") V16SI V8DI])
(define_mode_iterator VI1_AVX512VL_F
- [V32QI (V16QI "TARGET_AVX512VL") (V64QI "TARGET_AVX512F && TARGET_EVEX512")])
+ [V32QI (V16QI "TARGET_AVX512VL") (V64QI "TARGET_AVX512F")])
(define_mode_iterator VI8_AVX2_AVX512BW
- [(V8DI "TARGET_AVX512BW && TARGET_EVEX512") (V4DI "TARGET_AVX2") V2DI])
+ [(V8DI "TARGET_AVX512BW") (V4DI "TARGET_AVX2") V2DI])
(define_mode_iterator VI8_AVX2
[(V4DI "TARGET_AVX2") V2DI])
(define_mode_iterator VI8_AVX2_AVX512F
- [(V8DI "TARGET_AVX512F && TARGET_EVEX512") (V4DI "TARGET_AVX2") V2DI])
+ [(V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX2") V2DI])
(define_mode_iterator VI8_AVX_AVX512F
- [(V8DI "TARGET_AVX512F && TARGET_EVEX512") (V4DI "TARGET_AVX")])
+ [(V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX")])
(define_mode_iterator VI4_128_8_256
[V4SI V4DI])
;; All V8D* modes
(define_mode_iterator V8FI
- [(V8DF "TARGET_EVEX512") (V8DI "TARGET_EVEX512")])
+ [V8DF V8DI])
;; All V16S* modes
(define_mode_iterator V16FI
- [(V16SF "TARGET_EVEX512") (V16SI "TARGET_EVEX512")])
+ [V16SF V16SI])
;; ??? We should probably use TImode instead.
(define_mode_iterator VIMAX_AVX2_AVX512BW
- [(V4TI "TARGET_AVX512BW && TARGET_EVEX512") (V2TI "TARGET_AVX2") V1TI])
+ [(V4TI "TARGET_AVX512BW") (V2TI "TARGET_AVX2") V1TI])
;; Assume TARGET_AVX512BW as the baseline
(define_mode_iterator VIMAX_AVX512VL
- [(V4TI "TARGET_EVEX512") (V2TI "TARGET_AVX512VL") (V1TI "TARGET_AVX512VL")])
+ [V4TI (V2TI "TARGET_AVX512VL") (V1TI "TARGET_AVX512VL")])
(define_mode_iterator VIMAX_AVX2
[(V2TI "TARGET_AVX2") V1TI])
@@ -736,17 +703,17 @@
(V16HI "TARGET_AVX2") V8HI])
(define_mode_iterator VI12_AVX2_AVX512BW
- [(V64QI "TARGET_AVX512BW && TARGET_EVEX512") (V32QI "TARGET_AVX2") V16QI
- (V32HI "TARGET_AVX512BW && TARGET_EVEX512") (V16HI "TARGET_AVX2") V8HI])
+ [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI
+ (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI])
(define_mode_iterator VI24_AVX2
[(V16HI "TARGET_AVX2") V8HI
(V8SI "TARGET_AVX2") V4SI])
(define_mode_iterator VI124_AVX2_24_AVX512F_1_AVX512BW
- [(V64QI "TARGET_AVX512BW && TARGET_EVEX512") (V32QI "TARGET_AVX2") V16QI
- (V32HI "TARGET_AVX512F && TARGET_EVEX512") (V16HI "TARGET_AVX2") V8HI
- (V16SI "TARGET_AVX512F && TARGET_EVEX512") (V8SI "TARGET_AVX2") V4SI])
+ [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI
+ (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX2") V8HI
+ (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI])
(define_mode_iterator VI124_AVX2
[(V32QI "TARGET_AVX2") V16QI
@@ -754,17 +721,17 @@
(V8SI "TARGET_AVX2") V4SI])
(define_mode_iterator VI248_AVX512VL
- [(V32HI "TARGET_EVEX512") (V16SI "TARGET_EVEX512") (V8DI "TARGET_EVEX512")
+ [V32HI V16SI V8DI
(V16HI "TARGET_AVX512VL") (V8SI "TARGET_AVX512VL")
(V4DI "TARGET_AVX512VL") (V8HI "TARGET_AVX512VL")
(V4SI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
(define_mode_iterator VI248_AVX512VLBW
- [(V32HI "TARGET_AVX512BW && TARGET_EVEX512")
+ [(V32HI "TARGET_AVX512BW")
(V16HI "TARGET_AVX512VL && TARGET_AVX512BW")
(V8HI "TARGET_AVX512VL && TARGET_AVX512BW")
- (V16SI "TARGET_EVEX512") (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
- (V8DI "TARGET_EVEX512") (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
+ V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
+ V8DI (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
(define_mode_iterator VI48_AVX2
[(V8SI "TARGET_AVX2") V4SI
@@ -776,17 +743,16 @@
(V4DI "TARGET_AVX2") V2DI])
(define_mode_iterator VI248_AVX2_8_AVX512F_24_AVX512BW
- [(V32HI "TARGET_AVX512BW && TARGET_EVEX512") (V16HI "TARGET_AVX2") V8HI
- (V16SI "TARGET_AVX512BW && TARGET_EVEX512") (V8SI "TARGET_AVX2") V4SI
- (V8DI "TARGET_AVX512F && TARGET_EVEX512") (V4DI "TARGET_AVX2") V2DI])
+ [(V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI
+ (V16SI "TARGET_AVX512BW") (V8SI "TARGET_AVX2") V4SI
+ (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX2") V2DI])
(define_mode_iterator VI248_AVX512BW
- [(V32HI "TARGET_AVX512BW && TARGET_EVEX512") (V16SI "TARGET_EVEX512")
- (V8DI "TARGET_EVEX512")])
+ [(V32HI "TARGET_AVX512BW") V16SI V8DI])
(define_mode_iterator VI248_AVX512BW_AVX512VL
- [(V32HI "TARGET_AVX512BW && TARGET_EVEX512")
- (V4DI "TARGET_AVX512VL") (V16SI "TARGET_EVEX512") (V8DI "TARGET_EVEX512")])
+ [(V32HI "TARGET_AVX512BW")
+ (V4DI "TARGET_AVX512VL") V16SI V8DI])
;; Assume TARGET_AVX512VL as the baseline
(define_mode_iterator VI248_AVX512BW_1
@@ -800,16 +766,16 @@
V4DI V2DI])
(define_mode_iterator VI48_AVX512F
- [(V16SI "TARGET_AVX512F && TARGET_EVEX512") V8SI V4SI
- (V8DI "TARGET_AVX512F && TARGET_EVEX512") V4DI V2DI])
+ [(V16SI "TARGET_AVX512F") V8SI V4SI
+ (V8DI "TARGET_AVX512F") V4DI V2DI])
(define_mode_iterator VI48_AVX_AVX512F
- [(V16SI "TARGET_AVX512F && TARGET_EVEX512") (V8SI "TARGET_AVX") V4SI
- (V8DI "TARGET_AVX512F && TARGET_EVEX512") (V4DI "TARGET_AVX") V2DI])
+ [(V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
+ (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI])
(define_mode_iterator VI12_AVX_AVX512F
- [(V64QI "TARGET_AVX512F && TARGET_EVEX512") (V32QI "TARGET_AVX") V16QI
- (V32HI "TARGET_AVX512F && TARGET_EVEX512") (V16HI "TARGET_AVX") V8HI])
+ [(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI
+ (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI])
(define_mode_iterator V48_128_256
[V4SF V2DF
@@ -950,10 +916,10 @@
(define_mode_iterator VI248_128 [V8HI V4SI V2DI])
(define_mode_iterator VI248_256 [V16HI V8SI V4DI])
(define_mode_iterator VI248_512
- [(V32HI "TARGET_EVEX512") (V16SI "TARGET_EVEX512") (V8DI "TARGET_EVEX512")])
+ [V32HI V16SI V8DI])
(define_mode_iterator VI48_128 [V4SI V2DI])
(define_mode_iterator VI148_512
- [(V64QI "TARGET_EVEX512") (V16SI "TARGET_EVEX512") (V8DI "TARGET_EVEX512")])
+ [V64QI V16SI V8DI])
(define_mode_iterator VI148_256 [V32QI V8SI V4DI])
(define_mode_iterator VI148_128 [V16QI V4SI V2DI])
@@ -961,75 +927,62 @@
(define_mode_iterator VI124_256 [V32QI V16HI V8SI])
(define_mode_iterator VI124_256_AVX512F_AVX512BW
[V32QI V16HI V8SI
- (V64QI "TARGET_AVX512BW && TARGET_EVEX512")
- (V32HI "TARGET_AVX512BW && TARGET_EVEX512")
- (V16SI "TARGET_AVX512F && TARGET_EVEX512")])
+ (V64QI "TARGET_AVX512BW") (V32HI "TARGET_AVX512BW")
+ (V16SI "TARGET_AVX512F")])
(define_mode_iterator VI48_256 [V8SI V4DI])
(define_mode_iterator VI48_512
- [(V16SI "TARGET_EVEX512") (V8DI "TARGET_EVEX512")])
+ [V16SI V8DI])
(define_mode_iterator VI4_256_8_512 [V8SI V8DI])
(define_mode_iterator VI_AVX512BW
- [(V16SI "TARGET_EVEX512") (V8DI "TARGET_EVEX512")
- (V32HI "TARGET_AVX512BW && TARGET_EVEX512")
- (V64QI "TARGET_AVX512BW && TARGET_EVEX512")])
+ [V16SI V8DI
+ (V32HI "TARGET_AVX512BW") (V64QI "TARGET_AVX512BW")])
(define_mode_iterator VIHFBF_AVX512BW
- [(V16SI "TARGET_EVEX512") (V8DI "TARGET_EVEX512")
- (V32HI "TARGET_AVX512BW && TARGET_EVEX512")
- (V64QI "TARGET_AVX512BW && TARGET_EVEX512")
- (V32HF "TARGET_AVX512BW && TARGET_EVEX512")
- (V32BF "TARGET_AVX512BW && TARGET_EVEX512")])
+ [V16SI V8DI
+ (V32HI "TARGET_AVX512BW") (V64QI "TARGET_AVX512BW")
+ (V32HF "TARGET_AVX512BW") (V32BF "TARGET_AVX512BW")])
;; Integer/float mode pairs of matching size
(define_mode_iterator VI2F_256_512
- [V16HI (V32HI "TARGET_EVEX512")
- V16HF (V32HF "TARGET_EVEX512")
- V16BF (V32BF "TARGET_EVEX512")])
+ [V16HI V32HI V16HF V32HF V16BF V32BF])
(define_mode_iterator VI4F_128 [V4SI V4SF])
(define_mode_iterator VI8F_128 [V2DI V2DF])
(define_mode_iterator VI4F_256 [V8SI V8SF])
(define_mode_iterator VI8F_256 [V4DI V4DF])
(define_mode_iterator VI4F_256_512
- [V8SI V8SF
- (V16SI "TARGET_AVX512F && TARGET_EVEX512")
- (V16SF "TARGET_AVX512F && TARGET_EVEX512")])
+ [V8SI V8SF (V16SI "TARGET_AVX512F") (V16SF "TARGET_AVX512F")])
(define_mode_iterator VI48F_256_512
[V8SI V8SF
- (V16SI "TARGET_AVX512F && TARGET_EVEX512")
- (V16SF "TARGET_AVX512F && TARGET_EVEX512")
- (V8DI "TARGET_AVX512F && TARGET_EVEX512")
- (V8DF "TARGET_AVX512F && TARGET_EVEX512")
- (V4DI "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL")])
+ (V16SI "TARGET_AVX512F") (V16SF "TARGET_AVX512F")
+ (V8DI "TARGET_AVX512F") (V8DF "TARGET_AVX512F")
+ (V4DI "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL")])
(define_mode_iterator VF48H_AVX512VL
- [(V8DF "TARGET_EVEX512") (V16SF "TARGET_EVEX512") (V8SF "TARGET_AVX512VL")])
+ [V8DF V16SF (V8SF "TARGET_AVX512VL")])
(define_mode_iterator VF48_128
[V2DF V4SF])
(define_mode_iterator VI48F
- [(V16SI "TARGET_EVEX512") (V16SF "TARGET_EVEX512")
- (V8DI "TARGET_EVEX512") (V8DF "TARGET_EVEX512")
+ [V16SI V16SF V8DI V8DF
(V8SI "TARGET_AVX512VL") (V8SF "TARGET_AVX512VL")
(V4DI "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL")
(V4SI "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
(V2DI "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
(define_mode_iterator VI12_VI48F_AVX512VL
- [(V16SI "TARGET_AVX512F && TARGET_EVEX512")
- (V16SF "TARGET_AVX512F && TARGET_EVEX512")
- (V8DI "TARGET_AVX512F && TARGET_EVEX512")
- (V8DF "TARGET_AVX512F && TARGET_EVEX512")
+ [(V16SI "TARGET_AVX512F") (V16SF "TARGET_AVX512F")
+ (V8DI "TARGET_AVX512F") (V8DF "TARGET_AVX512F")
(V8SI "TARGET_AVX512VL") (V8SF "TARGET_AVX512VL")
(V4DI "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL")
(V4SI "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
(V2DI "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")
- (V64QI "TARGET_EVEX512") (V16QI "TARGET_AVX512VL") (V32QI "TARGET_AVX512VL")
- (V32HI "TARGET_EVEX512") (V16HI "TARGET_AVX512VL") (V8HI "TARGET_AVX512VL")])
+ V64QI (V16QI "TARGET_AVX512VL") (V32QI "TARGET_AVX512VL")
+ V32HI (V16HI "TARGET_AVX512VL") (V8HI "TARGET_AVX512VL")])
(define_mode_iterator VI48F_256 [V8SI V8SF V4DI V4DF])
(define_mode_iterator V8_128 [V8HI V8HF V8BF])
(define_mode_iterator V16_256 [V16HI V16HF V16BF])
(define_mode_iterator V32_512
- [(V32HI "TARGET_EVEX512") (V32HF "TARGET_EVEX512") (V32BF "TARGET_EVEX512")])
+ [V32HI V32HF V32BF])
;; Mapping from float mode to required SSE level
(define_mode_attr sse
@@ -1441,7 +1394,7 @@
;; Mix-n-match
(define_mode_iterator AVX256MODE2P [V8SI V8SF V4DF])
(define_mode_iterator AVX512MODE2P
- [(V16SI "TARGET_EVEX512") (V16SF "TARGET_EVEX512") (V8DF "TARGET_EVEX512")])
+ [V16SI V16SF V8DF])
;; Mapping for dbpsadbw modes
(define_mode_attr dbpsadbwmode
@@ -1639,6 +1592,44 @@
"&& 1"
[(set (match_dup 0) (match_dup 1))])
+(define_insn_and_split "*<avx512>_load<mode>mask_and15"
+ [(set (match_operand:V48_AVX512VL_4 0 "register_operand" "=v")
+ (vec_merge:V48_AVX512VL_4
+ (unspec:V48_AVX512VL_4
+ [(match_operand:V48_AVX512VL_4 1 "memory_operand" "m")]
+ UNSPEC_MASKLOAD)
+ (match_operand:V48_AVX512VL_4 2 "nonimm_or_0_operand" "0C")
+ (and:QI
+ (match_operand:QI 3 "register_operand" "Yk")
+ (const_int 15))))]
+ "TARGET_AVX512F"
+ "#"
+ "&& 1"
+ [(set (match_dup 0)
+ (vec_merge:V48_AVX512VL_4
+ (unspec:V48_AVX512VL_4 [(match_dup 1)] UNSPEC_MASKLOAD)
+ (match_dup 2)
+ (match_dup 3)))])
+
+(define_insn_and_split "*<avx512>_load<mode>mask_and3"
+ [(set (match_operand:V8_AVX512VL_2 0 "register_operand" "=v")
+ (vec_merge:V8_AVX512VL_2
+ (unspec:V8_AVX512VL_2
+ [(match_operand:V8_AVX512VL_2 1 "memory_operand" "m")]
+ UNSPEC_MASKLOAD)
+ (match_operand:V8_AVX512VL_2 2 "nonimm_or_0_operand" "0C")
+ (and:QI
+ (match_operand:QI 3 "register_operand" "Yk")
+ (const_int 3))))]
+ "TARGET_AVX512F"
+ "#"
+ "&& 1"
+ [(set (match_dup 0)
+ (vec_merge:V8_AVX512VL_2
+ (unspec:V8_AVX512VL_2 [(match_dup 1)] UNSPEC_MASKLOAD)
+ (match_dup 2)
+ (match_dup 3)))])
+
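
The two splitters above strip a redundant (and ... 15) or (and ... 3)
from the mask of a masked load: for 4- and 2-lane modes the hardware
ignores the upper mask bits anyway.  A runtime check of that
equivalence (sketch; build with -mavx512f -mavx512vl):

    #include <assert.h>
    #include <string.h>
    #include <immintrin.h>

    int
    main (void)
    {
      float buf[4] = { 1, 2, 3, 4 };
      __m128 z = _mm_setzero_ps ();
      /* Mask bits 4-7 are don't-care for V4SF.  */
      __m128 a = _mm_mask_loadu_ps (z, 0xfa, buf);
      __m128 b = _mm_mask_loadu_ps (z, 0xfa & 15, buf);
      assert (memcmp (&a, &b, sizeof a) == 0);
      return 0;
    }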
(define_expand "<avx512>_load<mode>_mask"
[(set (match_operand:VI12_AVX512VL 0 "register_operand")
(vec_merge:VI12_AVX512VL
@@ -2049,11 +2040,9 @@
(define_mode_iterator STORENT_MODE
[(DI "TARGET_SSE2 && TARGET_64BIT") (SI "TARGET_SSE2")
(SF "TARGET_SSE4A") (DF "TARGET_SSE4A")
- (V8DI "TARGET_AVX512F && TARGET_EVEX512")
- (V4DI "TARGET_AVX") (V2DI "TARGET_SSE2")
- (V16SF "TARGET_AVX512F && TARGET_EVEX512") (V8SF "TARGET_AVX") V4SF
- (V8DF "TARGET_AVX512F && TARGET_EVEX512")
- (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
+ (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") (V2DI "TARGET_SSE2")
+ (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
+ (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
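
STORENT_MODE gates the storent<mode> expander below; its 512-bit
entries now need only TARGET_AVX512F.  User code reaches it through
the non-temporal store built-ins, e.g. (sketch):

    #include <immintrin.h>

    /* Cache-bypassing store; expands through storentv16sf.  */
    void
    stream (float *dst, __m512 v)
    {
      _mm512_stream_ps (dst, v);
    }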
(define_expand "storent<mode>"
[(set (match_operand:STORENT_MODE 0 "memory_operand")
@@ -2857,10 +2846,10 @@
})
(define_expand "div<mode>3"
- [(set (match_operand:VBF_AVX10_2 0 "register_operand")
- (div:VBF_AVX10_2
- (match_operand:VBF_AVX10_2 1 "register_operand")
- (match_operand:VBF_AVX10_2 2 "vector_operand")))]
+ [(set (match_operand:VBF 0 "register_operand")
+ (div:VBF
+ (match_operand:VBF 1 "register_operand")
+ (match_operand:VBF 2 "vector_operand")))]
"TARGET_AVX10_2"
{
if (TARGET_RECIP_VEC_DIV
@@ -3897,15 +3886,12 @@
(define_mode_iterator REDUC_PLUS_MODE
[(V4DF "TARGET_AVX") (V8SF "TARGET_AVX")
(V16HF "TARGET_AVX512FP16 && TARGET_AVX512VL")
- (V8DF "TARGET_AVX512F && TARGET_EVEX512")
- (V16SF "TARGET_AVX512F && TARGET_EVEX512")
- (V32HF "TARGET_AVX512FP16 && TARGET_AVX512VL && TARGET_EVEX512")
+ (V8DF "TARGET_AVX512F") (V16SF "TARGET_AVX512F")
+ (V32HF "TARGET_AVX512FP16 && TARGET_AVX512VL")
(V32QI "TARGET_AVX") (V16HI "TARGET_AVX")
(V8SI "TARGET_AVX") (V4DI "TARGET_AVX")
- (V64QI "TARGET_AVX512F && TARGET_EVEX512")
- (V32HI "TARGET_AVX512F && TARGET_EVEX512")
- (V16SI "TARGET_AVX512F && TARGET_EVEX512")
- (V8DI "TARGET_AVX512F && TARGET_EVEX512")])
+ (V64QI "TARGET_AVX512F") (V32HI "TARGET_AVX512F")
+ (V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F")])
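
REDUC_PLUS_MODE feeds reduc_plus_scal_<mode> below, which the
vectorizer uses for sum reductions; again the 512-bit entries now need
only the base feature.  A loop that can reduce through the V16SF
instance (sketch; assumes -O3 -mavx512f -ffast-math):

    /* Candidate for reduc_plus_scal_v16sf.  */
    float
    sum (const float *a, int n)
    {
      float s = 0.0f;
      for (int i = 0; i < n; i++)
        s += a[i];
      return s;
    }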
(define_expand "reduc_plus_scal_<mode>"
[(plus:REDUC_PLUS_MODE
@@ -3948,13 +3934,11 @@
(V16HF "TARGET_AVX512FP16 && TARGET_AVX512VL")
(V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")
(V8SF "TARGET_AVX") (V4DF "TARGET_AVX")
- (V64QI "TARGET_AVX512BW && TARGET_EVEX512")
- (V32HF "TARGET_AVX512FP16 && TARGET_AVX512VL && TARGET_EVEX512")
- (V32HI "TARGET_AVX512BW && TARGET_EVEX512")
- (V16SI "TARGET_AVX512F && TARGET_EVEX512")
- (V8DI "TARGET_AVX512F && TARGET_EVEX512")
- (V16SF "TARGET_AVX512F && TARGET_EVEX512")
- (V8DF "TARGET_AVX512F && TARGET_EVEX512")])
+ (V64QI "TARGET_AVX512BW")
+ (V32HF "TARGET_AVX512FP16 && TARGET_AVX512VL")
+ (V32HI "TARGET_AVX512BW") (V16SI "TARGET_AVX512F")
+ (V8DI "TARGET_AVX512F") (V16SF "TARGET_AVX512F")
+ (V8DF "TARGET_AVX512F")])
(define_expand "reduc_<code>_scal_<mode>"
[(smaxmin:REDUC_SMINMAX_MODE
@@ -4063,10 +4047,8 @@
(define_mode_iterator REDUC_ANY_LOGIC_MODE
[(V32QI "TARGET_AVX") (V16HI "TARGET_AVX")
(V8SI "TARGET_AVX") (V4DI "TARGET_AVX")
- (V64QI "TARGET_AVX512F && TARGET_EVEX512")
- (V32HI "TARGET_AVX512F && TARGET_EVEX512")
- (V16SI "TARGET_AVX512F && TARGET_EVEX512")
- (V8DI "TARGET_AVX512F && TARGET_EVEX512")])
+ (V64QI "TARGET_AVX512F") (V32HI "TARGET_AVX512F")
+ (V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F")])
(define_expand "reduc_<code>_scal_<mode>"
[(any_logic:REDUC_ANY_LOGIC_MODE
@@ -4410,7 +4392,7 @@
(unspec:<V48H_AVX512VL:avx512fmaskmode>
[(match_operand:V48H_AVX512VL 1 "nonimmediate_operand" "v")
(match_operand:V48H_AVX512VL 2 "nonimmediate_operand" "vm")
- (match_operand:SI 3 "const_0_to_7_operand" "n")]
+ (match_operand:SI 3 "<cmp_imm_predicate>" "n")]
UNSPEC_PCMP)))]
"TARGET_AVX512F
&& (!VALID_MASK_AVX512BW_MODE (<SWI248x:MODE>mode) || TARGET_AVX512BW)
@@ -4428,7 +4410,7 @@
(unspec:<V48H_AVX512VL:avx512fmaskmode>
[(match_operand:V48H_AVX512VL 1 "nonimmediate_operand")
(match_operand:V48H_AVX512VL 2 "nonimmediate_operand")
- (match_operand:SI 3 "const_0_to_7_operand")]
+ (match_operand:SI 3 "<cmp_imm_predicate>")]
UNSPEC_PCMP)))
(set (match_operand:<V48H_AVX512VL:avx512fmaskmode> 4 "register_operand")
(unspec:<V48H_AVX512VL:avx512fmaskmode>
@@ -4469,7 +4451,8 @@
(match_operand:V48H_AVX512VL 2 "nonimmediate_operand")
(match_operand:SI 3 "<cmp_imm_predicate>" "n")]
UNSPEC_PCMP)))]
- "TARGET_AVX512F && ix86_pre_reload_split ()"
+ "TARGET_AVX512F && GET_MODE_NUNITS (<MODE>mode) >= 8
+ && ix86_pre_reload_split ()"
"#"
"&& 1"
[(set (match_dup 0)
@@ -4480,6 +4463,70 @@
UNSPEC_PCMP))]
"operands[4] = GEN_INT (INTVAL (operands[3]) ^ 4);")
+(define_insn "*<avx512>_cmp<mode>3_and15"
+ [(set (match_operand:QI 0 "register_operand" "=k")
+ (and:QI
+ (unspec:QI
+ [(match_operand:V48_AVX512VL_4 1 "nonimmediate_operand" "v")
+ (match_operand:V48_AVX512VL_4 2 "nonimmediate_operand" "vm")
+ (match_operand:SI 3 "<cmp_imm_predicate>" "n")]
+ UNSPEC_PCMP)
+ (const_int 15)))]
+ "TARGET_AVX512F"
+ "v<ssecmpintprefix>cmp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "ssecmp")
+ (set_attr "length_immediate" "1")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<sseinsnmode>")])
+
+(define_insn "*<avx512>_ucmp<mode>3_and15"
+ [(set (match_operand:QI 0 "register_operand" "=k")
+ (and:QI
+ (unspec:QI
+ [(match_operand:VI48_AVX512VL_4 1 "nonimmediate_operand" "v")
+ (match_operand:VI48_AVX512VL_4 2 "nonimmediate_operand" "vm")
+ (match_operand:SI 3 "const_0_to_7_operand" "n")]
+ UNSPEC_UNSIGNED_PCMP)
+ (const_int 15)))]
+ "TARGET_AVX512F"
+ "vpcmpu<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "ssecmp")
+ (set_attr "length_immediate" "1")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<sseinsnmode>")])
+
+(define_insn "*<avx512>_cmp<mode>3_and3"
+ [(set (match_operand:QI 0 "register_operand" "=k")
+ (and:QI
+ (unspec:QI
+ [(match_operand:V8_AVX512VL_2 1 "nonimmediate_operand" "v")
+ (match_operand:V8_AVX512VL_2 2 "nonimmediate_operand" "vm")
+ (match_operand:SI 3 "<cmp_imm_predicate>" "n")]
+ UNSPEC_PCMP)
+ (const_int 3)))]
+ "TARGET_AVX512F"
+ "v<ssecmpintprefix>cmp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "ssecmp")
+ (set_attr "length_immediate" "1")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<sseinsnmode>")])
+
+(define_insn "*avx512vl_ucmpv2di3_and3"
+ [(set (match_operand:QI 0 "register_operand" "=k")
+ (and:QI
+ (unspec:QI
+ [(match_operand:V2DI 1 "nonimmediate_operand" "v")
+ (match_operand:V2DI 2 "nonimmediate_operand" "vm")
+ (match_operand:SI 3 "const_0_to_7_operand" "n")]
+ UNSPEC_UNSIGNED_PCMP)
+ (const_int 3)))]
+ "TARGET_AVX512F"
+ "vpcmpuq\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "ssecmp")
+ (set_attr "length_immediate" "1")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "TI")])
+
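
These *_and15/*_and3 patterns match a compare whose mask result is
explicitly ANDed with 15 or 3 and emit the bare compare, since a 4- or
2-lane vcmp/vpcmp already leaves the upper mask bits zero.  That is
also why the generic not-PCMP splitters gained the
GET_MODE_NUNITS (<MODE>mode) >= 8 guard: they invert the compare by
XORing the immediate with 4, which keeps the don't-care bits clear,
whereas a NOT of a narrow mask would set them.  In C terms (sketch;
assumes -mavx512f -mavx512vl):

    #include <immintrin.h>

    /* The & 15 folds into the vcmpps itself for a 4-lane compare.  */
    __mmask8
    lt4 (__m128 a, __m128 b)
    {
      return (__mmask8) (_mm_cmp_ps_mask (a, b, _CMP_LT_OS) & 15);
    }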
(define_insn "<avx512>_cmp<mode>3<mask_scalar_merge_name>"
[(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
(unspec:<avx512fmaskmode>
@@ -4762,7 +4809,8 @@
(match_operand:VI48_AVX512VL 2 "nonimmediate_operand")
(match_operand:SI 3 "const_0_to_7_operand")]
UNSPEC_UNSIGNED_PCMP)))]
- "TARGET_AVX512F && ix86_pre_reload_split ()"
+ "TARGET_AVX512F && ix86_pre_reload_split ()
+ && GET_MODE_NUNITS (<MODE>mode) >= 8"
"#"
"&& 1"
[(set (match_dup 0)
@@ -4923,8 +4971,8 @@
(define_expand "vec_cmp<mode><avx512fmaskmodelower>"
[(set (match_operand:<avx512fmaskmode> 0 "register_operand")
(match_operator:<avx512fmaskmode> 1 ""
- [(match_operand:VBF_AVX10_2 2 "register_operand")
- (match_operand:VBF_AVX10_2 3 "nonimmediate_operand")]))]
+ [(match_operand:VBF 2 "register_operand")
+ (match_operand:VBF 3 "nonimmediate_operand")]))]
"TARGET_AVX10_2"
{
bool ok = ix86_expand_mask_vec_cmp (operands[0], GET_CODE (operands[1]),
@@ -5142,7 +5190,7 @@
(define_expand "vcond_mask_<mode><sseintvecmodelower>"
[(set (match_operand:VI_256_AVX2 0 "register_operand")
(vec_merge:VI_256_AVX2
- (match_operand:VI_256_AVX2 1 "nonimmediate_operand")
+ (match_operand:VI_256_AVX2 1 "nonimm_or_0_or_1s_operand")
(match_operand:VI_256_AVX2 2 "nonimm_or_0_operand")
(match_operand:<sseintvecmode> 3 "register_operand")))]
"TARGET_AVX"
@@ -5155,7 +5203,7 @@
(define_expand "vcond_mask_<mode><sseintvecmodelower>"
[(set (match_operand:VI_128 0 "register_operand")
(vec_merge:VI_128
- (match_operand:VI_128 1 "vector_operand")
+ (match_operand:VI_128 1 "vector_or_0_or_1s_operand")
(match_operand:VI_128 2 "nonimm_or_0_operand")
(match_operand:<sseintvecmode> 3 "register_operand")))]
"TARGET_SSE2"
@@ -5168,7 +5216,7 @@
(define_expand "vcond_mask_v1tiv1ti"
[(set (match_operand:V1TI 0 "register_operand")
(vec_merge:V1TI
- (match_operand:V1TI 1 "vector_operand")
+ (match_operand:V1TI 1 "vector_or_0_or_1s_operand")
(match_operand:V1TI 2 "nonimm_or_0_operand")
(match_operand:V1TI 3 "register_operand")))]
"TARGET_SSE2"
@@ -5181,7 +5229,7 @@
(define_expand "vcond_mask_<mode><sseintvecmodelower>"
[(set (match_operand:VF_256 0 "register_operand")
(vec_merge:VF_256
- (match_operand:VF_256 1 "nonimmediate_operand")
+ (match_operand:VF_256 1 "nonimm_or_0_or_1s_operand")
(match_operand:VF_256 2 "nonimm_or_0_operand")
(match_operand:<sseintvecmode> 3 "register_operand")))]
"TARGET_AVX"
@@ -5194,7 +5242,7 @@
(define_expand "vcond_mask_<mode><sseintvecmodelower>"
[(set (match_operand:VF_128 0 "register_operand")
(vec_merge:VF_128
- (match_operand:VF_128 1 "vector_operand")
+ (match_operand:VF_128 1 "vector_or_0_or_1s_operand")
(match_operand:VF_128 2 "nonimm_or_0_operand")
(match_operand:<sseintvecmode> 3 "register_operand")))]
"TARGET_SSE"
@@ -5573,7 +5621,7 @@
output_asm_insn (buf, operands);
return "";
}
- [(set_attr "isa" "noavx,avx,avx512vl,avx512f_512")
+ [(set_attr "isa" "noavx,avx,avx512vl,avx512f")
(set_attr "type" "sselog")
(set_attr "prefix" "orig,vex,evex,evex")
(set (attr "mode")
@@ -5630,7 +5678,7 @@
output_asm_insn (buf, operands);
return "";
}
- [(set_attr "isa" "noavx,avx_noavx512vl,avx512vl,avx512f_512")
+ [(set_attr "isa" "noavx,avx_noavx512f,avx512vl,avx512f")
(set_attr "addr" "*,gpr16,*,*")
(set_attr "type" "sselog")
(set (attr "prefix_data16")
@@ -5703,7 +5751,7 @@
output_asm_insn (buf, operands);
return "";
}
- [(set_attr "isa" "noavx,avx,avx512vl,avx512f_512")
+ [(set_attr "isa" "noavx,avx,avx512vl,avx512f")
(set_attr "type" "sselog")
(set_attr "prefix" "orig,vex,evex,evex")
(set (attr "mode")
@@ -5765,7 +5813,7 @@
output_asm_insn (buf, operands);
return "";
}
- [(set_attr "isa" "noavx,avx,avx512vl,avx512f_512")
+ [(set_attr "isa" "noavx,avx,avx512vl,avx512f")
(set_attr "type" "sselog")
(set (attr "prefix_data16")
(if_then_else
@@ -5811,15 +5859,10 @@
(V2DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
(V8SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
(V4DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
- (V16SF "TARGET_AVX512F && TARGET_EVEX512")
- (V8DF "TARGET_AVX512F && TARGET_EVEX512")
- (HF "TARGET_AVX512FP16")
- (V8HF "TARGET_AVX512FP16 && TARGET_AVX512VL")
- (V16HF "TARGET_AVX512FP16 && TARGET_AVX512VL")
- (V32HF "TARGET_AVX512FP16 && TARGET_EVEX512")
- (V8BF "TARGET_AVX10_2")
- (V16BF "TARGET_AVX10_2")
- (V32BF "TARGET_AVX10_2")])
+ (V16SF "TARGET_AVX512F") (V8DF "TARGET_AVX512F")
+ (HF "TARGET_AVX512FP16") (V8HF "TARGET_AVX512FP16 && TARGET_AVX512VL")
+ (V16HF "TARGET_AVX512FP16 && TARGET_AVX512VL") (V32HF "TARGET_AVX512FP16")
+ (V8BF "TARGET_AVX10_2") (V16BF "TARGET_AVX10_2") (V32BF "TARGET_AVX10_2")])
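
FMAMODEM gates the fma<mode>4 expander below; the V32HF entry, for
instance, now requires only TARGET_AVX512FP16.  A fused multiply-add
reaching the V32HF instance (sketch; assumes -mavx512fp16):

    #include <immintrin.h>

    __m512h
    fmadd (__m512h a, __m512h b, __m512h c)
    {
      return _mm512_fmadd_ph (a, b, c);   /* fmav32hf4  */
    }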
(define_expand "fma<mode>4"
[(set (match_operand:FMAMODEM 0 "register_operand")
@@ -5857,8 +5900,7 @@
(V2DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
(V8SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
(V4DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
- (V16SF "TARGET_AVX512F && TARGET_EVEX512")
- (V8DF "TARGET_AVX512F && TARGET_EVEX512")])
+ (V16SF "TARGET_AVX512F") (V8DF "TARGET_AVX512F")])
(define_mode_iterator FMAMODE
[SF DF V4SF V2DF V8SF V4DF])
@@ -5928,14 +5970,12 @@
;; Assume AVX-512F as the baseline
(define_mode_iterator VFH_SF_AVX512VL
- [(V32HF "TARGET_AVX512FP16 && TARGET_EVEX512")
+ [(V32HF "TARGET_AVX512FP16")
(V16HF "TARGET_AVX512FP16 && TARGET_AVX512VL")
(V8HF "TARGET_AVX512FP16 && TARGET_AVX512VL")
(HF "TARGET_AVX512FP16")
- SF (V16SF "TARGET_EVEX512")
- (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
- DF (V8DF "TARGET_EVEX512")
- (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
+ SF V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
+ DF V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
(define_insn "<sd_mask_codefor>fma_fmadd_<mode><sd_maskz_name><round_name>"
[(set (match_operand:VFH_SF_AVX512VL 0 "register_operand" "=v,v,v")
@@ -8683,7 +8723,7 @@
(unspec:V16SI
[(match_operand:V16SF 1 "<round_nimm_predicate>" "<round_constraint>")]
UNSPEC_FIX_NOTRUNC))]
- "TARGET_AVX512F && TARGET_EVEX512"
+ "TARGET_AVX512F"
"vcvtps2dq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
[(set_attr "type" "ssecvt")
(set_attr "prefix" "evex")
@@ -8751,7 +8791,7 @@
(unspec:V16SI
[(match_operand:V16SF 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
UNSPEC_VCVTT_U))]
- "TARGET_AVX512F && TARGET_EVEX512"
+ "TARGET_AVX512F"
"vcvttps2<vcvtt_suffix>dq\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
[(set_attr "type" "ssecvt")
(set_attr "prefix" "evex")
@@ -8761,7 +8801,7 @@
[(set (match_operand:V16SI 0 "register_operand" "=v")
(any_fix:V16SI
(match_operand:V16SF 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
- "TARGET_AVX512F && TARGET_EVEX512"
+ "TARGET_AVX512F"
"vcvttps2<fixsuffix>dq\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
[(set_attr "type" "ssecvt")
(set_attr "prefix" "evex")
@@ -9349,7 +9389,7 @@
(const_int 2) (const_int 3)
(const_int 4) (const_int 5)
(const_int 6) (const_int 7)]))))]
- "TARGET_AVX512F && TARGET_EVEX512"
+ "TARGET_AVX512F"
"vcvtdq2pd\t{%t1, %0|%0, %t1}"
[(set_attr "type" "ssecvt")
(set_attr "prefix" "evex")
@@ -9385,7 +9425,7 @@
(unspec:V8SI
[(match_operand:V8DF 1 "<round_nimm_predicate>" "<round_constraint>")]
UNSPEC_FIX_NOTRUNC))]
- "TARGET_AVX512F && TARGET_EVEX512"
+ "TARGET_AVX512F"
"vcvtpd2dq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
[(set_attr "type" "ssecvt")
(set_attr "prefix" "evex")
@@ -9544,7 +9584,7 @@
(unspec:V8SI
[(match_operand:V8DF 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
UNSPEC_VCVTT_U))]
- "TARGET_AVX512F && TARGET_EVEX512"
+ "TARGET_AVX512F"
"vcvttpd2<vcvtt_suffix>dq\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
[(set_attr "type" "ssecvt")
(set_attr "prefix" "evex")
@@ -9554,7 +9594,7 @@
[(set (match_operand:V8SI 0 "register_operand" "=v")
(any_fix:V8SI
(match_operand:V8DF 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
- "TARGET_AVX512F && TARGET_EVEX512"
+ "TARGET_AVX512F"
"vcvttpd2<fixsuffix>dq\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
[(set_attr "type" "ssecvt")
(set_attr "prefix" "evex")
@@ -10070,7 +10110,7 @@
[(set (match_operand:V8SF 0 "register_operand" "=v")
(float_truncate:V8SF
(match_operand:V8DF 1 "<round_nimm_predicate>" "<round_constraint>")))]
- "TARGET_AVX512F && TARGET_EVEX512"
+ "TARGET_AVX512F"
"vcvtpd2ps\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
[(set_attr "type" "ssecvt")
(set_attr "prefix" "evex")
@@ -10232,7 +10272,7 @@
(const_int 2) (const_int 3)
(const_int 4) (const_int 5)
(const_int 6) (const_int 7)]))))]
- "TARGET_AVX512F && TARGET_EVEX512"
+ "TARGET_AVX512F"
"vcvtps2pd\t{%t1, %0|%0, %t1}"
[(set_attr "type" "ssecvt")
(set_attr "prefix" "evex")
@@ -10438,7 +10478,7 @@
(set (match_operand:V8DF 0 "register_operand")
(float_extend:V8DF
(match_dup 2)))]
-"TARGET_AVX512F && TARGET_EVEX512"
+"TARGET_AVX512F"
"operands[2] = gen_reg_rtx (V8SFmode);")
(define_expand "vec_unpacks_lo_v4sf"
@@ -10576,7 +10616,7 @@
(set (match_operand:V8DF 0 "register_operand")
(float:V8DF
(match_dup 2)))]
- "TARGET_AVX512F && TARGET_EVEX512"
+ "TARGET_AVX512F"
"operands[2] = gen_reg_rtx (V8SImode);")
(define_expand "vec_unpacks_float_lo_v16si"
@@ -10588,7 +10628,7 @@
(const_int 2) (const_int 3)
(const_int 4) (const_int 5)
(const_int 6) (const_int 7)]))))]
- "TARGET_AVX512F && TARGET_EVEX512")
+ "TARGET_AVX512F")
(define_expand "vec_unpacku_float_hi_v4si"
[(set (match_dup 5)
@@ -10684,7 +10724,7 @@
(define_expand "vec_unpacku_float_hi_v16si"
[(match_operand:V8DF 0 "register_operand")
(match_operand:V16SI 1 "register_operand")]
- "TARGET_AVX512F && TARGET_EVEX512"
+ "TARGET_AVX512F"
{
REAL_VALUE_TYPE TWO32r;
rtx k, x, tmp[4];
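
The vec_unpacku_float_* expanders materialize 2^32 (TWO32r) because
only a signed conversion is used here: elements are converted as
signed, and 2^32 is added back under a mask (k) wherever the signed
result came out negative.  A scalar model of that arithmetic (sketch):

    /* double(u) for u in [0, 2^32): convert as signed, then fix up.  */
    double
    u32_to_f64 (unsigned int u)
    {
      int s = (int) u;               /* s == u - 2^32 when u >= 2^31  */
      double d = (double) s;
      return s < 0 ? d + 4294967296.0 : d;
    }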
@@ -10733,7 +10773,7 @@
(define_expand "vec_unpacku_float_lo_v16si"
[(match_operand:V8DF 0 "register_operand")
(match_operand:V16SI 1 "nonimmediate_operand")]
- "TARGET_AVX512F && TARGET_EVEX512"
+ "TARGET_AVX512F"
{
REAL_VALUE_TYPE TWO32r;
rtx k, x, tmp[3];
@@ -10827,7 +10867,7 @@
[(match_operand:V16SI 0 "register_operand")
(match_operand:V8DF 1 "nonimmediate_operand")
(match_operand:V8DF 2 "nonimmediate_operand")]
- "TARGET_AVX512F && TARGET_EVEX512"
+ "TARGET_AVX512F"
{
rtx r1, r2;
@@ -10942,7 +10982,7 @@
[(match_operand:V16SI 0 "register_operand")
(match_operand:V8DF 1 "nonimmediate_operand")
(match_operand:V8DF 2 "nonimmediate_operand")]
- "TARGET_AVX512F && TARGET_EVEX512"
+ "TARGET_AVX512F"
{
rtx r1, r2;
@@ -11135,7 +11175,7 @@
(const_int 11) (const_int 27)
(const_int 14) (const_int 30)
(const_int 15) (const_int 31)])))]
- "TARGET_AVX512F && TARGET_EVEX512"
+ "TARGET_AVX512F"
"vunpckhps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
[(set_attr "type" "sselog")
(set_attr "prefix" "evex")
@@ -11223,7 +11263,7 @@
(const_int 9) (const_int 25)
(const_int 12) (const_int 28)
(const_int 13) (const_int 29)])))]
- "TARGET_AVX512F && TARGET_EVEX512"
+ "TARGET_AVX512F"
"vunpcklps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
[(set_attr "type" "sselog")
(set_attr "prefix" "evex")
@@ -11363,7 +11403,7 @@
(const_int 11) (const_int 11)
(const_int 13) (const_int 13)
(const_int 15) (const_int 15)])))]
- "TARGET_AVX512F && TARGET_EVEX512"
+ "TARGET_AVX512F"
"vmovshdup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
[(set_attr "type" "sse")
(set_attr "prefix" "evex")
@@ -11416,7 +11456,7 @@
(const_int 10) (const_int 10)
(const_int 12) (const_int 12)
(const_int 14) (const_int 14)])))]
- "TARGET_AVX512F && TARGET_EVEX512"
+ "TARGET_AVX512F"
"vmovsldup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
[(set_attr "type" "sse")
(set_attr "prefix" "evex")
@@ -12376,9 +12416,7 @@
(V8SF "32x4") (V8SI "32x4") (V4DF "64x2") (V4DI "64x2")])
(define_mode_iterator AVX512_VEC
- [(V8DF "TARGET_AVX512DQ && TARGET_EVEX512")
- (V8DI "TARGET_AVX512DQ && TARGET_EVEX512")
- (V16SF "TARGET_EVEX512") (V16SI "TARGET_EVEX512")])
+ [(V8DF "TARGET_AVX512DQ") (V8DI "TARGET_AVX512DQ") V16SF V16SI])
(define_expand "<extract_type>_vextract<shuffletype><extract_suf>_mask"
[(match_operand:<ssequartermode> 0 "nonimmediate_operand")
@@ -12547,9 +12585,7 @@
[(V16SF "32x8") (V16SI "32x8") (V8DF "64x4") (V8DI "64x4")])
(define_mode_iterator AVX512_VEC_2
- [(V16SF "TARGET_AVX512DQ && TARGET_EVEX512")
- (V16SI "TARGET_AVX512DQ && TARGET_EVEX512")
- (V8DF "TARGET_EVEX512") (V8DI "TARGET_EVEX512")])
+ [(V16SF "TARGET_AVX512DQ") (V16SI "TARGET_AVX512DQ") V8DF V8DI])
(define_expand "<extract_type_2>_vextract<shuffletype><extract_suf_2>_mask"
[(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
@@ -12723,7 +12759,7 @@
lo insns have =m and 0C constraints. */
: (operands[2] != const0_rtx
|| (!rtx_equal_p (dest, operands[3])
- && GET_CODE (operands[3]) != CONST_VECTOR))))
+ && !CONST_VECTOR_P (operands[3])))))
dest = gen_reg_rtx (<ssehalfvecmode>mode);
switch (INTVAL (operands[2]))
{
@@ -13110,7 +13146,7 @@
(const_int 26) (const_int 27)
(const_int 28) (const_int 29)
(const_int 30) (const_int 31)])))]
- "TARGET_AVX512F && TARGET_EVEX512
+ "TARGET_AVX512F
&& !(MEM_P (operands[0]) && MEM_P (operands[1]))"
{
if (TARGET_AVX512VL
@@ -13159,7 +13195,7 @@
(const_int 58) (const_int 59)
(const_int 60) (const_int 61)
(const_int 62) (const_int 63)])))]
- "TARGET_AVX512F && TARGET_EVEX512"
+ "TARGET_AVX512F"
"vextracti64x4\t{$0x1, %1, %0|%0, %1, 0x1}"
[(set_attr "type" "sselog1")
(set_attr "length_immediate" "1")
@@ -13257,15 +13293,15 @@
;; Modes handled by vec_extract patterns.
(define_mode_iterator VEC_EXTRACT_MODE
- [(V64QI "TARGET_AVX512BW && TARGET_EVEX512") (V32QI "TARGET_AVX") V16QI
- (V32HI "TARGET_AVX512BW && TARGET_EVEX512") (V16HI "TARGET_AVX") V8HI
- (V16SI "TARGET_AVX512F && TARGET_EVEX512") (V8SI "TARGET_AVX") V4SI
- (V8DI "TARGET_AVX512F && TARGET_EVEX512") (V4DI "TARGET_AVX") V2DI
- (V32HF "TARGET_AVX512BW && TARGET_EVEX512") (V16HF "TARGET_AVX") V8HF
- (V32BF "TARGET_AVX512BW && TARGET_EVEX512") (V16BF "TARGET_AVX") V8BF
- (V16SF "TARGET_AVX512F && TARGET_EVEX512") (V8SF "TARGET_AVX") V4SF
- (V8DF "TARGET_AVX512F && TARGET_EVEX512") (V4DF "TARGET_AVX") V2DF
- (V4TI "TARGET_AVX512F && TARGET_EVEX512") (V2TI "TARGET_AVX")])
+ [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX") V16QI
+ (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX") V8HI
+ (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
+ (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI
+ (V32HF "TARGET_AVX512BW") (V16HF "TARGET_AVX") V8HF
+ (V32BF "TARGET_AVX512BW") (V16BF "TARGET_AVX") V8BF
+ (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
+ (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") V2DF
+ (V4TI "TARGET_AVX512F") (V2TI "TARGET_AVX")])
(define_expand "vec_extract<mode><ssescalarmodelower>"
[(match_operand:<ssescalarmode> 0 "register_operand")
@@ -13307,7 +13343,7 @@
(const_int 3) (const_int 11)
(const_int 5) (const_int 13)
(const_int 7) (const_int 15)])))]
- "TARGET_AVX512F && TARGET_EVEX512"
+ "TARGET_AVX512F"
"vunpckhpd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
[(set_attr "type" "sselog")
(set_attr "prefix" "evex")
@@ -13421,9 +13457,9 @@
(const_int 2) (const_int 10)
(const_int 4) (const_int 12)
(const_int 6) (const_int 14)])))]
- "TARGET_AVX512F && TARGET_EVEX512"
+ "TARGET_AVX512F"
"vmovddup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
- [(set_attr "type" "sselog1")
+ [(set_attr "type" "ssemov")
(set_attr "prefix" "evex")
(set_attr "mode" "V8DF")])
@@ -13437,7 +13473,7 @@
(const_int 2) (const_int 10)
(const_int 4) (const_int 12)
(const_int 6) (const_int 14)])))]
- "TARGET_AVX512F && TARGET_EVEX512"
+ "TARGET_AVX512F"
"vunpcklpd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
[(set_attr "type" "sselog")
(set_attr "prefix" "evex")
@@ -13454,7 +13490,7 @@
(const_int 2) (const_int 6)])))]
"TARGET_AVX && <mask_avx512vl_condition>"
"vmovddup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
- [(set_attr "type" "sselog1")
+ [(set_attr "type" "ssemov")
(set_attr "prefix" "<mask_prefix>")
(set_attr "mode" "V4DF")])
@@ -13649,7 +13685,7 @@
(match_operand:SI 4 "const_0_to_255_operand")]
UNSPEC_VTERNLOG))]
"(<MODE_SIZE> == 64 || TARGET_AVX512VL
- || (TARGET_AVX512F && TARGET_EVEX512 && !TARGET_PREFER_AVX256))
+ || (TARGET_AVX512F && !TARGET_PREFER_AVX256))
/* Disallow embedded broadcast for vector HFmode since
it's not a real AVX512FP16 instruction. */
&& (GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) >= 4
@@ -13731,7 +13767,7 @@
[(set (match_operand:V 0 "register_operand")
(match_operand:V 1 "ternlog_operand"))]
"(<MODE_SIZE> == 64 || TARGET_AVX512VL
- || (TARGET_AVX512F && TARGET_EVEX512 && !TARGET_PREFER_AVX256))
+ || (TARGET_AVX512F && !TARGET_PREFER_AVX256))
&& ix86_pre_reload_split ()"
"#"
"&& 1"
@@ -13761,7 +13797,7 @@
(match_operand:V 3 "regmem_or_bitnot_regmem_operand")
(match_operand:V 4 "regmem_or_bitnot_regmem_operand"))))]
"(<MODE_SIZE> == 64 || TARGET_AVX512VL
- || (TARGET_AVX512F && TARGET_EVEX512 && !TARGET_PREFER_AVX256))
+ || (TARGET_AVX512F && !TARGET_PREFER_AVX256))
&& ix86_pre_reload_split ()
&& (rtx_equal_p (STRIP_UNARY (operands[1]),
STRIP_UNARY (operands[4]))
@@ -13846,7 +13882,7 @@
(match_operand:V 3 "regmem_or_bitnot_regmem_operand"))
(match_operand:V 4 "regmem_or_bitnot_regmem_operand")))]
"(<MODE_SIZE> == 64 || TARGET_AVX512VL
- || (TARGET_AVX512F && TARGET_EVEX512 && !TARGET_PREFER_AVX256))
+ || (TARGET_AVX512F && !TARGET_PREFER_AVX256))
&& ix86_pre_reload_split ()
&& (rtx_equal_p (STRIP_UNARY (operands[1]),
STRIP_UNARY (operands[4]))
@@ -13930,7 +13966,7 @@
(match_operand:V 2 "regmem_or_bitnot_regmem_operand"))
(match_operand:V 3 "regmem_or_bitnot_regmem_operand")))]
"(<MODE_SIZE> == 64 || TARGET_AVX512VL
- || (TARGET_AVX512F && TARGET_EVEX512 && !TARGET_PREFER_AVX256))
+ || (TARGET_AVX512F && !TARGET_PREFER_AVX256))
&& ix86_pre_reload_split ()"
"#"
"&& 1"
@@ -14080,7 +14116,7 @@
(match_operand:SI 3 "const_0_to_255_operand")
(match_operand:V16SF 4 "register_operand")
(match_operand:HI 5 "register_operand")]
- "TARGET_AVX512F && TARGET_EVEX512"
+ "TARGET_AVX512F"
{
int mask = INTVAL (operands[3]);
emit_insn (gen_avx512f_shufps512_1_mask (operands[0], operands[1], operands[2],
@@ -14267,7 +14303,7 @@
(match_operand 16 "const_12_to_15_operand")
(match_operand 17 "const_28_to_31_operand")
(match_operand 18 "const_28_to_31_operand")])))]
- "TARGET_AVX512F && TARGET_EVEX512
+ "TARGET_AVX512F
&& (INTVAL (operands[3]) == (INTVAL (operands[7]) - 4)
&& INTVAL (operands[4]) == (INTVAL (operands[8]) - 4)
&& INTVAL (operands[5]) == (INTVAL (operands[9]) - 4)
@@ -14302,7 +14338,7 @@
(match_operand:SI 3 "const_0_to_255_operand")
(match_operand:V8DF 4 "register_operand")
(match_operand:QI 5 "register_operand")]
- "TARGET_AVX512F && TARGET_EVEX512"
+ "TARGET_AVX512F"
{
int mask = INTVAL (operands[3]);
emit_insn (gen_avx512f_shufpd512_1_mask (operands[0], operands[1], operands[2],
@@ -14332,7 +14368,7 @@
(match_operand 8 "const_12_to_13_operand")
(match_operand 9 "const_6_to_7_operand")
(match_operand 10 "const_14_to_15_operand")])))]
- "TARGET_AVX512F && TARGET_EVEX512"
+ "TARGET_AVX512F"
{
int mask;
mask = INTVAL (operands[3]);
@@ -14464,7 +14500,7 @@
(const_int 3) (const_int 11)
(const_int 5) (const_int 13)
(const_int 7) (const_int 15)])))]
- "TARGET_AVX512F && TARGET_EVEX512"
+ "TARGET_AVX512F"
"vpunpckhqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
[(set_attr "type" "sselog")
(set_attr "prefix" "evex")
@@ -14514,7 +14550,7 @@
(const_int 2) (const_int 10)
(const_int 4) (const_int 12)
(const_int 6) (const_int 14)])))]
- "TARGET_AVX512F && TARGET_EVEX512"
+ "TARGET_AVX512F"
"vpunpcklqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
[(set_attr "type" "sselog")
(set_attr "prefix" "evex")
@@ -14880,7 +14916,7 @@
(set_attr "mode" "V2DF,DF,V8DF")
(set (attr "enabled")
(cond [(eq_attr "alternative" "2")
- (symbol_ref "TARGET_AVX512F && TARGET_EVEX512
+ (symbol_ref "TARGET_AVX512F
&& !TARGET_AVX512VL && !TARGET_PREFER_AVX256")
(match_test "<mask_avx512vl_condition>")
(const_string "*")
@@ -14965,13 +15001,13 @@
[(set (match_operand:PMOV_DST_MODE_1 0 "nonimmediate_operand")
(truncate:PMOV_DST_MODE_1
(match_operand:<pmov_src_mode> 1 "register_operand")))]
- "TARGET_AVX512F && TARGET_EVEX512")
+ "TARGET_AVX512F")
(define_insn "*avx512f_<code><pmov_src_lower><mode>2"
[(set (match_operand:PMOV_DST_MODE_1 0 "nonimmediate_operand" "=v,m")
(any_truncate:PMOV_DST_MODE_1
(match_operand:<pmov_src_mode> 1 "register_operand" "v,v")))]
- "TARGET_AVX512F && TARGET_EVEX512"
+ "TARGET_AVX512F"
"vpmov<trunsuffix><pmov_suff_1>\t{%1, %0|%0, %1}"
[(set_attr "type" "ssemov")
(set_attr "memory" "none,store")
@@ -14993,7 +15029,7 @@
(const_int 10) (const_int 11)
(const_int 12) (const_int 13)
(const_int 14) (const_int 15)])))]
- "TARGET_AVX512BW && TARGET_EVEX512 && ix86_pre_reload_split ()"
+ "TARGET_AVX512BW && ix86_pre_reload_split ()"
"#"
"&& 1"
[(set (match_dup 0)
@@ -15018,7 +15054,7 @@
(const_int 10) (const_int 11)
(const_int 12) (const_int 13)
(const_int 14) (const_int 15)])))]
- "TARGET_AVX512BW && TARGET_EVEX512 && ix86_pre_reload_split ()"
+ "TARGET_AVX512BW && ix86_pre_reload_split ()"
"#"
"&& 1"
[(set (match_dup 0)
@@ -15102,7 +15138,7 @@
(const_int 2) (const_int 3)
(const_int 4) (const_int 5)
(const_int 6) (const_int 7)])))]
- "TARGET_AVX512F && TARGET_EVEX512 && ix86_pre_reload_split ()"
+ "TARGET_AVX512F && ix86_pre_reload_split ()"
"#"
"&& 1"
[(set (match_dup 0)
@@ -15118,7 +15154,7 @@
(match_operand:<pmov_src_mode> 1 "register_operand" "v,v"))
(match_operand:PMOV_DST_MODE_1 2 "nonimm_or_0_operand" "0C,0")
(match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))]
- "TARGET_AVX512F && TARGET_EVEX512"
+ "TARGET_AVX512F"
"vpmov<trunsuffix><pmov_suff_1>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
[(set_attr "type" "ssemov")
(set_attr "memory" "none,store")
@@ -15132,19 +15168,19 @@
(match_operand:<pmov_src_mode> 1 "register_operand"))
(match_dup 0)
(match_operand:<avx512fmaskmode> 2 "register_operand")))]
- "TARGET_AVX512F && TARGET_EVEX512")
+ "TARGET_AVX512F")
(define_expand "truncv32hiv32qi2"
[(set (match_operand:V32QI 0 "nonimmediate_operand")
(truncate:V32QI
(match_operand:V32HI 1 "register_operand")))]
- "TARGET_AVX512BW && TARGET_EVEX512")
+ "TARGET_AVX512BW")
(define_insn "avx512bw_<code>v32hiv32qi2"
[(set (match_operand:V32QI 0 "nonimmediate_operand" "=v,m")
(any_truncate:V32QI
(match_operand:V32HI 1 "register_operand" "v,v")))]
- "TARGET_AVX512BW && TARGET_EVEX512"
+ "TARGET_AVX512BW"
"vpmov<trunsuffix>wb\t{%1, %0|%0, %1}"
[(set_attr "type" "ssemov")
(set_attr "memory" "none,store")
@@ -15174,7 +15210,7 @@
(const_int 26) (const_int 27)
(const_int 28) (const_int 29)
(const_int 30) (const_int 31)])))]
- "TARGET_AVX512VBMI && TARGET_EVEX512 && ix86_pre_reload_split ()"
+ "TARGET_AVX512VBMI && ix86_pre_reload_split ()"
"#"
"&& 1"
[(set (match_dup 0)
@@ -15190,7 +15226,7 @@
(match_operand:V32HI 1 "register_operand" "v,v"))
(match_operand:V32QI 2 "nonimm_or_0_operand" "0C,0")
(match_operand:SI 3 "register_operand" "Yk,Yk")))]
- "TARGET_AVX512BW && TARGET_EVEX512"
+ "TARGET_AVX512BW"
"vpmov<trunsuffix>wb\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
[(set_attr "type" "ssemov")
(set_attr "memory" "none,store")
@@ -15204,7 +15240,7 @@
(match_operand:V32HI 1 "register_operand"))
(match_dup 0)
(match_operand:SI 2 "register_operand")))]
- "TARGET_AVX512BW && TARGET_EVEX512")
+ "TARGET_AVX512BW")
(define_mode_iterator PMOV_DST_MODE_2
[V4SI V8HI (V16QI "TARGET_AVX512BW")])
@@ -16062,7 +16098,7 @@
[(set (match_operand:V8QI 0 "register_operand")
(truncate:V8QI
(match_operand:V8DI 1 "register_operand")))]
- "TARGET_AVX512F && TARGET_EVEX512"
+ "TARGET_AVX512F"
{
rtx op0 = gen_reg_rtx (V16QImode);
@@ -16082,7 +16118,7 @@
(const_int 0) (const_int 0)
(const_int 0) (const_int 0)
(const_int 0) (const_int 0)])))]
- "TARGET_AVX512F && TARGET_EVEX512"
+ "TARGET_AVX512F"
"vpmov<trunsuffix>qb\t{%1, %0|%0, %1}"
[(set_attr "type" "ssemov")
(set_attr "prefix" "evex")
@@ -16092,7 +16128,7 @@
[(set (match_operand:V8QI 0 "memory_operand" "=m")
(any_truncate:V8QI
(match_operand:V8DI 1 "register_operand" "v")))]
- "TARGET_AVX512F && TARGET_EVEX512"
+ "TARGET_AVX512F"
"vpmov<trunsuffix>qb\t{%1, %0|%0, %1}"
[(set_attr "type" "ssemov")
(set_attr "memory" "store")
@@ -16104,7 +16140,7 @@
(subreg:DI
(any_truncate:V8QI
(match_operand:V8DI 1 "register_operand")) 0))]
- "TARGET_AVX512F && TARGET_EVEX512 && ix86_pre_reload_split ()"
+ "TARGET_AVX512F && ix86_pre_reload_split ()"
"#"
"&& 1"
[(set (match_dup 0)
@@ -16128,7 +16164,7 @@
(const_int 0) (const_int 0)
(const_int 0) (const_int 0)
(const_int 0) (const_int 0)])))]
- "TARGET_AVX512F && TARGET_EVEX512"
+ "TARGET_AVX512F"
"vpmov<trunsuffix>qb\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
[(set_attr "type" "ssemov")
(set_attr "prefix" "evex")
@@ -16149,7 +16185,7 @@
(const_int 0) (const_int 0)
(const_int 0) (const_int 0)
(const_int 0) (const_int 0)])))]
- "TARGET_AVX512F && TARGET_EVEX512"
+ "TARGET_AVX512F"
"vpmov<trunsuffix>qb\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
[(set_attr "type" "ssemov")
(set_attr "prefix" "evex")
@@ -16162,7 +16198,7 @@
(match_operand:V8DI 1 "register_operand" "v"))
(match_dup 0)
(match_operand:QI 2 "register_operand" "Yk")))]
- "TARGET_AVX512F && TARGET_EVEX512"
+ "TARGET_AVX512F"
"vpmov<trunsuffix>qb\t{%1, %0%{%2%}|%0%{%2%}, %1}"
[(set_attr "type" "ssemov")
(set_attr "memory" "store")
@@ -16174,7 +16210,7 @@
(any_truncate:V8QI
(match_operand:V8DI 1 "register_operand"))
(match_operand:QI 2 "register_operand")]
- "TARGET_AVX512F && TARGET_EVEX512"
+ "TARGET_AVX512F"
{
operands[0] = adjust_address_nv (operands[0], V8QImode, 0);
emit_insn (gen_avx512f_<code>v8div16qi2_mask_store_1 (operands[0],
@@ -16431,7 +16467,7 @@
(const_int 4) (const_int 6)
(const_int 8) (const_int 10)
(const_int 12) (const_int 14)])))))]
- "TARGET_AVX512F && TARGET_EVEX512"
+ "TARGET_AVX512F"
"ix86_fixup_binary_operands_no_copy (MULT, V16SImode, operands);")
(define_insn "*vec_widen_umult_even_v16si<mask_name>"
@@ -16451,7 +16487,7 @@
(const_int 4) (const_int 6)
(const_int 8) (const_int 10)
(const_int 12) (const_int 14)])))))]
- "TARGET_AVX512F && TARGET_EVEX512
+ "TARGET_AVX512F
&& !(MEM_P (operands[1]) && MEM_P (operands[2]))"
"vpmuludq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
[(set_attr "type" "sseimul")
@@ -16547,7 +16583,7 @@
(const_int 4) (const_int 6)
(const_int 8) (const_int 10)
(const_int 12) (const_int 14)])))))]
- "TARGET_AVX512F && TARGET_EVEX512"
+ "TARGET_AVX512F"
"ix86_fixup_binary_operands_no_copy (MULT, V16SImode, operands);")
(define_insn "*vec_widen_smult_even_v16si<mask_name>"
@@ -16567,7 +16603,7 @@
(const_int 4) (const_int 6)
(const_int 8) (const_int 10)
(const_int 12) (const_int 14)])))))]
- "TARGET_AVX512F && TARGET_EVEX512
+ "TARGET_AVX512F
&& !(MEM_P (operands[1]) && MEM_P (operands[2]))"
"vpmuldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
[(set_attr "type" "sseimul")
@@ -16969,7 +17005,7 @@
"TARGET_SSE2"
{
/* Try with VNNI instructions. */
- if ((<MODE_SIZE> == 64 && TARGET_AVX512VNNI && TARGET_EVEX512)
+ if ((<MODE_SIZE> == 64 && TARGET_AVX512VNNI)
|| (<MODE_SIZE> < 64
&& ((TARGET_AVX512VNNI && TARGET_AVX512VL) || TARGET_AVXVNNI)))
{
@@ -17062,7 +17098,7 @@
(match_operand:V64QI 1 "register_operand")
(match_operand:V64QI 2 "nonimmediate_operand")
(match_operand:V16SI 3 "nonimmediate_operand")]
- "TARGET_AVX512BW && TARGET_EVEX512"
+ "TARGET_AVX512BW"
{
rtx t1 = gen_reg_rtx (V8DImode);
rtx t2 = gen_reg_rtx (V16SImode);
@@ -18300,13 +18336,10 @@
(V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")
(V8SF "TARGET_AVX2") (V4DF "TARGET_AVX2")
(V16HF "TARGET_AVX512FP16")
- (V16SF "TARGET_AVX512F && TARGET_EVEX512")
- (V8DF "TARGET_AVX512F && TARGET_EVEX512")
- (V16SI "TARGET_AVX512F && TARGET_EVEX512")
- (V8DI "TARGET_AVX512F && TARGET_EVEX512")
- (V32HI "TARGET_AVX512BW && TARGET_EVEX512")
- (V64QI "TARGET_AVX512VBMI && TARGET_EVEX512")
- (V32HF "TARGET_AVX512FP16 && TARGET_EVEX512")])
+ (V16SF "TARGET_AVX512F") (V8DF "TARGET_AVX512F")
+ (V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F")
+ (V32HI "TARGET_AVX512BW") (V64QI "TARGET_AVX512VBMI")
+ (V32HF "TARGET_AVX512FP16")])
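
VEC_PERM_AVX2 gates the generic vec_perm expander below; a full
64-lane byte shuffle now requires only TARGET_AVX512VBMI.  For example
(sketch; assumes -mavx512vbmi):

    #include <immintrin.h>

    /* One vpermb: bytes of a selected by the low 6 bits of idx.  */
    __m512i
    perm (__m512i idx, __m512i a)
    {
      return _mm512_permutexvar_epi8 (idx, a);
    }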
(define_expand "vec_perm<mode>"
[(match_operand:VEC_PERM_AVX2 0 "register_operand")
@@ -18333,7 +18366,7 @@
{
operands[2] = CONSTM1_RTX (<MODE>mode);
- if (!TARGET_AVX512F || (!TARGET_AVX512VL && !TARGET_EVEX512))
+ if (!TARGET_AVX512F)
operands[2] = force_reg (<MODE>mode, operands[2]);
})
@@ -18342,7 +18375,6 @@
(xor:VI (match_operand:VI 1 "bcst_vector_operand" " 0, m,Br")
(match_operand:VI 2 "vector_all_ones_operand" "BC,BC,BC")))]
"TARGET_AVX512F
- && (<MODE_SIZE> == 64 || TARGET_AVX512VL || TARGET_EVEX512)
&& (!<mask_applied>
|| <ssescalarmode>mode == SImode
|| <ssescalarmode>mode == DImode)"
@@ -18409,7 +18441,7 @@
(match_operand:VI 2 "vector_all_ones_operand" "BC,BC,BC")))
(unspec [(match_operand:VI 3 "register_operand" "0,0,0")]
UNSPEC_INSN_FALSE_DEP)]
- "TARGET_AVX512F && (<MODE_SIZE> == 64 || TARGET_AVX512VL || TARGET_EVEX512)"
+ "TARGET_AVX512F"
{
if (TARGET_AVX512VL)
return "vpternlog<ternlogsuffix>\t{$0x55, %1, %0, %0<mask_operand3>|%0<mask_operand3>, %0, %1, 0x55}";
@@ -18433,7 +18465,7 @@
(not:<ssescalarmode>
(match_operand:<ssescalarmode> 1 "nonimmediate_operand"))))]
"<MODE_SIZE> == 64 || TARGET_AVX512VL
- || (TARGET_AVX512F && TARGET_EVEX512 && !TARGET_PREFER_AVX256)"
+ || (TARGET_AVX512F && !TARGET_PREFER_AVX256)"
[(set (match_dup 0)
(xor:VI48_AVX512F
(vec_duplicate:VI48_AVX512F (match_dup 1))
@@ -18587,8 +18619,7 @@
(symbol_ref "<MODE_SIZE> == 64 || TARGET_AVX512VL")
(eq_attr "alternative" "4")
(symbol_ref "<MODE_SIZE> == 64 || TARGET_AVX512VL
- || (TARGET_AVX512F && TARGET_EVEX512
- && !TARGET_PREFER_AVX256)")
+ || (TARGET_AVX512F && !TARGET_PREFER_AVX256)")
]
(const_string "*")))])
@@ -18632,7 +18663,7 @@
(match_operand:<ssescalarmode> 1 "nonimmediate_operand")))
(match_operand:VI 2 "vector_operand")))]
"<MODE_SIZE> == 64 || TARGET_AVX512VL
- || (TARGET_AVX512F && TARGET_EVEX512 && !TARGET_PREFER_AVX256)"
+ || (TARGET_AVX512F && !TARGET_PREFER_AVX256)"
[(set (match_dup 3)
(vec_duplicate:VI (match_dup 1)))
(set (match_dup 0)
@@ -18647,7 +18678,7 @@
(match_operand:<ssescalarmode> 1 "nonimmediate_operand")))
(match_operand:VI 2 "vector_operand")))]
"<MODE_SIZE> == 64 || TARGET_AVX512VL
- || (TARGET_AVX512F && TARGET_EVEX512 && !TARGET_PREFER_AVX256)"
+ || (TARGET_AVX512F && !TARGET_PREFER_AVX256)"
[(set (match_dup 3)
(vec_duplicate:VI (match_dup 1)))
(set (match_dup 0)
@@ -18941,7 +18972,7 @@
(match_operand:VI 1 "bcst_vector_operand" "0,m, 0,vBr"))
(match_operand:VI 2 "bcst_vector_operand" "m,0,vBr, 0")))]
"(<MODE_SIZE> == 64 || TARGET_AVX512VL
- || (TARGET_AVX512F && TARGET_EVEX512 && !TARGET_PREFER_AVX256))
+ || (TARGET_AVX512F && !TARGET_PREFER_AVX256))
&& (register_operand (operands[1], <MODE>mode)
|| register_operand (operands[2], <MODE>mode))"
{
@@ -18974,7 +19005,7 @@
(match_operand:VI 1 "bcst_vector_operand" "%0, 0")
(match_operand:VI 2 "bcst_vector_operand" " m,vBr"))))]
"(<MODE_SIZE> == 64 || TARGET_AVX512VL
- || (TARGET_AVX512F && TARGET_EVEX512 && !TARGET_PREFER_AVX256))
+ || (TARGET_AVX512F && !TARGET_PREFER_AVX256))
&& (register_operand (operands[1], <MODE>mode)
|| register_operand (operands[2], <MODE>mode))"
{
@@ -19005,7 +19036,7 @@
(not:VI (match_operand:VI 1 "bcst_vector_operand" "%0, 0"))
(not:VI (match_operand:VI 2 "bcst_vector_operand" "m,vBr"))))]
"(<MODE_SIZE> == 64 || TARGET_AVX512VL
- || (TARGET_AVX512F && TARGET_EVEX512 && !TARGET_PREFER_AVX256))
+ || (TARGET_AVX512F && !TARGET_PREFER_AVX256))
&& (register_operand (operands[1], <MODE>mode)
|| register_operand (operands[2], <MODE>mode))"
{
@@ -19027,7 +19058,7 @@
(const_string "*")))])
(define_mode_iterator AVX512ZEXTMASK
- [(DI "TARGET_AVX512BW && TARGET_EVEX512") (SI "TARGET_AVX512BW") HI])
+ [(DI "TARGET_AVX512BW") (SI "TARGET_AVX512BW") HI])
(define_insn "<avx512>_testm<mode>3<mask_scalar_merge_name>"
[(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
@@ -19276,7 +19307,7 @@
(const_int 60) (const_int 61)
(const_int 62) (const_int 63)])))]
- "TARGET_AVX512BW && TARGET_EVEX512"
+ "TARGET_AVX512BW"
"vpacksswb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
[(set_attr "type" "sselog")
(set_attr "prefix" "<mask_prefix>")
@@ -19345,7 +19376,7 @@
(const_int 14) (const_int 15)
(const_int 28) (const_int 29)
(const_int 30) (const_int 31)])))]
- "TARGET_AVX512BW && TARGET_EVEX512"
+ "TARGET_AVX512BW"
"vpackssdw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
[(set_attr "type" "sselog")
(set_attr "prefix" "<mask_prefix>")
@@ -19407,7 +19438,7 @@
(const_int 61) (const_int 125)
(const_int 62) (const_int 126)
(const_int 63) (const_int 127)])))]
- "TARGET_AVX512BW && TARGET_EVEX512"
+ "TARGET_AVX512BW"
"vpunpckhbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
[(set_attr "type" "sselog")
(set_attr "prefix" "evex")
@@ -19503,7 +19534,7 @@
(const_int 53) (const_int 117)
(const_int 54) (const_int 118)
(const_int 55) (const_int 119)])))]
- "TARGET_AVX512BW && TARGET_EVEX512"
+ "TARGET_AVX512BW"
"vpunpcklbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
[(set_attr "type" "sselog")
(set_attr "prefix" "evex")
@@ -19727,7 +19758,7 @@
(const_int 11) (const_int 27)
(const_int 14) (const_int 30)
(const_int 15) (const_int 31)])))]
- "TARGET_AVX512F && TARGET_EVEX512"
+ "TARGET_AVX512F"
"vpunpckhdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
[(set_attr "type" "sselog")
(set_attr "prefix" "evex")
@@ -19782,7 +19813,7 @@
(const_int 9) (const_int 25)
(const_int 12) (const_int 28)
(const_int 13) (const_int 29)])))]
- "TARGET_AVX512F && TARGET_EVEX512"
+ "TARGET_AVX512F"
"vpunpckldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
[(set_attr "type" "sselog")
(set_attr "prefix" "evex")
@@ -20488,7 +20519,7 @@
(match_operand:SI 2 "const_0_to_255_operand")
(match_operand:V16SI 3 "register_operand")
(match_operand:HI 4 "register_operand")]
- "TARGET_AVX512F && TARGET_EVEX512"
+ "TARGET_AVX512F"
{
int mask = INTVAL (operands[2]);
emit_insn (gen_avx512f_pshufd_1_mask (operands[0], operands[1],
@@ -20532,7 +20563,7 @@
(match_operand 15 "const_12_to_15_operand")
(match_operand 16 "const_12_to_15_operand")
(match_operand 17 "const_12_to_15_operand")])))]
- "TARGET_AVX512F && TARGET_EVEX512
+ "TARGET_AVX512F
&& INTVAL (operands[2]) + 4 == INTVAL (operands[6])
&& INTVAL (operands[3]) + 4 == INTVAL (operands[7])
&& INTVAL (operands[4]) + 4 == INTVAL (operands[8])
@@ -20698,7 +20729,7 @@
[(match_operand:V32HI 1 "nonimmediate_operand" "vm")
(match_operand:SI 2 "const_0_to_255_operand")]
UNSPEC_PSHUFLW))]
- "TARGET_AVX512BW && TARGET_EVEX512"
+ "TARGET_AVX512BW"
"vpshuflw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
[(set_attr "type" "sselog")
(set_attr "prefix" "evex")
@@ -20874,7 +20905,7 @@
[(match_operand:V32HI 1 "nonimmediate_operand" "vm")
(match_operand:SI 2 "const_0_to_255_operand")]
UNSPEC_PSHUFHW))]
- "TARGET_AVX512BW && TARGET_EVEX512"
+ "TARGET_AVX512BW"
"vpshufhw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
[(set_attr "type" "sselog")
(set_attr "prefix" "evex")
@@ -21408,7 +21439,7 @@
(match_operand:V4TI 1 "register_operand" "v")
(parallel
[(match_operand:SI 2 "const_0_to_3_operand")])))]
- "TARGET_AVX512F && TARGET_EVEX512"
+ "TARGET_AVX512F"
"vextracti32x4\t{%2, %1, %0|%0, %1, %2}"
[(set_attr "type" "sselog")
(set_attr "length_immediate" "1")
@@ -21416,7 +21447,7 @@
(set_attr "mode" "XI")])
(define_mode_iterator VEXTRACTI128_MODE
- [(V4TI "TARGET_AVX512F && TARGET_EVEX512") V2TI])
+ [(V4TI "TARGET_AVX512F") V2TI])
(define_split
[(set (match_operand:TI 0 "nonimmediate_operand")
@@ -21439,7 +21470,7 @@
&& VECTOR_MODE_P (GET_MODE (operands[1]))
&& ((TARGET_SSE && GET_MODE_SIZE (GET_MODE (operands[1])) == 16)
|| (TARGET_AVX && GET_MODE_SIZE (GET_MODE (operands[1])) == 32)
- || (TARGET_AVX512F && TARGET_EVEX512
+ || (TARGET_AVX512F
&& GET_MODE_SIZE (GET_MODE (operands[1])) == 64))
&& (<MODE>mode == SImode || TARGET_64BIT || MEM_P (operands[0]))"
[(set (match_dup 0) (vec_select:SWI48x (match_dup 1)
@@ -21701,6 +21732,19 @@
(const_string "orig")))
(set_attr "mode" "TI,TI,TI,TI,TI,TI,V4SF,V2SF,V2SF")])
+;; Eliminate redundancy caused by
+;; /* Special case TImode to 128-bit vector conversions via V2DI. */
+;; in ix86_expand_vector_move
+
+(define_split
+ [(set (match_operand:V2DI 0 "register_operand")
+ (vec_concat:V2DI
+ (subreg:DI (match_operand:TI 1 "register_operand") 0)
+ (subreg:DI (match_dup 1) 8)))]
+ "TARGET_SSE2 && ix86_pre_reload_split ()"
+ [(set (match_dup 0)
+ (subreg:V2DI (match_dup 1) 0))])
+
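The new split above undoes the vec_concat that ix86_expand_vector_move's TImode special case leaves behind. One plausible source-level trigger (my own example, not from the patch) is a bit-cast from a 128-bit integer to a vector, which ideally should cost at most one register move:

  #include <immintrin.h>
  #include <string.h>

  /* Bit-cast an __int128 to V2DI.  Without the split, the two DImode
     halves can be re-concatenated explicitly; with it, the whole thing
     collapses to a plain TImode->V2DI subreg.  */
  __m128i
  ti_to_v2di (unsigned __int128 x)
  {
    __m128i v;
    memcpy (&v, &x, sizeof (v));
    return v;
  }
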
(define_insn "*vec_concatv2di_0"
[(set (match_operand:V2DI 0 "register_operand" "=v,v ,x")
(vec_concat:V2DI
@@ -22814,7 +22858,7 @@
(const_int 1) (const_int 1)
(const_int 1) (const_int 1)]))
(const_int 1))))]
- "TARGET_AVX512BW && TARGET_EVEX512"
+ "TARGET_AVX512BW"
"vpmulhrsw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
[(set_attr "type" "sseimul")
(set_attr "prefix" "evex")
@@ -23328,10 +23372,10 @@
;; Mode iterator to handle singularity w/ absence of V2DI and V4DI
;; modes for abs instruction on pre AVX-512 targets.
(define_mode_iterator VI1248_AVX512VL_AVX512BW
- [(V64QI "TARGET_AVX512BW && TARGET_EVEX512") (V32QI "TARGET_AVX2") V16QI
- (V32HI "TARGET_AVX512BW && TARGET_EVEX512") (V16HI "TARGET_AVX2") V8HI
- (V16SI "TARGET_AVX512F && TARGET_EVEX512") (V8SI "TARGET_AVX2") V4SI
- (V8DI "TARGET_AVX512F && TARGET_EVEX512") (V4DI "TARGET_AVX512VL")
+ [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI
+ (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI
+ (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI
+ (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX512VL")
(V2DI "TARGET_AVX512VL")])
(define_insn "*abs<mode>2"
@@ -24159,7 +24203,7 @@
[(set (match_operand:V32HI 0 "register_operand" "=v")
(any_extend:V32HI
(match_operand:V32QI 1 "nonimmediate_operand" "vm")))]
- "TARGET_AVX512BW && TARGET_EVEX512"
+ "TARGET_AVX512BW"
"vpmov<extsuffix>bw\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
[(set_attr "type" "ssemov")
(set_attr "prefix" "evex")
@@ -24173,7 +24217,7 @@
(match_operand:V64QI 2 "const0_operand"))
(match_parallel 3 "pmovzx_parallel"
[(match_operand 4 "const_int_operand")])))]
- "TARGET_AVX512BW && TARGET_EVEX512"
+ "TARGET_AVX512BW"
"#"
"&& reload_completed"
[(set (match_dup 0) (zero_extend:V32HI (match_dup 1)))]
@@ -24193,7 +24237,7 @@
(match_operand:V64QI 3 "const0_operand"))
(match_parallel 4 "pmovzx_parallel"
[(match_operand 5 "const_int_operand")])))]
- "TARGET_AVX512BW && TARGET_EVEX512"
+ "TARGET_AVX512BW"
"#"
"&& reload_completed"
[(set (match_dup 0) (zero_extend:V32HI (match_dup 1)))]
@@ -24206,7 +24250,7 @@
[(set (match_operand:V32HI 0 "register_operand")
(any_extend:V32HI
(match_operand:V32QI 1 "nonimmediate_operand")))]
- "TARGET_AVX512BW && TARGET_EVEX512")
+ "TARGET_AVX512BW")
(define_insn "sse4_1_<code>v8qiv8hi2<mask_name>"
[(set (match_operand:V8HI 0 "register_operand" "=Yr,*x,Yw")
@@ -24354,7 +24398,7 @@
[(set (match_operand:V16SI 0 "register_operand" "=v")
(any_extend:V16SI
(match_operand:V16QI 1 "nonimmediate_operand" "vm")))]
- "TARGET_AVX512F && TARGET_EVEX512"
+ "TARGET_AVX512F"
"vpmov<extsuffix>bd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
[(set_attr "type" "ssemov")
(set_attr "prefix" "evex")
@@ -24364,7 +24408,7 @@
[(set (match_operand:V16SI 0 "register_operand")
(any_extend:V16SI
(match_operand:V16QI 1 "nonimmediate_operand")))]
- "TARGET_AVX512F && TARGET_EVEX512")
+ "TARGET_AVX512F")
(define_insn "avx2_<code>v8qiv8si2<mask_name>"
[(set (match_operand:V8SI 0 "register_operand" "=v")
@@ -24497,7 +24541,7 @@
[(set (match_operand:V16SI 0 "register_operand" "=v")
(any_extend:V16SI
(match_operand:V16HI 1 "nonimmediate_operand" "vm")))]
- "TARGET_AVX512F && TARGET_EVEX512"
+ "TARGET_AVX512F"
"vpmov<extsuffix>wd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
[(set_attr "type" "ssemov")
(set_attr "prefix" "evex")
@@ -24507,7 +24551,7 @@
[(set (match_operand:V16SI 0 "register_operand")
(any_extend:V16SI
(match_operand:V16HI 1 "nonimmediate_operand")))]
- "TARGET_AVX512F && TARGET_EVEX512")
+ "TARGET_AVX512F")
(define_insn_and_split "avx512f_zero_extendv16hiv16si2_1"
[(set (match_operand:V32HI 0 "register_operand" "=v")
@@ -24517,7 +24561,7 @@
(match_operand:V32HI 2 "const0_operand"))
(match_parallel 3 "pmovzx_parallel"
[(match_operand 4 "const_int_operand")])))]
- "TARGET_AVX512F && TARGET_EVEX512"
+ "TARGET_AVX512F"
"#"
"&& reload_completed"
[(set (match_dup 0) (zero_extend:V16SI (match_dup 1)))]
@@ -24741,7 +24785,7 @@
(const_int 2) (const_int 3)
(const_int 4) (const_int 5)
(const_int 6) (const_int 7)]))))]
- "TARGET_AVX512F && TARGET_EVEX512"
+ "TARGET_AVX512F"
"vpmov<extsuffix>bq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
[(set_attr "type" "ssemov")
(set_attr "prefix" "evex")
@@ -24751,7 +24795,7 @@
[(set (match_operand:V8DI 0 "register_operand" "=v")
(any_extend:V8DI
(match_operand:V8QI 1 "memory_operand" "m")))]
- "TARGET_AVX512F && TARGET_EVEX512"
+ "TARGET_AVX512F"
"vpmov<extsuffix>bq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
[(set_attr "type" "ssemov")
(set_attr "prefix" "evex")
@@ -24769,7 +24813,7 @@
(const_int 2) (const_int 3)
(const_int 4) (const_int 5)
(const_int 6) (const_int 7)]))))]
- "TARGET_AVX512F && TARGET_EVEX512 && ix86_pre_reload_split ()"
+ "TARGET_AVX512F && ix86_pre_reload_split ()"
"#"
"&& 1"
[(set (match_dup 0)
@@ -24780,7 +24824,7 @@
[(set (match_operand:V8DI 0 "register_operand")
(any_extend:V8DI
(match_operand:V8QI 1 "nonimmediate_operand")))]
- "TARGET_AVX512F && TARGET_EVEX512"
+ "TARGET_AVX512F"
{
if (!MEM_P (operands[1]))
{
@@ -24922,7 +24966,7 @@
[(set (match_operand:V8DI 0 "register_operand" "=v")
(any_extend:V8DI
(match_operand:V8HI 1 "nonimmediate_operand" "vm")))]
- "TARGET_AVX512F && TARGET_EVEX512"
+ "TARGET_AVX512F"
"vpmov<extsuffix>wq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
[(set_attr "type" "ssemov")
(set_attr "prefix" "evex")
@@ -24932,7 +24976,7 @@
[(set (match_operand:V8DI 0 "register_operand")
(any_extend:V8DI
(match_operand:V8HI 1 "nonimmediate_operand")))]
- "TARGET_AVX512F && TARGET_EVEX512")
+ "TARGET_AVX512F")
(define_insn "avx2_<code>v4hiv4di2<mask_name>"
[(set (match_operand:V4DI 0 "register_operand" "=v")
@@ -25059,7 +25103,7 @@
[(set (match_operand:V8DI 0 "register_operand" "=v")
(any_extend:V8DI
(match_operand:V8SI 1 "nonimmediate_operand" "vm")))]
- "TARGET_AVX512F && TARGET_EVEX512"
+ "TARGET_AVX512F"
"vpmov<extsuffix>dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
[(set_attr "type" "ssemov")
(set_attr "prefix" "evex")
@@ -25073,7 +25117,7 @@
(match_operand:V16SI 2 "const0_operand"))
(match_parallel 3 "pmovzx_parallel"
[(match_operand 4 "const_int_operand")])))]
- "TARGET_AVX512F && TARGET_EVEX512"
+ "TARGET_AVX512F"
"#"
"&& reload_completed"
[(set (match_dup 0) (zero_extend:V8DI (match_dup 1)))]
@@ -25092,7 +25136,7 @@
(match_operand:V16SI 3 "const0_operand"))
(match_parallel 4 "pmovzx_parallel"
[(match_operand 5 "const_int_operand")])))]
- "TARGET_AVX512F && TARGET_EVEX512"
+ "TARGET_AVX512F"
"#"
"&& reload_completed"
[(set (match_dup 0) (zero_extend:V8DI (match_dup 1)))]
@@ -25104,7 +25148,7 @@
[(set (match_operand:V8DI 0 "register_operand" "=v")
(any_extend:V8DI
(match_operand:V8SI 1 "nonimmediate_operand" "vm")))]
- "TARGET_AVX512F && TARGET_EVEX512")
+ "TARGET_AVX512F")
(define_insn "avx2_<code>v4siv4di2<mask_name>"
[(set (match_operand:V4DI 0 "register_operand" "=v")
@@ -25505,7 +25549,7 @@
[(match_operand:V16SI 0 "register_operand")
(match_operand:V16SF 1 "nonimmediate_operand")
(match_operand:SI 2 "const_0_to_15_operand")]
- "TARGET_AVX512F && TARGET_EVEX512"
+ "TARGET_AVX512F"
{
rtx tmp = gen_reg_rtx (V16SFmode);
emit_insn (gen_avx512f_rndscalev16sf (tmp, operands[1], operands[2]));
@@ -26518,9 +26562,9 @@
;; XOP packed rotate instructions
(define_expand "rotl<mode>3"
- [(set (match_operand:VI_128 0 "register_operand")
- (rotate:VI_128
- (match_operand:VI_128 1 "nonimmediate_operand")
+ [(set (match_operand:VI248_128 0 "register_operand")
+ (rotate:VI248_128
+ (match_operand:VI248_128 1 "nonimmediate_operand")
(match_operand:SI 2 "general_operand")))]
"TARGET_XOP"
{
@@ -26549,9 +26593,9 @@
})
(define_expand "rotr<mode>3"
- [(set (match_operand:VI_128 0 "register_operand")
- (rotatert:VI_128
- (match_operand:VI_128 1 "nonimmediate_operand")
+ [(set (match_operand:VI248_128 0 "register_operand")
+ (rotatert:VI248_128
+ (match_operand:VI248_128 1 "nonimmediate_operand")
(match_operand:SI 2 "general_operand")))]
"TARGET_XOP"
{
@@ -26723,7 +26767,7 @@
(ashiftrt:V8DI
(match_operand:V8DI 1 "register_operand")
(match_operand:V8DI 2 "nonimmediate_operand")))]
- "TARGET_AVX512F && TARGET_EVEX512")
+ "TARGET_AVX512F")
(define_expand "vashrv4di3"
[(set (match_operand:V4DI 0 "register_operand")
@@ -26814,7 +26858,7 @@
[(set (match_operand:V16SI 0 "register_operand")
(ashiftrt:V16SI (match_operand:V16SI 1 "register_operand")
(match_operand:V16SI 2 "nonimmediate_operand")))]
- "TARGET_AVX512F && TARGET_EVEX512")
+ "TARGET_AVX512F")
(define_expand "vashrv8si3"
[(set (match_operand:V8SI 0 "register_operand")
@@ -26923,31 +26967,122 @@
int i;
if (<CODE> != ASHIFT)
- {
- if (CONST_INT_P (operands[2]))
- operands[2] = GEN_INT (-INTVAL (operands[2]));
- else
- negate = true;
- }
+ {
+ if (CONST_INT_P (operands[2]))
+ operands[2] = GEN_INT (-INTVAL (operands[2]));
+ else
+ negate = true;
+ }
par = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16));
tmp = lowpart_subreg (QImode, operands[2], SImode);
for (i = 0; i < 16; i++)
- XVECEXP (par, 0, i) = tmp;
+ XVECEXP (par, 0, i) = tmp;
tmp = gen_reg_rtx (V16QImode);
emit_insn (gen_vec_initv16qiqi (tmp, par));
if (negate)
- emit_insn (gen_negv16qi2 (tmp, tmp));
+ emit_insn (gen_negv16qi2 (tmp, tmp));
gen = (<CODE> == LSHIFTRT ? gen_xop_shlv16qi3 : gen_xop_shav16qi3);
emit_insn (gen (operands[0], operands[1], tmp));
}
+ else if (TARGET_GFNI && CONST_INT_P (operands[2])
+ && (<MODE_SIZE> == 64
+ || !(INTVAL (operands[2]) == 7 && <CODE> == ASHIFTRT)))
+ {
+ rtx matrix = ix86_vgf2p8affine_shift_matrix (operands[0], operands[2],
+ <CODE>);
+ emit_insn (gen_vgf2p8affineqb_<mode> (operands[0], operands[1], matrix,
+ const0_rtx));
+ }
else
ix86_expand_vecop_qihi (<CODE>, operands[0], operands[1], operands[2]);
DONE;
})
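The GFNI branch above relies on the fact that a constant per-byte shift is a linear map over GF(2), so it can be encoded as the 8x8 bit matrix that vgf2p8affineqb applies to every byte. Below is a minimal self-contained model of that idea, using a row/bit convention of my own choosing; the instruction's in-register layout differs, and ix86_vgf2p8affine_shift_matrix produces the real encoding:

  #include <assert.h>
  #include <stdint.h>

  /* Bit r of the result is the parity of (rows[r] & x): an 8x8 bit-matrix
     times a byte over GF(2), i.e. what vgf2p8affineqb does per byte
     (with the affine constant b = 0), modulo bit-ordering conventions.  */
  static uint8_t
  affine8 (const uint8_t rows[8], uint8_t x)
  {
    uint8_t y = 0;
    for (int r = 0; r < 8; r++)
      y |= (uint8_t) (__builtin_parity (rows[r] & x) << r);
    return y;
  }

  int
  main (void)
  {
    /* Matrix for a logical left shift by K: result bit r is source
       bit r - K, so row r has only column r - K set.  */
    const int K = 3;
    uint8_t rows[8] = { 0 };
    for (int r = K; r < 8; r++)
      rows[r] = (uint8_t) (1u << (r - K));

    for (unsigned x = 0; x < 256; x++)
      assert (affine8 (rows, (uint8_t) x) == (uint8_t) (x << K));
    return 0;
  }
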
+(define_expand "cond_<insn><mode>"
+ [(set (match_operand:VI1_AVX512VL 0 "register_operand")
+ (vec_merge:VI1_AVX512VL
+ (any_shift:VI1_AVX512VL
+ (match_operand:VI1_AVX512VL 2 "register_operand")
+ (match_operand:VI1_AVX512VL 3 "const_vec_dup_operand"))
+ (match_operand:VI1_AVX512VL 4 "nonimm_or_0_operand")
+ (match_operand:<avx512fmaskmode> 1 "register_operand")))]
+ "TARGET_GFNI && TARGET_AVX512F"
+{
+ rtx count = XVECEXP (operands[3], 0, 0);
+ rtx matrix = ix86_vgf2p8affine_shift_matrix (operands[0], count, <CODE>);
+ emit_insn (gen_vgf2p8affineqb_<mode>_mask (operands[0], operands[2], matrix,
+ const0_rtx, operands[4],
+ operands[1]));
+ DONE;
+})
+
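The cond_<insn> expander just defined is the vectorizer-facing hook for predicated byte shifts. A sketch of a loop shape the autovectorizer might lower through cond_ashl, and hence to a masked vgf2p8affineqb; whether it actually does depends on flags and cost decisions, so this is illustrative only:

  void
  cond_shift (unsigned char *restrict r, const unsigned char *restrict a,
              const unsigned char *restrict sel, int n)
  {
    for (int i = 0; i < n; i++)
      /* Shift amount is a loop-invariant constant, matching the
         const_vec_dup_operand in the pattern above.  */
      r[i] = sel[i] ? (unsigned char) (a[i] << 3) : r[i];
  }
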
+(define_expand "<insn><mode>3"
+ [(set (match_operand:VI1_AVX512_3264 0 "register_operand")
+ (any_rotate:VI1_AVX512_3264
+ (match_operand:VI1_AVX512_3264 1 "register_operand")
+ (match_operand:SI 2 "const_int_operand")))]
+ "TARGET_GFNI"
+{
+ rtx matrix = ix86_vgf2p8affine_shift_matrix (operands[0], operands[2], <CODE>);
+ emit_insn (gen_vgf2p8affineqb_<mode> (operands[0], operands[1], matrix,
+ const0_rtx));
+ DONE;
+})
+
+(define_expand "<insn>v16qi3"
+ [(set (match_operand:V16QI 0 "register_operand")
+ (any_rotate:V16QI
+ (match_operand:V16QI 1 "nonimmediate_operand")
+ (match_operand:SI 2 "general_operand")))]
+ "TARGET_GFNI || TARGET_XOP"
+{
+ /* Handle the V16QI XOP case to avoid a conflict with the other expand. */
+ if (TARGET_XOP)
+ {
+ if (! const_0_to_7_operand (operands[2], SImode))
+ {
+ rtvec vs = rtvec_alloc (16);
+ rtx par = gen_rtx_PARALLEL (V16QImode, vs);
+ rtx reg = gen_reg_rtx (V16QImode);
+ rtx op2 = operands[2];
+ int i;
+
+ if (GET_MODE (op2) != QImode)
+ {
+ op2 = gen_reg_rtx (QImode);
+ convert_move (op2, operands[2], false);
+ }
+
+ for (i = 0; i < 16; i++)
+ RTVEC_ELT (vs, i) = op2;
+
+ emit_insn (gen_vec_initv16qiqi (reg, par));
+ if (<CODE> == ROTATERT)
+ {
+ rtx neg = gen_reg_rtx (V16QImode);
+ emit_insn (gen_negv16qi2 (neg, reg));
+ reg = neg;
+ }
+ emit_insn (gen_xop_vrotlv16qi3 (operands[0], operands[1], reg));
+ DONE;
+ }
+ }
+ else if (TARGET_GFNI && CONST_INT_P (operands[2]))
+ {
+ rtx matrix = ix86_vgf2p8affine_shift_matrix (operands[0], operands[2], <CODE>);
+ emit_insn (gen_vgf2p8affineqb_v16qi (operands[0],
+ force_reg (V16QImode, operands[1]),
+ matrix, const0_rtx));
+ DONE;
+ }
+ else
+ FAIL;
+})
+
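For the rotate expanders added above, the matching source-level idiom is the usual shift-or pair, written here with GNU C vector extensions. GCC's rotate recognition can then funnel it into these patterns; this is a sketch of the intended path, not a guaranteed code-generation outcome:

  typedef unsigned char v16qi __attribute__ ((vector_size (16)));

  /* Recognized as a per-byte rotate-left by 3; with GFNI it can be
     emitted as a single vgf2p8affineqb instead of shift+shift+or.  */
  v16qi
  rotl3 (v16qi x)
  {
    return (x << 3) | (x >> 5);
  }
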
(define_expand "ashrv2di3"
[(set (match_operand:V2DI 0 "register_operand")
(ashiftrt:V2DI
@@ -27257,12 +27392,12 @@
(set_attr "mode" "OI")])
(define_mode_attr pbroadcast_evex_isa
- [(V64QI "avx512bw_512") (V32QI "avx512bw") (V16QI "avx512bw")
- (V32HI "avx512bw_512") (V16HI "avx512bw") (V8HI "avx512bw")
- (V16SI "avx512f_512") (V8SI "avx512f") (V4SI "avx512f")
- (V8DI "avx512f_512") (V4DI "avx512f") (V2DI "avx512f")
- (V32HF "avx512bw_512") (V16HF "avx512bw") (V8HF "avx512bw")
- (V32BF "avx512bw_512") (V16BF "avx512bw") (V8BF "avx512bw")])
+ [(V64QI "avx512bw") (V32QI "avx512bw") (V16QI "avx512bw")
+ (V32HI "avx512bw") (V16HI "avx512bw") (V8HI "avx512bw")
+ (V16SI "avx512f") (V8SI "avx512f") (V4SI "avx512f")
+ (V8DI "avx512f") (V4DI "avx512f") (V2DI "avx512f")
+ (V32HF "avx512bw") (V16HF "avx512bw") (V8HF "avx512bw")
+ (V32BF "avx512bw") (V16BF "avx512bw") (V8BF "avx512bw")])
(define_insn "avx2_pbroadcast<mode>"
[(set (match_operand:VIHFBF 0 "register_operand" "=x,v")
@@ -27806,7 +27941,7 @@
(set (attr "enabled")
(if_then_else (eq_attr "alternative" "1")
(symbol_ref "TARGET_AVX512F && !TARGET_AVX512VL
- && TARGET_EVEX512 && !TARGET_PREFER_AVX256")
+ && !TARGET_PREFER_AVX256")
(const_string "*")))])
(define_insn "*vec_dupv4si"
@@ -27834,7 +27969,7 @@
(set (attr "enabled")
(if_then_else (eq_attr "alternative" "1")
(symbol_ref "TARGET_AVX512F && !TARGET_AVX512VL
- && TARGET_EVEX512 && !TARGET_PREFER_AVX256")
+ && !TARGET_PREFER_AVX256")
(const_string "*")))])
(define_insn "*vec_dupv2di"
@@ -27849,7 +27984,7 @@
%vmovddup\t{%1, %0|%0, %1}
movlhps\t%0, %0"
[(set_attr "isa" "sse2_noavx,avx,avx512f,sse3,noavx")
- (set_attr "type" "sselog1,sselog1,ssemov,sselog1,ssemov")
+ (set_attr "type" "sselog1,sselog1,ssemov,ssemov,ssemov")
(set_attr "prefix" "orig,maybe_evex,evex,maybe_vex,orig")
(set (attr "mode")
(cond [(and (eq_attr "alternative" "2")
@@ -27865,8 +28000,7 @@
(if_then_else
(eq_attr "alternative" "2")
(symbol_ref "TARGET_AVX512VL
- || (TARGET_AVX512F && TARGET_EVEX512
- && !TARGET_PREFER_AVX256)")
+ || (TARGET_AVX512F && !TARGET_PREFER_AVX256)")
(const_string "*")))])
(define_insn "avx2_vbroadcasti128_<mode>"
@@ -27946,7 +28080,7 @@
[(set_attr "type" "ssemov")
(set_attr "prefix_extra" "1")
(set_attr "prefix" "maybe_evex")
- (set_attr "isa" "avx2,noavx2,avx2,avx512f_512,noavx2")
+ (set_attr "isa" "avx2,noavx2,avx2,avx512f,noavx2")
(set_attr "mode" "<sseinsnmode>,V8SF,<sseinsnmode>,<sseinsnmode>,V8SF")])
(define_split
@@ -28010,8 +28144,8 @@
;; For broadcast[i|f]32x2. Yes there is no v4sf version, only v4si.
(define_mode_iterator VI4F_BRCST32x2
- [(V16SI "TARGET_EVEX512") (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
- (V16SF "TARGET_EVEX512") (V8SF "TARGET_AVX512VL")])
+ [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
+ V16SF (V8SF "TARGET_AVX512VL")])
(define_mode_attr 64x2mode
[(V8DF "V2DF") (V8DI "V2DI") (V4DI "V2DI") (V4DF "V2DF")])
@@ -28061,8 +28195,7 @@
;; For broadcast[i|f]64x2
(define_mode_iterator VI8F_BRCST64x2
- [(V8DI "TARGET_EVEX512") (V8DF "TARGET_EVEX512")
- (V4DI "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL")])
+ [V8DI V8DF (V4DI "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL")])
(define_insn "<mask_codefor>avx512dq_broadcast<mode><mask_name>_1"
[(set (match_operand:VI8F_BRCST64x2 0 "register_operand" "=v,v")
@@ -28118,27 +28251,26 @@
(set_attr "mode" "<sseinsnmode>")])
(define_mode_iterator VPERMI2
- [(V16SI "TARGET_EVEX512") (V16SF "TARGET_EVEX512")
- (V8DI "TARGET_EVEX512") (V8DF "TARGET_EVEX512")
+ [V16SI V16SF V8DI V8DF
(V8SI "TARGET_AVX512VL") (V8SF "TARGET_AVX512VL")
(V4DI "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL")
(V4SI "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
(V2DI "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")
- (V32HI "TARGET_AVX512BW && TARGET_EVEX512")
+ (V32HI "TARGET_AVX512BW")
(V16HI "TARGET_AVX512BW && TARGET_AVX512VL")
(V8HI "TARGET_AVX512BW && TARGET_AVX512VL")
- (V64QI "TARGET_AVX512VBMI && TARGET_EVEX512")
+ (V64QI "TARGET_AVX512VBMI")
(V32QI "TARGET_AVX512VBMI && TARGET_AVX512VL")
(V16QI "TARGET_AVX512VBMI && TARGET_AVX512VL")])
(define_mode_iterator VPERMI2I
- [(V16SI "TARGET_EVEX512") (V8DI "TARGET_EVEX512")
+ [V16SI V8DI
(V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
(V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")
- (V32HI "TARGET_AVX512BW && TARGET_EVEX512")
+ (V32HI "TARGET_AVX512BW")
(V16HI "TARGET_AVX512BW && TARGET_AVX512VL")
(V8HI "TARGET_AVX512BW && TARGET_AVX512VL")
- (V64QI "TARGET_AVX512VBMI && TARGET_EVEX512")
+ (V64QI "TARGET_AVX512VBMI")
(V32QI "TARGET_AVX512VBMI && TARGET_AVX512VL")
(V16QI "TARGET_AVX512VBMI && TARGET_AVX512VL")])
@@ -28813,29 +28945,28 @@
;; Modes handled by vec_init expanders.
(define_mode_iterator VEC_INIT_MODE
- [(V64QI "TARGET_AVX512F && TARGET_EVEX512") (V32QI "TARGET_AVX") V16QI
- (V32HI "TARGET_AVX512F && TARGET_EVEX512") (V16HI "TARGET_AVX") V8HI
- (V16SI "TARGET_AVX512F && TARGET_EVEX512") (V8SI "TARGET_AVX") V4SI
- (V8DI "TARGET_AVX512F && TARGET_EVEX512") (V4DI "TARGET_AVX") V2DI
- (V32HF "TARGET_AVX512F && TARGET_EVEX512") (V16HF "TARGET_AVX") V8HF
- (V32BF "TARGET_AVX512F && TARGET_EVEX512") (V16BF "TARGET_AVX") V8BF
- (V16SF "TARGET_AVX512F && TARGET_EVEX512") (V8SF "TARGET_AVX") V4SF
- (V8DF "TARGET_AVX512F && TARGET_EVEX512")
- (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")
- (V4TI "TARGET_AVX512F && TARGET_EVEX512") (V2TI "TARGET_AVX")])
+ [(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI
+ (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI
+ (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
+ (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI
+ (V32HF "TARGET_AVX512F") (V16HF "TARGET_AVX") V8HF
+ (V32BF "TARGET_AVX512F") (V16BF "TARGET_AVX") V8BF
+ (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
+ (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")
+ (V4TI "TARGET_AVX512F") (V2TI "TARGET_AVX")])
;; Likewise, but for initialization from half sized vectors.
;; Thus, these are all VEC_INIT_MODE modes except V2??.
(define_mode_iterator VEC_INIT_HALF_MODE
- [(V64QI "TARGET_AVX512F && TARGET_EVEX512") (V32QI "TARGET_AVX") V16QI
- (V32HI "TARGET_AVX512F && TARGET_EVEX512") (V16HI "TARGET_AVX") V8HI
- (V16SI "TARGET_AVX512F && TARGET_EVEX512") (V8SI "TARGET_AVX") V4SI
- (V8DI "TARGET_AVX512F && TARGET_EVEX512") (V4DI "TARGET_AVX")
- (V32HF "TARGET_AVX512F && TARGET_EVEX512") (V16HF "TARGET_AVX") V8HF
- (V32BF "TARGET_AVX512F && TARGET_EVEX512") (V16BF "TARGET_AVX") V8BF
- (V16SF "TARGET_AVX512F && TARGET_EVEX512") (V8SF "TARGET_AVX") V4SF
- (V8DF "TARGET_AVX512F && TARGET_EVEX512") (V4DF "TARGET_AVX")
- (V4TI "TARGET_AVX512F && TARGET_EVEX512")])
+ [(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI
+ (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI
+ (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
+ (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX")
+ (V32HF "TARGET_AVX512F") (V16HF "TARGET_AVX") V8HF
+ (V32BF "TARGET_AVX512F") (V16BF "TARGET_AVX") V8BF
+ (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
+ (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX")
+ (V4TI "TARGET_AVX512F")])
(define_expand "vec_init<mode><ssescalarmodelower>"
[(match_operand:VEC_INIT_MODE 0 "register_operand")
@@ -29096,7 +29227,7 @@
(unspec:V16SF
[(match_operand:V16HI 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
UNSPEC_VCVTPH2PS))]
- "TARGET_AVX512F && TARGET_EVEX512"
+ "TARGET_AVX512F"
"vcvtph2ps\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
[(set_attr "type" "ssecvt")
(set_attr "prefix" "evex")
@@ -29186,7 +29317,7 @@
UNSPEC_VCVTPS2PH)
(match_operand:V16HI 3 "nonimm_or_0_operand")
(match_operand:HI 4 "register_operand")))]
- "TARGET_AVX512F && TARGET_EVEX512"
+ "TARGET_AVX512F"
{
int round = INTVAL (operands[2]);
/* Separate {sae} from rounding control imm,
@@ -29205,7 +29336,7 @@
[(match_operand:V16SF 1 "register_operand" "v")
(match_operand:SI 2 "const_0_to_255_operand")]
UNSPEC_VCVTPS2PH))]
- "TARGET_AVX512F && TARGET_EVEX512"
+ "TARGET_AVX512F"
"vcvtps2ph\t{%2, <round_saeonly_mask_op3>%1, %0<mask_operand3>|%0<mask_operand3>, %1<round_saeonly_mask_op3>, %2}"
[(set_attr "type" "ssecvt")
(set_attr "prefix" "evex")
@@ -29217,7 +29348,7 @@
[(match_operand:V16SF 1 "register_operand" "v")
(match_operand:SI 2 "const_0_to_255_operand")]
UNSPEC_VCVTPS2PH))]
- "TARGET_AVX512F && TARGET_EVEX512"
+ "TARGET_AVX512F"
"vcvtps2ph\t{%2, %1, %0<merge_mask_operand3>|%0<merge_mask_operand3>, %1, %2}"
[(set_attr "type" "ssecvt")
(set_attr "prefix" "evex")
@@ -30196,7 +30327,7 @@
(match_operand:V8DI 2 "register_operand" "v")
(match_operand:V8DI 3 "nonimmediate_operand" "vm")]
VPMADD52))]
- "TARGET_AVX512IFMA && TARGET_EVEX512"
+ "TARGET_AVX512IFMA"
"vpmadd52<vpmadd52type>\t{%3, %2, %0|%0, %2, %3}"
[(set_attr "type" "ssemuladd")
(set_attr "prefix" "evex")
@@ -30567,7 +30698,7 @@
(match_operand:V16SI 2 "register_operand" "v")
(match_operand:V16SI 3 "nonimmediate_operand" "vm")]
UNSPEC_VPDPBUSD))]
- "TARGET_AVX512VNNI && TARGET_EVEX512"
+ "TARGET_AVX512VNNI"
"vpdpbusd\t{%3, %2, %0|%0, %2, %3}"
[(set_attr ("prefix") ("evex"))])
@@ -30636,7 +30767,7 @@
(match_operand:V16SI 2 "register_operand" "v")
(match_operand:V16SI 3 "nonimmediate_operand" "vm")]
UNSPEC_VPDPBUSDS))]
- "TARGET_AVX512VNNI && TARGET_EVEX512"
+ "TARGET_AVX512VNNI"
"vpdpbusds\t{%3, %2, %0|%0, %2, %3}"
[(set_attr ("prefix") ("evex"))])
@@ -30705,7 +30836,7 @@
(match_operand:V16SI 2 "register_operand" "v")
(match_operand:V16SI 3 "nonimmediate_operand" "vm")]
UNSPEC_VPDPWSSD))]
- "TARGET_AVX512VNNI && TARGET_EVEX512"
+ "TARGET_AVX512VNNI"
"vpdpwssd\t{%3, %2, %0|%0, %2, %3}"
[(set_attr ("prefix") ("evex"))])
@@ -30774,7 +30905,7 @@
(match_operand:V16SI 2 "register_operand" "v")
(match_operand:V16SI 3 "nonimmediate_operand" "vm")]
UNSPEC_VPDPWSSDS))]
- "TARGET_AVX512VNNI && TARGET_EVEX512"
+ "TARGET_AVX512VNNI"
"vpdpwssds\t{%3, %2, %0|%0, %2, %3}"
[(set_attr ("prefix") ("evex"))])
@@ -30930,8 +31061,7 @@
(set_attr "mode" "<sseinsnmode>")])
(define_mode_iterator VI48_AVX512VP2VL
- [(V8DI "TARGET_EVEX512")
- (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")
+ [V8DI (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")
(V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")])
(define_mode_iterator MASK_DWI [P2QI P2HI])
@@ -30973,12 +31103,12 @@
(unspec:P2HI [(match_operand:V16SI 1 "register_operand" "v")
(match_operand:V16SI 2 "vector_operand" "vm")]
UNSPEC_VP2INTERSECT))]
- "TARGET_AVX512VP2INTERSECT && TARGET_EVEX512"
+ "TARGET_AVX512VP2INTERSECT"
"vp2intersectd\t{%2, %1, %0|%0, %1, %2}"
[(set_attr ("prefix") ("evex"))])
(define_mode_iterator VF_AVX512BF16VL
- [(V32BF "TARGET_EVEX512") (V16BF "TARGET_AVX512VL") (V8BF "TARGET_AVX512VL")])
+ [V32BF (V16BF "TARGET_AVX512VL") (V8BF "TARGET_AVX512VL")])
;; Converting from BF to SF
(define_mode_attr bf16_cvt_2sf
[(V32BF "V16SF") (V16BF "V8SF") (V8BF "V4SF")])
@@ -31098,7 +31228,7 @@
"vcvtneps2bf16{x}\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}")
(define_mode_iterator VF1_AVX512_256
- [(V16SF "TARGET_EVEX512") (V8SF "TARGET_AVX512VL")])
+ [V16SF (V8SF "TARGET_AVX512VL")])
(define_expand "avx512f_cvtneps2bf16_<mode>_maskz"
[(match_operand:<sf_cvt_bf16> 0 "register_operand")
@@ -31144,7 +31274,7 @@
[(set (match_operand:V16BF 0 "register_operand")
(float_truncate:V16BF
(match_operand:V16SF 1 "nonimmediate_operand")))]
- "TARGET_AVX512BW && TARGET_EVEX512
+ "TARGET_AVX512BW
&& !HONOR_NANS (BFmode) && !flag_rounding_math
&& (flag_unsafe_math_optimizations || TARGET_AVX512BF16)"
{
@@ -31428,10 +31558,10 @@
;; vinserti64x4 $0x1, %ymm15, %zmm15, %zmm15
(define_mode_iterator INT_BROADCAST_MODE
- [(V64QI "TARGET_AVX512F && TARGET_EVEX512") (V32QI "TARGET_AVX") V16QI
- (V32HI "TARGET_AVX512F && TARGET_EVEX512") (V16HI "TARGET_AVX") V8HI
- (V16SI "TARGET_AVX512F && TARGET_EVEX512") (V8SI "TARGET_AVX") V4SI
- (V8DI "TARGET_AVX512F && TARGET_EVEX512 && TARGET_64BIT")
+ [(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI
+ (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI
+ (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
+ (V8DI "TARGET_AVX512F && TARGET_64BIT")
(V4DI "TARGET_AVX && TARGET_64BIT") (V2DI "TARGET_64BIT")])
;; Broadcast from an integer. NB: Enable broadcast only if we can move
@@ -31705,8 +31835,8 @@
(set_attr "mode" "<sseinsnmode>")])
(define_insn "avx10_2_cvt2ps2phx_<mode><mask_name><round_name>"
- [(set (match_operand:VHF_AVX10_2 0 "register_operand" "=v")
- (vec_concat:VHF_AVX10_2
+ [(set (match_operand:VHF_AVX512VL 0 "register_operand" "=v")
+ (vec_concat:VHF_AVX512VL
(float_truncate:<ssehalfvecmode>
(match_operand:<ssePSmode> 2 "<round_nimm_predicate>" "<round_constraint>"))
(float_truncate:<ssehalfvecmode>
@@ -31730,8 +31860,8 @@
(define_insn "vcvt<convertfp8_pack><mode><mask_name>"
[(set (match_operand:<ssebvecmode> 0 "register_operand" "=v")
(unspec:<ssebvecmode>
- [(match_operand:VHF_AVX10_2 1 "register_operand" "v")
- (match_operand:VHF_AVX10_2 2 "nonimmediate_operand" "vm")]
+ [(match_operand:VHF_AVX512VL 1 "register_operand" "v")
+ (match_operand:VHF_AVX512VL 2 "nonimmediate_operand" "vm")]
UNSPEC_CONVERTFP8_PACK))]
"TARGET_AVX10_2"
"vcvt<convertfp8_pack>\t{%2, %1, %0<mask_operand3>|%0<mask_operand2>, %1, %2}"
@@ -31814,7 +31944,7 @@
[(set_attr "prefix" "evex")])
(define_mode_iterator VHF_AVX10_2_2
- [(V32HF "TARGET_AVX10_2") V16HF])
+ [V32HF V16HF])
(define_insn "vcvt<biasph2fp8_pack><mode><mask_name>"
[(set (match_operand:<ssebvecmode_2> 0 "register_operand" "=v")
@@ -31911,8 +32041,8 @@
[(set_attr "prefix" "evex")])
(define_insn "vcvthf82ph<mode><mask_name>"
- [(set (match_operand:VHF_AVX10_2 0 "register_operand" "=v")
- (unspec:VHF_AVX10_2
+ [(set (match_operand:VHF_AVX512VL 0 "register_operand" "=v")
+ (unspec:VHF_AVX512VL
[(match_operand:<ssebvecmode_2> 1 "nonimmediate_operand" "vm")]
UNSPEC_VCVTHF82PH))]
"TARGET_AVX10_2"
@@ -31934,8 +32064,8 @@
(define_expand "usdot_prod<sseunpackmodelower><mode>"
[(match_operand:<sseunpackmode> 0 "register_operand")
- (match_operand:VI2_AVX10_2 1 "register_operand")
- (match_operand:VI2_AVX10_2 2 "register_operand")
+ (match_operand:VI2_AVX512F 1 "register_operand")
+ (match_operand:VI2_AVX512F 2 "register_operand")
(match_operand:<sseunpackmode> 3 "register_operand")]
"TARGET_AVXVNNIINT16 || TARGET_AVX10_2"
{
@@ -31952,8 +32082,8 @@
(define_expand "udot_prod<sseunpackmodelower><mode>"
[(match_operand:<sseunpackmode> 0 "register_operand")
- (match_operand:VI2_AVX10_2 1 "register_operand")
- (match_operand:VI2_AVX10_2 2 "register_operand")
+ (match_operand:VI2_AVX512F 1 "register_operand")
+ (match_operand:VI2_AVX512F 2 "register_operand")
(match_operand:<sseunpackmode> 3 "register_operand")]
"TARGET_AVXVNNIINT16 || TARGET_AVX10_2"
{
@@ -32032,23 +32162,23 @@
[(set_attr "prefix" "evex")])
(define_insn "vdpphps_<mode>"
- [(set (match_operand:VF1_AVX10_2 0 "register_operand" "=v")
- (unspec:VF1_AVX10_2
- [(match_operand:VF1_AVX10_2 1 "register_operand" "0")
- (match_operand:VF1_AVX10_2 2 "register_operand" "v")
- (match_operand:VF1_AVX10_2 3 "nonimmediate_operand" "vm")]
+ [(set (match_operand:VF1_AVX512VL 0 "register_operand" "=v")
+ (unspec:VF1_AVX512VL
+ [(match_operand:VF1_AVX512VL 1 "register_operand" "0")
+ (match_operand:VF1_AVX512VL 2 "register_operand" "v")
+ (match_operand:VF1_AVX512VL 3 "nonimmediate_operand" "vm")]
UNSPEC_VDPPHPS))]
"TARGET_AVX10_2"
"vdpphps\t{%3, %2, %0|%0, %2, %3}"
[(set_attr "prefix" "evex")])
(define_insn "vdpphps_<mode>_mask"
- [(set (match_operand:VF1_AVX10_2 0 "register_operand" "=v")
- (vec_merge:VF1_AVX10_2
- (unspec:VF1_AVX10_2
- [(match_operand:VF1_AVX10_2 1 "register_operand" "0")
- (match_operand:VF1_AVX10_2 2 "register_operand" "v")
- (match_operand:VF1_AVX10_2 3 "nonimmediate_operand" "vm")]
+ [(set (match_operand:VF1_AVX512VL 0 "register_operand" "=v")
+ (vec_merge:VF1_AVX512VL
+ (unspec:VF1_AVX512VL
+ [(match_operand:VF1_AVX512VL 1 "register_operand" "0")
+ (match_operand:VF1_AVX512VL 2 "register_operand" "v")
+ (match_operand:VF1_AVX512VL 3 "nonimmediate_operand" "vm")]
UNSPEC_VDPPHPS)
(match_dup 1)
(match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
@@ -32057,10 +32187,10 @@
[(set_attr "prefix" "evex")])
(define_expand "vdpphps_<mode>_maskz"
- [(match_operand:VF1_AVX10_2 0 "register_operand")
- (match_operand:VF1_AVX10_2 1 "register_operand")
- (match_operand:VF1_AVX10_2 2 "register_operand")
- (match_operand:VF1_AVX10_2 3 "nonimmediate_operand")
+ [(match_operand:VF1_AVX512VL 0 "register_operand")
+ (match_operand:VF1_AVX512VL 1 "register_operand")
+ (match_operand:VF1_AVX512VL 2 "register_operand")
+ (match_operand:VF1_AVX512VL 3 "nonimmediate_operand")
(match_operand:<avx512fmaskmode> 4 "register_operand")]
"TARGET_AVX10_2"
{
@@ -32070,60 +32200,60 @@
})
(define_insn "vdpphps_<mode>_maskz_1"
- [(set (match_operand:VF1_AVX10_2 0 "register_operand" "=v")
- (vec_merge:VF1_AVX10_2
- (unspec:VF1_AVX10_2
- [(match_operand:VF1_AVX10_2 1 "register_operand" "0")
- (match_operand:VF1_AVX10_2 2 "register_operand" "v")
- (match_operand:VF1_AVX10_2 3 "nonimmediate_operand" "vm")]
+ [(set (match_operand:VF1_AVX512VL 0 "register_operand" "=v")
+ (vec_merge:VF1_AVX512VL
+ (unspec:VF1_AVX512VL
+ [(match_operand:VF1_AVX512VL 1 "register_operand" "0")
+ (match_operand:VF1_AVX512VL 2 "register_operand" "v")
+ (match_operand:VF1_AVX512VL 3 "nonimmediate_operand" "vm")]
UNSPEC_VDPPHPS)
- (match_operand:VF1_AVX10_2 4 "const0_operand" "C")
+ (match_operand:VF1_AVX512VL 4 "const0_operand" "C")
(match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
"TARGET_AVX10_2"
"vdpphps\t{%3, %2, %0%{%5%}%N4|%0%{%5%}%N4, %2, %3}"
[(set_attr "prefix" "evex")])
(define_insn "avx10_2_scalefbf16_<mode><mask_name>"
- [(set (match_operand:VBF_AVX10_2 0 "register_operand" "=v")
- (unspec:VBF_AVX10_2
- [(match_operand:VBF_AVX10_2 1 "register_operand" "v")
- (match_operand:VBF_AVX10_2 2 "nonimmediate_operand" "vm")]
+ [(set (match_operand:VBF 0 "register_operand" "=v")
+ (unspec:VBF
+ [(match_operand:VBF 1 "register_operand" "v")
+ (match_operand:VBF 2 "nonimmediate_operand" "vm")]
UNSPEC_VSCALEFBF16))]
"TARGET_AVX10_2"
"vscalefbf16\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
[(set_attr "prefix" "evex")])
(define_expand "<code><mode>3"
- [(set (match_operand:VBF_AVX10_2 0 "register_operand")
- (smaxmin:VBF_AVX10_2
- (match_operand:VBF_AVX10_2 1 "register_operand")
- (match_operand:VBF_AVX10_2 2 "nonimmediate_operand")))]
+ [(set (match_operand:VBF 0 "register_operand")
+ (smaxmin:VBF
+ (match_operand:VBF 1 "register_operand")
+ (match_operand:VBF 2 "nonimmediate_operand")))]
"TARGET_AVX10_2")
(define_insn "avx10_2_<code>bf16_<mode><mask_name>"
- [(set (match_operand:VBF_AVX10_2 0 "register_operand" "=v")
- (smaxmin:VBF_AVX10_2
- (match_operand:VBF_AVX10_2 1 "register_operand" "v")
- (match_operand:VBF_AVX10_2 2 "nonimmediate_operand" "vm")))]
+ [(set (match_operand:VBF 0 "register_operand" "=v")
+ (smaxmin:VBF
+ (match_operand:VBF 1 "register_operand" "v")
+ (match_operand:VBF 2 "nonimmediate_operand" "vm")))]
"TARGET_AVX10_2"
"v<maxmin_float>bf16\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
[(set_attr "prefix" "evex")
(set_attr "mode" "<MODE>")])
(define_insn "avx10_2_<insn>bf16_<mode><mask_name>"
- [(set (match_operand:VBF_AVX10_2 0 "register_operand" "=v")
- (plusminusmultdiv:VBF_AVX10_2
- (match_operand:VBF_AVX10_2 1 "register_operand" "v")
- (match_operand:VBF_AVX10_2 2 "nonimmediate_operand" "vm")))]
+ [(set (match_operand:VBF 0 "register_operand" "=v")
+ (plusminusmultdiv:VBF
+ (match_operand:VBF 1 "register_operand" "v")
+ (match_operand:VBF 2 "nonimmediate_operand" "vm")))]
"TARGET_AVX10_2"
"v<insn>bf16\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
[(set_attr "prefix" "evex")])
(define_expand "avx10_2_fmaddbf16_<mode>_maskz"
- [(match_operand:VBF_AVX10_2 0 "register_operand")
- (match_operand:VBF_AVX10_2 1 "nonimmediate_operand")
- (match_operand:VBF_AVX10_2 2 "nonimmediate_operand")
- (match_operand:VBF_AVX10_2 3 "nonimmediate_operand")
+ [(match_operand:VBF 0 "register_operand")
+ (match_operand:VBF 1 "nonimmediate_operand")
+ (match_operand:VBF 2 "nonimmediate_operand")
+ (match_operand:VBF 3 "nonimmediate_operand")
(match_operand:<avx512fmaskmode> 4 "register_operand")]
"TARGET_AVX10_2"
{
@@ -32135,11 +32265,11 @@
})
(define_insn "avx10_2_fmaddbf16_<mode><sd_maskz_name>"
- [(set (match_operand:VBF_AVX10_2 0 "register_operand" "=v,v,v")
- (fma:VBF_AVX10_2
- (match_operand:VBF_AVX10_2 1 "nonimmediate_operand" "%0,0,v")
- (match_operand:VBF_AVX10_2 2 "nonimmediate_operand" "vm,v,vm")
- (match_operand:VBF_AVX10_2 3 "nonimmediate_operand" "v,vm,0")))]
+ [(set (match_operand:VBF 0 "register_operand" "=v,v,v")
+ (fma:VBF
+ (match_operand:VBF 1 "nonimmediate_operand" "%0,0,v")
+ (match_operand:VBF 2 "nonimmediate_operand" "vm,v,vm")
+ (match_operand:VBF 3 "nonimmediate_operand" "v,vm,0")))]
"TARGET_AVX10_2"
"@
vfmadd132bf16\t{%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2}
@@ -32150,12 +32280,12 @@
(set_attr "mode" "<sseinsnmode>")])
(define_insn "avx10_2_fmaddbf16_<mode>_mask"
- [(set (match_operand:VBF_AVX10_2 0 "register_operand" "=v,v")
- (vec_merge:VBF_AVX10_2
- (fma:VBF_AVX10_2
- (match_operand:VBF_AVX10_2 1 "nonimmediate_operand" "0,0")
- (match_operand:VBF_AVX10_2 2 "nonimmediate_operand" "vm,v")
- (match_operand:VBF_AVX10_2 3 "nonimmediate_operand" "v,vm"))
+ [(set (match_operand:VBF 0 "register_operand" "=v,v")
+ (vec_merge:VBF
+ (fma:VBF
+ (match_operand:VBF 1 "nonimmediate_operand" "0,0")
+ (match_operand:VBF 2 "nonimmediate_operand" "vm,v")
+ (match_operand:VBF 3 "nonimmediate_operand" "v,vm"))
(match_dup 1)
(match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
"TARGET_AVX10_2"
@@ -32167,12 +32297,12 @@
(set_attr "mode" "<sseinsnmode>")])
(define_insn "avx10_2_fmaddbf16_<mode>_mask3"
- [(set (match_operand:VBF_AVX10_2 0 "register_operand" "=v")
- (vec_merge:VBF_AVX10_2
- (fma:VBF_AVX10_2
- (match_operand:VBF_AVX10_2 1 "nonimmediate_operand" "%v")
- (match_operand:VBF_AVX10_2 2 "nonimmediate_operand" "vm")
- (match_operand:VBF_AVX10_2 3 "nonimmediate_operand" "0"))
+ [(set (match_operand:VBF 0 "register_operand" "=v")
+ (vec_merge:VBF
+ (fma:VBF
+ (match_operand:VBF 1 "nonimmediate_operand" "%v")
+ (match_operand:VBF 2 "nonimmediate_operand" "vm")
+ (match_operand:VBF 3 "nonimmediate_operand" "0"))
(match_dup 3)
(match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
"TARGET_AVX10_2"
@@ -32182,10 +32312,10 @@
(set_attr "mode" "<sseinsnmode>")])
(define_expand "avx10_2_fnmaddbf16_<mode>_maskz"
- [(match_operand:VBF_AVX10_2 0 "register_operand")
- (match_operand:VBF_AVX10_2 1 "nonimmediate_operand")
- (match_operand:VBF_AVX10_2 2 "nonimmediate_operand")
- (match_operand:VBF_AVX10_2 3 "nonimmediate_operand")
+ [(match_operand:VBF 0 "register_operand")
+ (match_operand:VBF 1 "nonimmediate_operand")
+ (match_operand:VBF 2 "nonimmediate_operand")
+ (match_operand:VBF 3 "nonimmediate_operand")
(match_operand:<avx512fmaskmode> 4 "register_operand")]
"TARGET_AVX10_2"
{
@@ -32197,12 +32327,12 @@
})
(define_insn "avx10_2_fnmaddbf16_<mode><sd_maskz_name>"
- [(set (match_operand:VBF_AVX10_2 0 "register_operand" "=v,v,v")
- (fma:VBF_AVX10_2
- (neg:VBF_AVX10_2
- (match_operand:VBF_AVX10_2 1 "nonimmediate_operand" "%0,0,v"))
- (match_operand:VBF_AVX10_2 2 "nonimmediate_operand" "vm,v,vm")
- (match_operand:VBF_AVX10_2 3 "nonimmediate_operand" "v,vm,0")))]
+ [(set (match_operand:VBF 0 "register_operand" "=v,v,v")
+ (fma:VBF
+ (neg:VBF
+ (match_operand:VBF 1 "nonimmediate_operand" "%0,0,v"))
+ (match_operand:VBF 2 "nonimmediate_operand" "vm,v,vm")
+ (match_operand:VBF 3 "nonimmediate_operand" "v,vm,0")))]
"TARGET_AVX10_2"
"@
vfnmadd132bf16\t{%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2}
@@ -32213,13 +32343,13 @@
(set_attr "mode" "<sseinsnmode>")])
(define_insn "avx10_2_fnmaddbf16_<mode>_mask"
- [(set (match_operand:VBF_AVX10_2 0 "register_operand" "=v,v")
- (vec_merge:VBF_AVX10_2
- (fma:VBF_AVX10_2
- (neg:VBF_AVX10_2
- (match_operand:VBF_AVX10_2 1 "nonimmediate_operand" "0,0"))
- (match_operand:VBF_AVX10_2 2 "nonimmediate_operand" "vm,v")
- (match_operand:VBF_AVX10_2 3 "nonimmediate_operand" "v,vm"))
+ [(set (match_operand:VBF 0 "register_operand" "=v,v")
+ (vec_merge:VBF
+ (fma:VBF
+ (neg:VBF
+ (match_operand:VBF 1 "nonimmediate_operand" "0,0"))
+ (match_operand:VBF 2 "nonimmediate_operand" "vm,v")
+ (match_operand:VBF 3 "nonimmediate_operand" "v,vm"))
(match_dup 1)
(match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
"TARGET_AVX10_2"
@@ -32231,13 +32361,13 @@
(set_attr "mode" "<sseinsnmode>")])
(define_insn "avx10_2_fnmaddbf16_<mode>_mask3"
- [(set (match_operand:VBF_AVX10_2 0 "register_operand" "=v")
- (vec_merge:VBF_AVX10_2
- (fma:VBF_AVX10_2
- (neg:VBF_AVX10_2
- (match_operand:VBF_AVX10_2 1 "nonimmediate_operand" "%v"))
- (match_operand:VBF_AVX10_2 2 "nonimmediate_operand" "vm")
- (match_operand:VBF_AVX10_2 3 "nonimmediate_operand" "0"))
+ [(set (match_operand:VBF 0 "register_operand" "=v")
+ (vec_merge:VBF
+ (fma:VBF
+ (neg:VBF
+ (match_operand:VBF 1 "nonimmediate_operand" "%v"))
+ (match_operand:VBF 2 "nonimmediate_operand" "vm")
+ (match_operand:VBF 3 "nonimmediate_operand" "0"))
(match_dup 3)
(match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
"TARGET_AVX10_2"
@@ -32247,10 +32377,10 @@
(set_attr "mode" "<sseinsnmode>")])
(define_expand "avx10_2_fmsubbf16_<mode>_maskz"
- [(match_operand:VBF_AVX10_2 0 "register_operand")
- (match_operand:VBF_AVX10_2 1 "nonimmediate_operand")
- (match_operand:VBF_AVX10_2 2 "nonimmediate_operand")
- (match_operand:VBF_AVX10_2 3 "nonimmediate_operand")
+ [(match_operand:VBF 0 "register_operand")
+ (match_operand:VBF 1 "nonimmediate_operand")
+ (match_operand:VBF 2 "nonimmediate_operand")
+ (match_operand:VBF 3 "nonimmediate_operand")
(match_operand:<avx512fmaskmode> 4 "register_operand")]
"TARGET_AVX10_2"
{
@@ -32262,12 +32392,12 @@
})
(define_insn "avx10_2_fmsubbf16_<mode><sd_maskz_name>"
- [(set (match_operand:VBF_AVX10_2 0 "register_operand" "=v,v,v")
- (fma:VBF_AVX10_2
- (match_operand:VBF_AVX10_2 1 "nonimmediate_operand" "%0,0,v")
- (match_operand:VBF_AVX10_2 2 "nonimmediate_operand" "vm,v,vm")
- (neg:VBF_AVX10_2
- (match_operand:VBF_AVX10_2 3 "nonimmediate_operand" "v,vm,0"))))]
+ [(set (match_operand:VBF 0 "register_operand" "=v,v,v")
+ (fma:VBF
+ (match_operand:VBF 1 "nonimmediate_operand" "%0,0,v")
+ (match_operand:VBF 2 "nonimmediate_operand" "vm,v,vm")
+ (neg:VBF
+ (match_operand:VBF 3 "nonimmediate_operand" "v,vm,0"))))]
"TARGET_AVX10_2"
"@
vfmsub132bf16\t{%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2}
@@ -32278,13 +32408,13 @@
(set_attr "mode" "<sseinsnmode>")])
(define_insn "avx10_2_fmsubbf16_<mode>_mask"
- [(set (match_operand:VBF_AVX10_2 0 "register_operand" "=v,v")
- (vec_merge:VBF_AVX10_2
- (fma:VBF_AVX10_2
- (match_operand:VBF_AVX10_2 1 "nonimmediate_operand" "0,0")
- (match_operand:VBF_AVX10_2 2 "nonimmediate_operand" "vm,v")
- (neg:VBF_AVX10_2
- (match_operand:VBF_AVX10_2 3 "nonimmediate_operand" "v,vm")))
+ [(set (match_operand:VBF 0 "register_operand" "=v,v")
+ (vec_merge:VBF
+ (fma:VBF
+ (match_operand:VBF 1 "nonimmediate_operand" "0,0")
+ (match_operand:VBF 2 "nonimmediate_operand" "vm,v")
+ (neg:VBF
+ (match_operand:VBF 3 "nonimmediate_operand" "v,vm")))
(match_dup 1)
(match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
"TARGET_AVX10_2"
@@ -32296,13 +32426,13 @@
(set_attr "mode" "<sseinsnmode>")])
(define_insn "avx10_2_fmsubbf16_<mode>_mask3"
- [(set (match_operand:VBF_AVX10_2 0 "register_operand" "=v")
- (vec_merge:VBF_AVX10_2
- (fma:VBF_AVX10_2
- (match_operand:VBF_AVX10_2 1 "nonimmediate_operand" "%v")
- (match_operand:VBF_AVX10_2 2 "nonimmediate_operand" "vm")
- (neg:VBF_AVX10_2
- (match_operand:VBF_AVX10_2 3 "nonimmediate_operand" "0")))
+ [(set (match_operand:VBF 0 "register_operand" "=v")
+ (vec_merge:VBF
+ (fma:VBF
+ (match_operand:VBF 1 "nonimmediate_operand" "%v")
+ (match_operand:VBF 2 "nonimmediate_operand" "vm")
+ (neg:VBF
+ (match_operand:VBF 3 "nonimmediate_operand" "0")))
(match_dup 3)
(match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
"TARGET_AVX10_2"
@@ -32312,10 +32442,10 @@
(set_attr "mode" "<sseinsnmode>")])
(define_expand "avx10_2_fnmsubbf16_<mode>_maskz"
- [(match_operand:VBF_AVX10_2 0 "register_operand")
- (match_operand:VBF_AVX10_2 1 "nonimmediate_operand")
- (match_operand:VBF_AVX10_2 2 "nonimmediate_operand")
- (match_operand:VBF_AVX10_2 3 "nonimmediate_operand")
+ [(match_operand:VBF 0 "register_operand")
+ (match_operand:VBF 1 "nonimmediate_operand")
+ (match_operand:VBF 2 "nonimmediate_operand")
+ (match_operand:VBF 3 "nonimmediate_operand")
(match_operand:<avx512fmaskmode> 4 "register_operand")]
"TARGET_AVX10_2"
{
@@ -32327,13 +32457,13 @@
})
(define_insn "avx10_2_fnmsubbf16_<mode><sd_maskz_name>"
- [(set (match_operand:VBF_AVX10_2 0 "register_operand" "=v,v,v")
- (fma:VBF_AVX10_2
- (neg:VBF_AVX10_2
- (match_operand:VBF_AVX10_2 1 "nonimmediate_operand" "%0,0,v"))
- (match_operand:VBF_AVX10_2 2 "nonimmediate_operand" "vm,v,vm")
- (neg:VBF_AVX10_2
- (match_operand:VBF_AVX10_2 3 "nonimmediate_operand" "v,vm,0"))))]
+ [(set (match_operand:VBF 0 "register_operand" "=v,v,v")
+ (fma:VBF
+ (neg:VBF
+ (match_operand:VBF 1 "nonimmediate_operand" "%0,0,v"))
+ (match_operand:VBF 2 "nonimmediate_operand" "vm,v,vm")
+ (neg:VBF
+ (match_operand:VBF 3 "nonimmediate_operand" "v,vm,0"))))]
"TARGET_AVX10_2"
"@
vfnmsub132bf16\t{%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2}
@@ -32344,14 +32474,14 @@
(set_attr "mode" "<sseinsnmode>")])
(define_insn "avx10_2_fnmsubbf16_<mode>_mask"
- [(set (match_operand:VBF_AVX10_2 0 "register_operand" "=v,v")
- (vec_merge:VBF_AVX10_2
- (fma:VBF_AVX10_2
- (neg:VBF_AVX10_2
- (match_operand:VBF_AVX10_2 1 "nonimmediate_operand" "0,0"))
- (match_operand:VBF_AVX10_2 2 "nonimmediate_operand" "vm,v")
- (neg:VBF_AVX10_2
- (match_operand:VBF_AVX10_2 3 "nonimmediate_operand" "v,vm")))
+ [(set (match_operand:VBF 0 "register_operand" "=v,v")
+ (vec_merge:VBF
+ (fma:VBF
+ (neg:VBF
+ (match_operand:VBF 1 "nonimmediate_operand" "0,0"))
+ (match_operand:VBF 2 "nonimmediate_operand" "vm,v")
+ (neg:VBF
+ (match_operand:VBF 3 "nonimmediate_operand" "v,vm")))
(match_dup 1)
(match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
"TARGET_AVX10_2"
@@ -32363,14 +32493,14 @@
(set_attr "mode" "<sseinsnmode>")])
(define_insn "avx10_2_fnmsubbf16_<mode>_mask3"
- [(set (match_operand:VBF_AVX10_2 0 "register_operand" "=v")
- (vec_merge:VBF_AVX10_2
- (fma:VBF_AVX10_2
- (neg:VBF_AVX10_2
- (match_operand:VBF_AVX10_2 1 "nonimmediate_operand" "%v"))
- (match_operand:VBF_AVX10_2 2 "nonimmediate_operand" "vm")
- (neg:VBF_AVX10_2
- (match_operand:VBF_AVX10_2 3 "nonimmediate_operand" "0")))
+ [(set (match_operand:VBF 0 "register_operand" "=v")
+ (vec_merge:VBF
+ (fma:VBF
+ (neg:VBF
+ (match_operand:VBF 1 "nonimmediate_operand" "%v"))
+ (match_operand:VBF 2 "nonimmediate_operand" "vm")
+ (neg:VBF
+ (match_operand:VBF 3 "nonimmediate_operand" "0")))
(match_dup 3)
(match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
"TARGET_AVX10_2"
@@ -32380,35 +32510,35 @@
(set_attr "mode" "<sseinsnmode>")])
(define_insn "avx10_2_rsqrtbf16_<mode><mask_name>"
- [(set (match_operand:VBF_AVX10_2 0 "register_operand" "=v")
- (unspec:VBF_AVX10_2
- [(match_operand:VBF_AVX10_2 1 "nonimmediate_operand" "vm")]
+ [(set (match_operand:VBF 0 "register_operand" "=v")
+ (unspec:VBF
+ [(match_operand:VBF 1 "nonimmediate_operand" "vm")]
UNSPEC_RSQRT))]
"TARGET_AVX10_2"
"vrsqrtbf16\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
[(set_attr "prefix" "evex")])
(define_insn "avx10_2_sqrtbf16_<mode><mask_name>"
- [(set (match_operand:VBF_AVX10_2 0 "register_operand" "=v")
- (sqrt:VBF_AVX10_2
- (match_operand:VBF_AVX10_2 1 "nonimmediate_operand" "vm")))]
+ [(set (match_operand:VBF 0 "register_operand" "=v")
+ (sqrt:VBF
+ (match_operand:VBF 1 "nonimmediate_operand" "vm")))]
"TARGET_AVX10_2"
"vsqrtbf16\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
[(set_attr "prefix" "evex")])
(define_insn "avx10_2_rcpbf16_<mode><mask_name>"
- [(set (match_operand:VBF_AVX10_2 0 "register_operand" "=v")
- (unspec:VBF_AVX10_2
- [(match_operand:VBF_AVX10_2 1 "nonimmediate_operand" "vm")]
+ [(set (match_operand:VBF 0 "register_operand" "=v")
+ (unspec:VBF
+ [(match_operand:VBF 1 "nonimmediate_operand" "vm")]
UNSPEC_RCP))]
"TARGET_AVX10_2"
"vrcpbf16\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
[(set_attr "prefix" "evex")])
(define_insn "avx10_2_getexpbf16_<mode><mask_name>"
- [(set (match_operand:VBF_AVX10_2 0 "register_operand" "=v")
- (unspec:VBF_AVX10_2
- [(match_operand:VBF_AVX10_2 1 "nonimmediate_operand" "vm")]
+ [(set (match_operand:VBF 0 "register_operand" "=v")
+ (unspec:VBF
+ [(match_operand:VBF 1 "nonimmediate_operand" "vm")]
UNSPEC_GETEXP))]
"TARGET_AVX10_2"
"vgetexpbf16\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
@@ -32425,9 +32555,9 @@
(UNSPEC_VGETMANTBF16 "getmant")])
(define_insn "avx10_2_<bf16immop>bf16_<mode><mask_name>"
- [(set (match_operand:VBF_AVX10_2 0 "register_operand" "=v")
- (unspec:VBF_AVX10_2
- [(match_operand:VBF_AVX10_2 1 "nonimmediate_operand" "vm")
+ [(set (match_operand:VBF 0 "register_operand" "=v")
+ (unspec:VBF
+ [(match_operand:VBF 1 "nonimmediate_operand" "vm")
(match_operand:SI 2 "const_0_to_255_operand")]
BF16IMMOP))]
"TARGET_AVX10_2"
@@ -32437,7 +32567,7 @@
(define_insn "avx10_2_fpclassbf16_<mode><mask_scalar_merge_name>"
[(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
(unspec:<avx512fmaskmode>
- [(match_operand:VBF_AVX10_2 1 "nonimmediate_operand" "vm")
+ [(match_operand:VBF 1 "nonimmediate_operand" "vm")
(match_operand 2 "const_0_to_255_operand")]
UNSPEC_VFPCLASSBF16))]
"TARGET_AVX10_2"
@@ -32447,8 +32577,8 @@
(define_insn "avx10_2_cmpbf16_<mode><mask_scalar_merge_name>"
[(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
(unspec:<avx512fmaskmode>
- [(match_operand:VBF_AVX10_2 1 "register_operand" "v")
- (match_operand:VBF_AVX10_2 2 "nonimmediate_operand" "vm")
+ [(match_operand:VBF 1 "register_operand" "v")
+ (match_operand:VBF 2 "nonimmediate_operand" "vm")
(match_operand 3 "const_0_to_31_operand" "n")]
UNSPEC_PCMP))]
"TARGET_AVX10_2"
@@ -32486,7 +32616,7 @@
(define_insn "avx10_2_cvt<sat_cvt_trunc_prefix>bf162i<sat_cvt_sign_prefix>bs<mode><mask_name>"
[(set (match_operand:<sseintvecmode> 0 "register_operand" "=v")
(unspec:<sseintvecmode>
- [(match_operand:VBF_AVX10_2 1 "vector_operand" "vm")]
+ [(match_operand:VBF 1 "vector_operand" "vm")]
UNSPEC_CVT_BF16_IBS_ITER))]
"TARGET_AVX10_2"
"vcvt<sat_cvt_trunc_prefix>bf162i<sat_cvt_sign_prefix>bs\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
@@ -32501,7 +32631,7 @@
(define_insn "avx10_2_cvtph2i<sat_cvt_sign_prefix>bs<mode><mask_name><round_name>"
[(set (match_operand:<sseintvecmode> 0 "register_operand" "=v")
(unspec:<sseintvecmode>
- [(match_operand:VHF_AVX10_2 1 "<round_nimm_predicate>" "<round_constraint>")]
+ [(match_operand:VHF_AVX512VL 1 "<round_nimm_predicate>" "<round_constraint>")]
UNSPEC_CVT_PH_IBS_ITER))]
"TARGET_AVX10_2 && <round_mode512bit_condition>"
"vcvtph2i<sat_cvt_sign_prefix>bs\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
@@ -32516,7 +32646,7 @@
(define_insn "avx10_2_cvttph2i<sat_cvt_sign_prefix>bs<mode><mask_name><round_saeonly_name>"
[(set (match_operand:<sseintvecmode> 0 "register_operand" "=v")
(unspec:<sseintvecmode>
- [(match_operand:VHF_AVX10_2 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
+ [(match_operand:VHF_AVX512VL 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
UNSPEC_CVTT_PH_IBS_ITER))]
"TARGET_AVX10_2 && <round_saeonly_mode512bit_condition>"
"vcvttph2i<sat_cvt_sign_prefix>bs\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
@@ -32531,7 +32661,7 @@
(define_insn "avx10_2_cvtps2i<sat_cvt_sign_prefix>bs<mode><mask_name><round_name>"
[(set (match_operand:<sseintvecmode> 0 "register_operand" "=v")
(unspec:<sseintvecmode>
- [(match_operand:VF1_AVX10_2 1 "<round_nimm_predicate>" "<round_constraint>")]
+ [(match_operand:VF1_AVX512VL 1 "<round_nimm_predicate>" "<round_constraint>")]
UNSPEC_CVT_PS_IBS_ITER))]
"TARGET_AVX10_2 && <round_mode512bit_condition>"
"vcvtps2i<sat_cvt_sign_prefix>bs\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
@@ -32546,7 +32676,7 @@
(define_insn "avx10_2_cvttps2i<sat_cvt_sign_prefix>bs<mode><mask_name><round_saeonly_name>"
[(set (match_operand:<sseintvecmode> 0 "register_operand" "=v")
(unspec:<sseintvecmode>
- [(match_operand:VF1_AVX10_2 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
+ [(match_operand:VF1_AVX512VL 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
UNSPEC_CVTT_PS_IBS_ITER))]
"TARGET_AVX10_2 && <round_saeonly_mode512bit_condition>"
"vcvttps2i<sat_cvt_sign_prefix>bs\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
@@ -32565,7 +32695,7 @@
(define_insn "avx10_2_vcvtt<castmode>2<sat_cvt_sign_prefix>dqs<mode><mask_name><round_saeonly_name>"
[(set (match_operand:<VEC_GATHER_IDXSI> 0 "register_operand" "=v")
(unspec:<VEC_GATHER_IDXSI>
- [(match_operand:VF1_VF2_AVX10_2 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
+ [(match_operand:VF 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
UNSPEC_SAT_CVT_DS_SIGN_ITER))]
"TARGET_AVX10_2 && <round_saeonly_mode512bit_condition>"
"vcvtt<castmode>2<sat_cvt_sign_prefix>dqs<pd2dqssuff>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
@@ -32576,7 +32706,7 @@
(define_insn "avx10_2_vcvttpd2<sat_cvt_sign_prefix>qqs<mode><mask_name><round_saeonly_name>"
[(set (match_operand:<VEC_GATHER_IDXDI> 0 "register_operand" "=v")
(unspec:<VEC_GATHER_IDXDI>
- [(match_operand:VF2_AVX10_2 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
+ [(match_operand:VF2 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
UNSPEC_SAT_CVT_DS_SIGN_ITER))]
"TARGET_AVX10_2 && <round_saeonly_mode512bit_condition>"
"vcvttpd2<sat_cvt_sign_prefix>qqs\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
@@ -32585,8 +32715,8 @@
(set_attr "mode" "<sseinsnmode>")])
(define_insn "avx10_2_vcvttps2<sat_cvt_sign_prefix>qqs<mode><mask_name><round_saeonly_name>"
- [(set (match_operand:VI8_AVX10_2 0 "register_operand" "=v")
- (unspec:VI8_AVX10_2
+ [(set (match_operand:VI8 0 "register_operand" "=v")
+ (unspec:VI8
[(match_operand:<vpckfloat_temp_mode> 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
UNSPEC_SAT_CVT_DS_SIGN_ITER))]
"TARGET_AVX10_2 && <round_saeonly_mode512bit_condition>"
@@ -32622,10 +32752,10 @@
(set_attr "mode" "<MODE>")])
(define_insn "avx10_2_minmaxbf16_<mode><mask_name>"
- [(set (match_operand:VBF_AVX10_2 0 "register_operand" "=v")
- (unspec:VBF_AVX10_2
- [(match_operand:VBF_AVX10_2 1 "register_operand" "v")
- (match_operand:VBF_AVX10_2 2 "bcst_vector_operand" "vmBr")
+ [(set (match_operand:VBF 0 "register_operand" "=v")
+ (unspec:VBF
+ [(match_operand:VBF 1 "register_operand" "v")
+ (match_operand:VBF 2 "bcst_vector_operand" "vmBr")
(match_operand:SI 3 "const_0_to_255_operand")]
UNSPEC_MINMAXBF16))]
"TARGET_AVX10_2"
@@ -32634,10 +32764,10 @@
(set_attr "mode" "<sseinsnmode>")])
(define_insn "avx10_2_minmaxp<mode><mask_name><round_saeonly_name>"
- [(set (match_operand:VFH_AVX10_2 0 "register_operand" "=v")
- (unspec:VFH_AVX10_2
- [(match_operand:VFH_AVX10_2 1 "register_operand" "v")
- (match_operand:VFH_AVX10_2 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
+ [(set (match_operand:VFH_AVX512VL 0 "register_operand" "=v")
+ (unspec:VFH_AVX512VL
+ [(match_operand:VFH_AVX512VL 1 "register_operand" "v")
+ (match_operand:VFH_AVX512VL 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
(match_operand:SI 3 "const_0_to_255_operand")]
UNSPEC_MINMAX))]
"TARGET_AVX10_2"
@@ -32661,9 +32791,9 @@
(set_attr "mode" "<ssescalarmode>")])
(define_insn "avx10_2_vmovrs<ssemodesuffix><mode><mask_name>"
- [(set (match_operand:VI1248_AVX10_2 0 "register_operand" "=v")
- (unspec:VI1248_AVX10_2
- [(match_operand:VI1248_AVX10_2 1 "memory_operand" "m")]
+ [(set (match_operand:VI1248_AVX512VLBW 0 "register_operand" "=v")
+ (unspec:VI1248_AVX512VLBW
+ [(match_operand:VI1248_AVX512VLBW 1 "memory_operand" "m")]
UNSPEC_VMOVRS))]
"TARGET_AVX10_2 && TARGET_MOVRS"
"vmovrs<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
diff --git a/gcc/config/i386/vaesintrin.h b/gcc/config/i386/vaesintrin.h
index 15d8e96..64f3c20 100644
--- a/gcc/config/i386/vaesintrin.h
+++ b/gcc/config/i386/vaesintrin.h
@@ -66,9 +66,9 @@ _mm256_aesenclast_epi128 (__m256i __A, __m256i __B)
#endif /* __DISABLE_VAES__ */
-#if !defined(__VAES__) || !defined(__AVX512F__) || !defined(__EVEX512__)
+#if !defined(__VAES__) || !defined(__AVX512F__)
#pragma GCC push_options
-#pragma GCC target("vaes,avx512f,evex512")
+#pragma GCC target("vaes,avx512f")
#define __DISABLE_VAESF__
#endif /* __VAES__ */
diff --git a/gcc/config/i386/vpclmulqdqintrin.h b/gcc/config/i386/vpclmulqdqintrin.h
index 2b36c37..a02ab38 100644
--- a/gcc/config/i386/vpclmulqdqintrin.h
+++ b/gcc/config/i386/vpclmulqdqintrin.h
@@ -28,9 +28,9 @@
#ifndef _VPCLMULQDQINTRIN_H_INCLUDED
#define _VPCLMULQDQINTRIN_H_INCLUDED
-#if !defined(__VPCLMULQDQ__) || !defined(__AVX512F__) || !defined(__EVEX512__)
+#if !defined(__VPCLMULQDQ__) || !defined(__AVX512F__)
#pragma GCC push_options
-#pragma GCC target("vpclmulqdq,avx512f,evex512")
+#pragma GCC target("vpclmulqdq,avx512f")
#define __DISABLE_VPCLMULQDQF__
#endif /* __VPCLMULQDQF__ */
diff --git a/gcc/config/i386/x86-tune-costs.h b/gcc/config/i386/x86-tune-costs.h
index 9477345..1649ea2 100644
--- a/gcc/config/i386/x86-tune-costs.h
+++ b/gcc/config/i386/x86-tune-costs.h
@@ -107,6 +107,7 @@ struct processor_costs ix86_size_cost = {/* costs for tuning for size */
in 128bit, 256bit and 512bit */
4, 4, 6, /* cost of moving XMM,YMM,ZMM register */
4, /* cost of moving SSE register to integer. */
+ 4, /* cost of moving integer register to SSE. */
COSTS_N_BYTES (5), 0, /* Gather load static, per_elt. */
COSTS_N_BYTES (5), 0, /* Gather store static, per_elt. */
0, /* size of l1 cache */
@@ -121,17 +122,31 @@ struct processor_costs ix86_size_cost = {/* costs for tuning for size */
COSTS_N_BYTES (2), /* cost of FCHS instruction. */
COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
- COSTS_N_BYTES (2), /* cost of cheap SSE instruction. */
- COSTS_N_BYTES (2), /* cost of ADDSS/SD SUBSS/SD insns. */
- COSTS_N_BYTES (2), /* cost of MULSS instruction. */
- COSTS_N_BYTES (2), /* cost of MULSD instruction. */
- COSTS_N_BYTES (2), /* cost of FMA SS instruction. */
- COSTS_N_BYTES (2), /* cost of FMA SD instruction. */
- COSTS_N_BYTES (2), /* cost of DIVSS instruction. */
- COSTS_N_BYTES (2), /* cost of DIVSD instruction. */
- COSTS_N_BYTES (2), /* cost of SQRTSS instruction. */
- COSTS_N_BYTES (2), /* cost of SQRTSD instruction. */
+ COSTS_N_BYTES (4), /* cost of cheap SSE instruction. */
+ COSTS_N_BYTES (4), /* cost of ADDSS/SD SUBSS/SD insns. */
+ COSTS_N_BYTES (4), /* cost of MULSS instruction. */
+ COSTS_N_BYTES (4), /* cost of MULSD instruction. */
+ COSTS_N_BYTES (4), /* cost of FMA SS instruction. */
+ COSTS_N_BYTES (4), /* cost of FMA SD instruction. */
+ COSTS_N_BYTES (4), /* cost of DIVSS instruction. */
+ COSTS_N_BYTES (4), /* cost of DIVSD instruction. */
+ COSTS_N_BYTES (4), /* cost of SQRTSS instruction. */
+ COSTS_N_BYTES (4), /* cost of SQRTSD instruction. */
+ COSTS_N_BYTES (4), /* cost of CVTSS2SD etc. */
+ COSTS_N_BYTES (4), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_BYTES (6), /* cost of 512bit VCVTPS2PD etc. */
+ COSTS_N_BYTES (4), /* cost of CVTSI2SS instruction. */
+ COSTS_N_BYTES (4), /* cost of CVT(T)SS2SI instruction. */
+ COSTS_N_BYTES (4), /* cost of CVTPI2PS instruction. */
+ COSTS_N_BYTES (4), /* cost of CVT(T)PS2PI instruction. */
+
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
+ {1, 1, 1}, /* latency times throughput of
+ FMA/DOT_PROD_EXPR/SAD_EXPR,
+ it's used to determine unroll
+ factor in the vectorizer. */
+ 1, /* Limit how much the autovectorizer
+ may unroll a loop. */
ix86_size_memcpy,
ix86_size_memset,
COSTS_N_BYTES (1), /* cond_taken_branch_cost. */
@@ -219,6 +234,7 @@ struct processor_costs i386_cost = { /* 386 specific costs */
{4, 8, 16, 32, 64}, /* cost of unaligned stores. */
2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
3, /* cost of moving SSE register to integer. */
+ 3, /* cost of moving integer register to SSE. */
4, 4, /* Gather load static, per_elt. */
4, 4, /* Gather store static, per_elt. */
0, /* size of l1 cache */
@@ -243,7 +259,20 @@ struct processor_costs i386_cost = { /* 386 specific costs */
COSTS_N_INSNS (88), /* cost of DIVSD instruction. */
COSTS_N_INSNS (122), /* cost of SQRTSS instruction. */
COSTS_N_INSNS (122), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (27), /* cost of CVTSS2SD etc. */
+ COSTS_N_INSNS (54), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (108), /* cost of 512bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (27), /* cost of CVTSI2SS instruction. */
+ COSTS_N_INSNS (27), /* cost of CVT(T)SS2SI instruction. */
+ COSTS_N_INSNS (27), /* cost of CVTPI2PS instruction. */
+ COSTS_N_INSNS (27), /* cost of CVT(T)PS2PI instruction. */
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
+ {1, 1, 1}, /* latency times throughput of
+ FMA/DOT_PROD_EXPR/SAD_EXPR,
+ it's used to determine unroll
+ factor in the vectorizer. */
+ 1, /* Limit how much the autovectorizer
+ may unroll a loop. */
i386_memcpy,
i386_memset,
COSTS_N_INSNS (3), /* cond_taken_branch_cost. */
@@ -330,6 +359,7 @@ struct processor_costs i486_cost = { /* 486 specific costs */
{4, 8, 16, 32, 64}, /* cost of unaligned stores. */
2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
3, /* cost of moving SSE register to integer. */
+ 3, /* cost of moving integer register to SSE. */
4, 4, /* Gather load static, per_elt. */
4, 4, /* Gather store static, per_elt. */
4, /* size of l1 cache. 486 has 8kB cache
@@ -356,7 +386,20 @@ struct processor_costs i486_cost = { /* 486 specific costs */
COSTS_N_INSNS (74), /* cost of DIVSD instruction. */
COSTS_N_INSNS (83), /* cost of SQRTSS instruction. */
COSTS_N_INSNS (83), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (8), /* cost of CVTSS2SD etc. */
+ COSTS_N_INSNS (16), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (32), /* cost of 512bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (27), /* cost of CVTSI2SS instruction. */
+ COSTS_N_INSNS (27), /* cost of CVT(T)SS2SI instruction. */
+ COSTS_N_INSNS (27), /* cost of CVTPI2PS instruction. */
+ COSTS_N_INSNS (27), /* cost of CVT(T)PS2PI instruction. */
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
+ {1, 1, 1}, /* latency times throughput of
+ FMA/DOT_PROD_EXPR/SAD_EXPR,
+ it's used to determine unroll
+ factor in the vectorizer. */
+ 1, /* Limit how much the autovectorizer
+ may unroll a loop. */
i486_memcpy,
i486_memset,
COSTS_N_INSNS (3), /* cond_taken_branch_cost. */
@@ -443,6 +486,7 @@ struct processor_costs pentium_cost = {
{4, 8, 16, 32, 64}, /* cost of unaligned stores. */
2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
3, /* cost of moving SSE register to integer. */
+ 3, /* cost of moving integer register to SSE. */
4, 4, /* Gather load static, per_elt. */
4, 4, /* Gather store static, per_elt. */
8, /* size of l1 cache. */
@@ -467,7 +511,20 @@ struct processor_costs pentium_cost = {
COSTS_N_INSNS (39), /* cost of DIVSD instruction. */
COSTS_N_INSNS (70), /* cost of SQRTSS instruction. */
COSTS_N_INSNS (70), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */
+ COSTS_N_INSNS (6), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (12), /* cost of 512bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (3), /* cost of CVTSI2SS instruction. */
+ COSTS_N_INSNS (3), /* cost of CVT(T)SS2SI instruction. */
+ COSTS_N_INSNS (3), /* cost of CVTPI2PS instruction. */
+ COSTS_N_INSNS (3), /* cost of CVT(T)PS2PI instruction. */
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
+ {1, 1, 1}, /* latency times throughput of
+ FMA/DOT_PROD_EXPR/SAD_EXPR,
+ it's used to determine unroll
+ factor in the vectorizer. */
+ 1, /* Limit how much the autovectorizer
+ may unroll a loop. */
pentium_memcpy,
pentium_memset,
COSTS_N_INSNS (3), /* cond_taken_branch_cost. */
@@ -547,6 +604,7 @@ struct processor_costs lakemont_cost = {
{4, 8, 16, 32, 64}, /* cost of unaligned stores. */
2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
3, /* cost of moving SSE register to integer. */
+ 3, /* cost of moving integer register to SSE. */
4, 4, /* Gather load static, per_elt. */
4, 4, /* Gather store static, per_elt. */
8, /* size of l1 cache. */
@@ -571,7 +629,20 @@ struct processor_costs lakemont_cost = {
COSTS_N_INSNS (60), /* cost of DIVSD instruction. */
COSTS_N_INSNS (31), /* cost of SQRTSS instruction. */
COSTS_N_INSNS (63), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (5), /* cost of CVTSS2SD etc. */
+ COSTS_N_INSNS (10), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (20), /* cost of 512bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (5), /* cost of CVTSI2SS instruction. */
+ COSTS_N_INSNS (5), /* cost of CVT(T)SS2SI instruction. */
+ COSTS_N_INSNS (5), /* cost of CVTPI2PS instruction. */
+ COSTS_N_INSNS (5), /* cost of CVT(T)PS2PI instruction. */
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
+ {1, 1, 1}, /* latency times throughput of
+ FMA/DOT_PROD_EXPR/SAD_EXPR,
+ it's used to determine unroll
+ factor in the vectorizer. */
+ 1, /* Limit how much the autovectorizer
+ may unroll a loop. */
pentium_memcpy,
pentium_memset,
COSTS_N_INSNS (3), /* cond_taken_branch_cost. */
@@ -666,6 +737,7 @@ struct processor_costs pentiumpro_cost = {
{4, 8, 16, 32, 64}, /* cost of unaligned stores. */
2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
3, /* cost of moving SSE register to integer. */
+ 3, /* cost of moving integer register to SSE. */
4, 4, /* Gather load static, per_elt. */
4, 4, /* Gather store static, per_elt. */
8, /* size of l1 cache. */
@@ -690,7 +762,20 @@ struct processor_costs pentiumpro_cost = {
COSTS_N_INSNS (18), /* cost of DIVSD instruction. */
COSTS_N_INSNS (31), /* cost of SQRTSS instruction. */
COSTS_N_INSNS (31), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */
+ COSTS_N_INSNS (6), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (12), /* cost of 512bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (3), /* cost of CVTSI2SS instruction. */
+ COSTS_N_INSNS (3), /* cost of CVT(T)SS2SI instruction. */
+ COSTS_N_INSNS (3), /* cost of CVTPI2PS instruction. */
+ COSTS_N_INSNS (3), /* cost of CVT(T)PS2PI instruction. */
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
+ {1, 1, 1}, /* latency times throughput of
+ FMA/DOT_PROD_EXPR/SAD_EXPR,
+ it's used to determine unroll
+ factor in the vectorizer. */
+ 1, /* Limit how much the autovectorizer
+ may unroll a loop. */
pentiumpro_memcpy,
pentiumpro_memset,
COSTS_N_INSNS (3), /* cond_taken_branch_cost. */
@@ -776,6 +861,7 @@ struct processor_costs geode_cost = {
{2, 2, 8, 16, 32}, /* cost of unaligned stores. */
2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
6, /* cost of moving SSE register to integer. */
+ 6, /* cost of moving integer register to SSE. */
2, 2, /* Gather load static, per_elt. */
2, 2, /* Gather store static, per_elt. */
64, /* size of l1 cache. */
@@ -800,7 +886,20 @@ struct processor_costs geode_cost = {
COSTS_N_INSNS (47), /* cost of DIVSD instruction. */
COSTS_N_INSNS (54), /* cost of SQRTSS instruction. */
COSTS_N_INSNS (54), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (6), /* cost of CVTSS2SD etc. */
+ COSTS_N_INSNS (12), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (24), /* cost of 512bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (6), /* cost of CVTSI2SS instruction. */
+ COSTS_N_INSNS (6), /* cost of CVT(T)SS2SI instruction. */
+ COSTS_N_INSNS (6), /* cost of CVTPI2PS instruction. */
+ COSTS_N_INSNS (6), /* cost of CVT(T)PS2PI instruction. */
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
+ {1, 1, 1}, /* latency times throughput of
+ FMA/DOT_PROD_EXPR/SAD_EXPR,
+ it's used to determine unroll
+ factor in the vectorizer. */
+ 1, /* Limit how much the autovectorizer
+ may unroll a loop. */
geode_memcpy,
geode_memset,
COSTS_N_INSNS (3), /* cond_taken_branch_cost. */
@@ -886,6 +985,7 @@ struct processor_costs k6_cost = {
{2, 2, 8, 16, 32}, /* cost of unaligned stores. */
2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
6, /* cost of moving SSE register to integer. */
+ 6, /* cost of moving integer register to SSE. */
2, 2, /* Gather load static, per_elt. */
2, 2, /* Gather store static, per_elt. */
32, /* size of l1 cache. */
@@ -913,7 +1013,20 @@ struct processor_costs k6_cost = {
COSTS_N_INSNS (56), /* cost of DIVSD instruction. */
COSTS_N_INSNS (56), /* cost of SQRTSS instruction. */
COSTS_N_INSNS (56), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (2), /* cost of CVTSS2SD etc. */
+ COSTS_N_INSNS (4), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (8), /* cost of 512bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (2), /* cost of CVTSI2SS instruction. */
+ COSTS_N_INSNS (2), /* cost of CVT(T)SS2SI instruction. */
+ COSTS_N_INSNS (2), /* cost of CVTPI2PS instruction. */
+ COSTS_N_INSNS (2), /* cost of CVT(T)PS2PI instruction. */
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
+ {1, 1, 1}, /* latency times throughput of
+ FMA/DOT_PROD_EXPR/SAD_EXPR,
+ it's used to determine unroll
+ factor in the vectorizer. */
+ 1, /* Limit how much the autovectorizer
+ may unroll a loop. */
k6_memcpy,
k6_memset,
COSTS_N_INSNS (3), /* cond_taken_branch_cost. */
@@ -1002,6 +1115,7 @@ struct processor_costs athlon_cost = {
{4, 4, 10, 10, 20}, /* cost of unaligned stores. */
2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
5, /* cost of moving SSE register to integer. */
+ 5, /* cost of moving integer register to SSE. */
4, 4, /* Gather load static, per_elt. */
4, 4, /* Gather store static, per_elt. */
64, /* size of l1 cache. */
@@ -1027,7 +1141,20 @@ struct processor_costs athlon_cost = {
COSTS_N_INSNS (24), /* cost of DIVSD instruction. */
COSTS_N_INSNS (19), /* cost of SQRTSS instruction. */
COSTS_N_INSNS (19), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (4), /* cost of CVTSS2SD etc. */
+ COSTS_N_INSNS (8), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (16), /* cost of 512bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (4), /* cost of CVTSI2SS instruction. */
+ COSTS_N_INSNS (6), /* cost of CVT(T)SS2SI instruction. */
+ COSTS_N_INSNS (4), /* cost of CVTPI2PS instruction. */
+ COSTS_N_INSNS (6), /* cost of CVT(T)PS2PI instruction. */
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
+ {1, 1, 1}, /* latency times throughput of
+ FMA/DOT_PROD_EXPR/SAD_EXPR,
+ it's used to determine unroll
+ factor in the vectorizer. */
+ 1, /* Limit how much the autovectorizer
+ may unroll a loop. */
athlon_memcpy,
athlon_memset,
COSTS_N_INSNS (3), /* cond_taken_branch_cost. */
@@ -1120,6 +1247,7 @@ struct processor_costs k8_cost = {
{4, 4, 10, 10, 20}, /* cost of unaligned stores. */
2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
5, /* cost of moving SSE register to integer. */
+ 5, /* cost of moving integer register to SSE. */
4, 4, /* Gather load static, per_elt. */
4, 4, /* Gather store static, per_elt. */
64, /* size of l1 cache. */
@@ -1150,7 +1278,20 @@ struct processor_costs k8_cost = {
COSTS_N_INSNS (20), /* cost of DIVSD instruction. */
COSTS_N_INSNS (19), /* cost of SQRTSS instruction. */
COSTS_N_INSNS (27), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (4), /* cost of CVTSS2SD etc. */
+ COSTS_N_INSNS (8), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (16), /* cost of 512bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (14), /* cost of CVTSI2SS instruction. */
+ COSTS_N_INSNS (10), /* cost of CVT(T)SS2SI instruction. */
+ COSTS_N_INSNS (4), /* cost of CVTPI2PS instruction. */
+ COSTS_N_INSNS (5), /* cost of CVT(T)PS2PI instruction. */
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
+ {1, 1, 1}, /* latency times throughput of
+ FMA/DOT_PROD_EXPR/SAD_EXPR,
+ it's used to determine unroll
+ factor in the vectorizer. */
+ 1, /* Limit how much the autovectorizer
+ may unroll a loop. */
k8_memcpy,
k8_memset,
COSTS_N_INSNS (3), /* cond_taken_branch_cost. */
@@ -1251,6 +1392,7 @@ struct processor_costs amdfam10_cost = {
{4, 4, 5, 10, 20}, /* cost of unaligned stores. */
2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
3, /* cost of moving SSE register to integer. */
+ 3, /* cost of moving integer register to SSE. */
4, 4, /* Gather load static, per_elt. */
4, 4, /* Gather store static, per_elt. */
64, /* size of l1 cache. */
@@ -1281,7 +1423,20 @@ struct processor_costs amdfam10_cost = {
COSTS_N_INSNS (20), /* cost of DIVSD instruction. */
COSTS_N_INSNS (19), /* cost of SQRTSS instruction. */
COSTS_N_INSNS (27), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (4), /* cost of CVTSS2SD etc. */
+ COSTS_N_INSNS (8), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (16), /* cost of 512bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (14), /* cost of CVTSI2SS instruction. */
+ COSTS_N_INSNS (8), /* cost of CVT(T)SS2SI instruction. */
+ COSTS_N_INSNS (7), /* cost of CVTPI2PS instruction. */
+ COSTS_N_INSNS (4), /* cost of CVT(T)PS2PI instruction. */
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
+ {1, 1, 1}, /* latency times throughput of
+ FMA/DOT_PROD_EXPR/SAD_EXPR,
+ it's used to determine unroll
+ factor in the vectorizer. */
+ 1, /* Limit how much the autovectorizer
+ may unroll a loop. */
amdfam10_memcpy,
amdfam10_memset,
COSTS_N_INSNS (2), /* cond_taken_branch_cost. */
@@ -1374,6 +1529,7 @@ const struct processor_costs bdver_cost = {
{10, 10, 10, 40, 60}, /* cost of unaligned stores. */
2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
16, /* cost of moving SSE register to integer. */
+ 16, /* cost of moving integer register to SSE. */
12, 12, /* Gather load static, per_elt. */
10, 10, /* Gather store static, per_elt. */
16, /* size of l1 cache. */
@@ -1405,7 +1561,20 @@ const struct processor_costs bdver_cost = {
COSTS_N_INSNS (27), /* cost of DIVSD instruction. */
COSTS_N_INSNS (15), /* cost of SQRTSS instruction. */
COSTS_N_INSNS (26), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (4), /* cost of CVTSS2SD etc. */
+ COSTS_N_INSNS (7), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (14), /* cost of 512bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (14), /* cost of CVTSI2SS instruction. */
+ COSTS_N_INSNS (13), /* cost of CVT(T)SS2SI instruction. */
+ COSTS_N_INSNS (4), /* cost of CVTPI2PS instruction. */
+ COSTS_N_INSNS (4), /* cost of CVT(T)PS2PI instruction. */
1, 2, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
+ {1, 1, 1}, /* latency times throughput of
+ FMA/DOT_PROD_EXPR/SAD_EXPR,
+ it's used to determine unroll
+ factor in the vectorizer. */
+ 1, /* Limit how much the autovectorizer
+ may unroll a loop. */
bdver_memcpy,
bdver_memset,
COSTS_N_INSNS (4), /* cond_taken_branch_cost. */
@@ -1518,6 +1687,7 @@ struct processor_costs znver1_cost = {
{8, 8, 8, 16, 32}, /* cost of unaligned stores. */
2, 3, 6, /* cost of moving XMM,YMM,ZMM register. */
6, /* cost of moving SSE register to integer. */
+ 6, /* cost of moving integer register to SSE. */
/* VGATHERDPD is 23 uops and throughput is 9, VGATHERDPS is 35 uops,
throughput 12. Approx 9 uops do not depend on vector size and every load
is 7 uops. */
@@ -1553,6 +1723,14 @@ struct processor_costs znver1_cost = {
COSTS_N_INSNS (13), /* cost of DIVSD instruction. */
COSTS_N_INSNS (10), /* cost of SQRTSS instruction. */
COSTS_N_INSNS (15), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */
+ /* Real latency is 4, but for split regs multiply cost of half op by 2. */
+ COSTS_N_INSNS (6), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (12), /* cost of 512bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (8), /* cost of CVTSI2SS instruction. */
+ COSTS_N_INSNS (7), /* cost of CVT(T)SS2SI instruction. */
+ COSTS_N_INSNS (6), /* cost of CVTPI2PS instruction. */
+ COSTS_N_INSNS (6), /* cost of CVT(T)PS2PI instruction. */
/* Zen can execute 4 integer operations per cycle. FP operations take 3 cycles
and it can execute 2 integer additions and 2 multiplications thus
reassociation may make sense up to width of 6. SPEC2k6 benchmarks suggest
@@ -1562,6 +1740,12 @@ struct processor_costs znver1_cost = {
plus/minus operations per cycle but only one multiply. This is adjusted
in ix86_reassociation_width. */
4, 4, 3, 6, /* reassoc int, fp, vec_int, vec_fp. */
+ {5, 1, 3}, /* latency times throughput of
+ FMA/DOT_PROD_EXPR/SAD_EXPR,
+ it's used to determine unroll
+ factor in the vectorizer. */
+ 4, /* Limit how much the autovectorizer
+ may unroll a loop. */
znver1_memcpy,
znver1_memset,
COSTS_N_INSNS (4), /* cond_taken_branch_cost. */
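The two fields added at the end of each cost table work together: the latency-times-throughput triple for FMA/DOT_PROD_EXPR/SAD_EXPR tells the vectorizer how much work it must keep in flight, and the final scalar caps how far it may unroll. A minimal sketch of how such fields could drive an unroll decision -- sketch_unroll_factor, lat_x_tput and max_unroll are illustrative names, not GCC's actual interface:

static unsigned
sketch_unroll_factor (const unsigned lat_x_tput[3], unsigned max_unroll)
{
  /* Take the largest latency-times-throughput entry as the number of
     operations that must be in flight to hide latency...  */
  unsigned need = 1;
  for (int i = 0; i < 3; i++)
    if (lat_x_tput[i] > need)
      need = lat_x_tput[i];
  /* ...and pick the largest power-of-two unroll factor that does not
     exceed it, clamped to the per-CPU limit.  */
  unsigned factor = 1;
  while (factor * 2 <= need && factor * 2 <= max_unroll)
    factor *= 2;
  return factor;
}

For znver1's {5, 1, 3} with limit 4, an FMA-heavy reduction would get a factor of 4.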
@@ -1677,6 +1861,7 @@ struct processor_costs znver2_cost = {
2, 2, 3, /* cost of moving XMM,YMM,ZMM
register. */
6, /* cost of moving SSE register to integer. */
+ 6, /* cost of moving integer register to SSE. */
/* VGATHERDPD is 23 uops and throughput is 9, VGATHERDPS is 35 uops,
throughput 12. Approx 9 uops do not depend on vector size and every load
is 7 uops. */
@@ -1712,6 +1897,13 @@ struct processor_costs znver2_cost = {
COSTS_N_INSNS (13), /* cost of DIVSD instruction. */
COSTS_N_INSNS (10), /* cost of SQRTSS instruction. */
COSTS_N_INSNS (15), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */
+ COSTS_N_INSNS (5), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (10), /* cost of 512bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (7), /* cost of CVTSI2SS instruction. */
+ COSTS_N_INSNS (6), /* cost of CVT(T)SS2SI instruction. */
+ COSTS_N_INSNS (6), /* cost of CVTPI2PS instruction. */
+ COSTS_N_INSNS (3), /* cost of CVT(T)PS2PI instruction. */
/* Zen can execute 4 integer operations per cycle. FP operations
take 3 cycles and it can execute 2 integer additions and 2
multiplications thus reassociation may make sense up to width of 6.
@@ -1722,6 +1914,12 @@ struct processor_costs znver2_cost = {
plus/minus operations per cycle but only one multiply. This is adjusted
in ix86_reassociation_width. */
4, 4, 3, 6, /* reassoc int, fp, vec_int, vec_fp. */
+ {10, 1, 3}, /* latency times throughput of
+ FMA/DOT_PROD_EXPR/SAD_EXPR,
+ it's used to determine unroll
+ factor in the vectorizer. */
+ 4, /* Limit how much the autovectorizer
+ may unroll a loop. */
znver2_memcpy,
znver2_memset,
COSTS_N_INSNS (4), /* cond_taken_branch_cost. */
@@ -1812,6 +2010,7 @@ struct processor_costs znver3_cost = {
2, 2, 3, /* cost of moving XMM,YMM,ZMM
register. */
6, /* cost of moving SSE register to integer. */
+ 6, /* cost of moving integer register to SSE. */
/* VGATHERDPD is 15 uops and throughput is 4, VGATHERDPS is 23 uops,
throughput 9. Approx 7 uops do not depend on vector size and every load
is 4 uops. */
@@ -1847,6 +2046,13 @@ struct processor_costs znver3_cost = {
COSTS_N_INSNS (13), /* cost of DIVSD instruction. */
COSTS_N_INSNS (10), /* cost of SQRTSS instruction. */
COSTS_N_INSNS (15), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */
+ COSTS_N_INSNS (5), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (10), /* cost of 512bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (6), /* cost of CVTSI2SS instruction. */
+ COSTS_N_INSNS (6), /* cost of CVT(T)SS2SI instruction. */
+ COSTS_N_INSNS (3), /* cost of CVTPI2PS instruction. */
+ COSTS_N_INSNS (4), /* cost of CVT(T)PS2PI instruction. */
/* Zen can execute 4 integer operations per cycle. FP operations
take 3 cycles and it can execute 2 integer additions and 2
multiplications thus reassociation may make sense up to width of 6.
@@ -1857,6 +2063,12 @@ struct processor_costs znver3_cost = {
plus/minus operations per cycle but only one multiply. This is adjusted
in ix86_reassociation_width. */
4, 4, 3, 6, /* reassoc int, fp, vec_int, vec_fp. */
+ {8, 1, 6}, /* latency times throughput of
+ FMA/DOT_PROD_EXPR/SAD_EXPR,
+ it's used to determine unroll
+ factor in the vectorizer. */
+ 4, /* Limit how much the autovectorizer
+ may unroll a loop. */
znver2_memcpy,
znver2_memset,
COSTS_N_INSNS (4), /* cond_taken_branch_cost. */
@@ -1949,6 +2161,7 @@ struct processor_costs znver4_cost = {
2, 2, 2, /* cost of moving XMM,YMM,ZMM
register. */
6, /* cost of moving SSE register to integer. */
+ 6, /* cost of moving integer register to SSE. */
/* VGATHERDPD is 17 uops and throughput is 4, VGATHERDPS is 24 uops,
throughput 5. Approx 7 uops do not depend on vector size and every load
is 5 uops. */
@@ -1984,6 +2197,14 @@ struct processor_costs znver4_cost = {
COSTS_N_INSNS (13), /* cost of DIVSD instruction. */
COSTS_N_INSNS (15), /* cost of SQRTSS instruction. */
COSTS_N_INSNS (21), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */
+ COSTS_N_INSNS (5), /* cost of 256bit VCVTPS2PD etc. */
+ /* Real latency is 6, but for split regs multiply cost of half op by 2. */
+ COSTS_N_INSNS (10), /* cost of 512bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (6), /* cost of CVTSI2SS instruction. */
+ COSTS_N_INSNS (6), /* cost of CVT(T)SS2SI instruction. */
+ COSTS_N_INSNS (3), /* cost of CVTPI2PS instruction. */
+ COSTS_N_INSNS (4), /* cost of CVT(T)PS2PI instruction. */
/* Zen can execute 4 integer operations per cycle. FP operations
take 3 cycles and it can execute 2 integer additions and 2
multiplications thus reassociation may make sense up to width of 6.
@@ -1994,6 +2215,12 @@ struct processor_costs znver4_cost = {
plus/minus operations per cycle but only one multiply. This is adjusted
in ix86_reassociation_width. */
4, 4, 3, 6, /* reassoc int, fp, vec_int, vec_fp. */
+ {8, 8, 6}, /* latency times throughput of
+ FMA/DOT_PROD_EXPR/SAD_EXPR,
+ it's used to determine unroll
+ factor in the vectorizer. */
+ 4, /* Limit how much the autovectorizer
+ may unroll a loop. */
znver2_memcpy,
znver2_memset,
COSTS_N_INSNS (4), /* cond_taken_branch_cost. */
@@ -2089,6 +2316,7 @@ struct processor_costs znver5_cost = {
2, 2, 2, /* cost of moving XMM,YMM,ZMM
register. */
6, /* cost of moving SSE register to integer. */
+ 6, /* cost of moving integer register to SSE. */
/* TODO: gather and scatter instructions are currently disabled in
x86-tune.def. In some cases they are however a win, see PR116582
@@ -2120,7 +2348,7 @@ struct processor_costs znver5_cost = {
COSTS_N_INSNS (1), /* cost of cheap SSE instruction. */
/* ADDSS has throughput 2 and latency 2
(in some cases when source is another addition). */
- COSTS_N_INSNS (3), /* cost of ADDSS/SD SUBSS/SD insns. */
+ COSTS_N_INSNS (2), /* cost of ADDSS/SD SUBSS/SD insns. */
/* MULSS has throughput 2 and latency 3. */
COSTS_N_INSNS (3), /* cost of MULSS instruction. */
COSTS_N_INSNS (3), /* cost of MULSD instruction. */
@@ -2135,6 +2363,13 @@ struct processor_costs znver5_cost = {
COSTS_N_INSNS (14), /* cost of SQRTSS instruction. */
/* SQRTSD has throughput 0.13 and latency 20. */
COSTS_N_INSNS (20), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */
+ COSTS_N_INSNS (5), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (5), /* cost of 512bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (6), /* cost of CVTSI2SS instruction. */
+ COSTS_N_INSNS (6), /* cost of CVT(T)SS2SI instruction. */
+ COSTS_N_INSNS (3), /* cost of CVTPI2PS instruction. */
+ COSTS_N_INSNS (4), /* cost of CVT(T)PS2PI instruction. */
/* Zen5 can execute:
- integer ops: 6 per cycle, at most 3 multiplications.
latency 1 for additions, 3 for multiplications (pipelined)
@@ -2148,6 +2383,12 @@ struct processor_costs znver5_cost = {
We increase width to 6 for multiplications
in ix86_reassociation_width. */
6, 6, 4, 6, /* reassoc int, fp, vec_int, vec_fp. */
+ {8, 8, 6}, /* latency times throughput of
+ FMA/DOT_PROD_EXPR/SAD_EXPR,
+ it's used to determine unroll
+ factor in the vectorizer. */
+ 4, /* Limit how much the autovectorizer
+ may unroll a loop. */
znver2_memcpy,
znver2_memset,
COSTS_N_INSNS (4), /* cond_taken_branch_cost. */
@@ -2250,6 +2491,7 @@ struct processor_costs skylake_cost = {
{8, 8, 8, 8, 16}, /* cost of unaligned stores. */
2, 2, 4, /* cost of moving XMM,YMM,ZMM register */
6, /* cost of moving SSE register to integer. */
+ 6, /* cost of moving integer register to SSE. */
20, 8, /* Gather load static, per_elt. */
22, 10, /* Gather store static, per_elt. */
64, /* size of l1 cache. */
@@ -2274,7 +2516,20 @@ struct processor_costs skylake_cost = {
COSTS_N_INSNS (14), /* cost of DIVSD instruction. */
COSTS_N_INSNS (12), /* cost of SQRTSS instruction. */
COSTS_N_INSNS (18), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (2), /* cost of CVTSS2SD etc. */
+ COSTS_N_INSNS (2), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (4), /* cost of 512bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (6), /* cost of CVTSI2SS instruction. */
+ COSTS_N_INSNS (6), /* cost of CVT(T)SS2SI instruction. */
+ COSTS_N_INSNS (6), /* cost of CVTPI2PS instruction. */
+ COSTS_N_INSNS (7), /* cost of CVT(T)PS2PI instruction. */
1, 4, 2, 2, /* reassoc int, fp, vec_int, vec_fp. */
+ {8, 1, 3}, /* latency times throughput of
+ FMA/DOT_PROD_EXPR/SAD_EXPR,
+ it's used to determine unroll
+ factor in the vectorizer. */
+ 4, /* Limit how much the autovectorizer
+ may unroll a loop. */
skylake_memcpy,
skylake_memset,
COSTS_N_INSNS (3), /* cond_taken_branch_cost. */
@@ -2379,6 +2634,7 @@ struct processor_costs icelake_cost = {
{8, 8, 8, 8, 16}, /* cost of unaligned stores. */
2, 2, 4, /* cost of moving XMM,YMM,ZMM register */
6, /* cost of moving SSE register to integer. */
+ 6, /* cost of moving integer register to SSE. */
20, 8, /* Gather load static, per_elt. */
22, 10, /* Gather store static, per_elt. */
64, /* size of l1 cache. */
@@ -2403,7 +2659,20 @@ struct processor_costs icelake_cost = {
COSTS_N_INSNS (14), /* cost of DIVSD instruction. */
COSTS_N_INSNS (12), /* cost of SQRTSS instruction. */
COSTS_N_INSNS (18), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (2), /* cost of CVTSS2SD etc. */
+ COSTS_N_INSNS (2), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (2), /* cost of 512bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (6), /* cost of CVTSI2SS instruction. */
+ COSTS_N_INSNS (6), /* cost of CVT(T)SS2SI instruction. */
+ COSTS_N_INSNS (7), /* cost of CVTPI2PS instruction. */
+ COSTS_N_INSNS (6), /* cost of CVT(T)PS2PI instruction. */
1, 4, 2, 2, /* reassoc int, fp, vec_int, vec_fp. */
+ {8, 10, 3}, /* latency times throughput of
+ FMA/DOT_PROD_EXPR/SAD_EXPR,
+ it's used to determine unroll
+ factor in the vectorizer. */
+ 4, /* Limit how much the autovectorizer
+ may unroll a loop. */
icelake_memcpy,
icelake_memset,
COSTS_N_INSNS (3), /* cond_taken_branch_cost. */
@@ -2502,6 +2771,7 @@ struct processor_costs alderlake_cost = {
{8, 8, 8, 10, 15}, /* cost of unaligned stores. */
2, 3, 4, /* cost of moving XMM,YMM,ZMM register */
6, /* cost of moving SSE register to integer. */
+ 6, /* cost of moving integer register to SSE. */
18, 6, /* Gather load static, per_elt. */
18, 6, /* Gather store static, per_elt. */
32, /* size of l1 cache. */
@@ -2526,7 +2796,20 @@ struct processor_costs alderlake_cost = {
COSTS_N_INSNS (17), /* cost of DIVSD instruction. */
COSTS_N_INSNS (14), /* cost of SQRTSS instruction. */
COSTS_N_INSNS (18), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (2), /* cost of CVTSS2SD etc. */
+ COSTS_N_INSNS (2), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (2), /* cost of 512bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (6), /* cost of CVTSI2SS instruction. */
+ COSTS_N_INSNS (6), /* cost of CVT(T)SS2SI instruction. */
+ COSTS_N_INSNS (7), /* cost of CVTPI2PS instruction. */
+ COSTS_N_INSNS (6), /* cost of CVT(T)PS2PI instruction. */
1, 4, 3, 3, /* reassoc int, fp, vec_int, vec_fp. */
+ {8, 8, 3}, /* latency times throughput of
+ FMA/DOT_PROD_EXPR/SAD_EXPR,
+ it's used to determine unroll
+ factor in the vectorizer. */
+ 4, /* Limit how much the autovectorizer
+ may unroll a loop. */
alderlake_memcpy,
alderlake_memset,
COSTS_N_INSNS (4), /* cond_taken_branch_cost. */
@@ -2618,6 +2901,7 @@ const struct processor_costs btver1_cost = {
{10, 10, 12, 48, 96}, /* cost of unaligned stores. */
2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
14, /* cost of moving SSE register to integer. */
+ 14, /* cost of moving integer register to SSE. */
10, 10, /* Gather load static, per_elt. */
10, 10, /* Gather store static, per_elt. */
32, /* size of l1 cache. */
@@ -2642,7 +2926,20 @@ const struct processor_costs btver1_cost = {
COSTS_N_INSNS (17), /* cost of DIVSD instruction. */
COSTS_N_INSNS (14), /* cost of SQRTSS instruction. */
COSTS_N_INSNS (48), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (4), /* cost of CVTSS2SD etc. */
+ COSTS_N_INSNS (7), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (14), /* cost of 512bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (14), /* cost of CVTSI2SS instruction. */
+ COSTS_N_INSNS (13), /* cost of CVT(T)SS2SI instruction. */
+ COSTS_N_INSNS (4), /* cost of CVTPI2PS instruction. */
+ COSTS_N_INSNS (4), /* cost of CVT(T)PS2PI instruction. */
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
+ {1, 1, 1}, /* latency times throughput of
+ FMA/DOT_PROD_EXPR/SAD_EXPR,
+ it's used to determine unroll
+ factor in the vectorizer. */
+ 1, /* Limit how much the autovectorizer
+ may unroll a loop. */
btver1_memcpy,
btver1_memset,
COSTS_N_INSNS (2), /* cond_taken_branch_cost. */
@@ -2731,6 +3028,7 @@ const struct processor_costs btver2_cost = {
{10, 10, 12, 48, 96}, /* cost of unaligned stores. */
2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
14, /* cost of moving SSE register to integer. */
+ 14, /* cost of moving integer register to SSE. */
10, 10, /* Gather load static, per_elt. */
10, 10, /* Gather store static, per_elt. */
32, /* size of l1 cache. */
@@ -2755,7 +3053,20 @@ const struct processor_costs btver2_cost = {
COSTS_N_INSNS (19), /* cost of DIVSD instruction. */
COSTS_N_INSNS (16), /* cost of SQRTSS instruction. */
COSTS_N_INSNS (21), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (4), /* cost of CVTSS2SD etc. */
+ COSTS_N_INSNS (7), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (14), /* cost of 512bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (14), /* cost of CVTSI2SS instruction. */
+ COSTS_N_INSNS (13), /* cost of CVT(T)SS2SI instruction. */
+ COSTS_N_INSNS (4), /* cost of CVTPI2PS instruction. */
+ COSTS_N_INSNS (4), /* cost of CVT(T)PS2PI instruction. */
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
+ {1, 1, 1}, /* latency times throughput of
+ FMA/DOT_PROD_EXPR/SAD_EXPR,
+ it's used to determine unroll
+ factor in the vectorizer. */
+ 1, /* Limit how much the autovectorizer
+ may unroll a loop. */
btver2_memcpy,
btver2_memset,
COSTS_N_INSNS (2), /* cond_taken_branch_cost. */
@@ -2843,6 +3154,7 @@ struct processor_costs pentium4_cost = {
{32, 32, 32, 64, 128}, /* cost of unaligned stores. */
12, 24, 48, /* cost of moving XMM,YMM,ZMM register */
20, /* cost of moving SSE register to integer. */
+ 20, /* cost of moving integer register to SSE. */
16, 16, /* Gather load static, per_elt. */
16, 16, /* Gather store static, per_elt. */
8, /* size of l1 cache. */
@@ -2867,7 +3179,20 @@ struct processor_costs pentium4_cost = {
COSTS_N_INSNS (38), /* cost of DIVSD instruction. */
COSTS_N_INSNS (23), /* cost of SQRTSS instruction. */
COSTS_N_INSNS (38), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (10), /* cost of CVTSS2SD etc. */
+ COSTS_N_INSNS (20), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (40), /* cost of 512bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (20), /* cost of CVTSI2SS instruction. */
+ COSTS_N_INSNS (17), /* cost of CVT(T)SS2SI instruction. */
+ COSTS_N_INSNS (12), /* cost of CVTPI2PS instruction. */
+ COSTS_N_INSNS (8), /* cost of CVT(T)PS2PI instruction. */
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
+ {1, 1, 1}, /* latency times throughput of
+ FMA/DOT_PROD_EXPR/SAD_EXPR,
+ it's used to determine unroll
+ factor in the vectorizer. */
+ 1, /* Limit how much the autovectorizer
+ may unroll a loop. */
pentium4_memcpy,
pentium4_memset,
COSTS_N_INSNS (3), /* cond_taken_branch_cost. */
@@ -2958,6 +3283,7 @@ struct processor_costs nocona_cost = {
{24, 24, 24, 48, 96}, /* cost of unaligned stores. */
6, 12, 24, /* cost of moving XMM,YMM,ZMM register */
20, /* cost of moving SSE register to integer. */
+ 20, /* cost of moving integer register to SSE. */
12, 12, /* Gather load static, per_elt. */
12, 12, /* Gather store static, per_elt. */
8, /* size of l1 cache. */
@@ -2982,7 +3308,20 @@ struct processor_costs nocona_cost = {
COSTS_N_INSNS (40), /* cost of DIVSD instruction. */
COSTS_N_INSNS (32), /* cost of SQRTSS instruction. */
COSTS_N_INSNS (41), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (10), /* cost of CVTSS2SD etc. */
+ COSTS_N_INSNS (20), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (40), /* cost of 512bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (20), /* cost of CVTSI2SS instruction. */
+ COSTS_N_INSNS (17), /* cost of CVT(T)SS2SI instruction. */
+ COSTS_N_INSNS (12), /* cost of CVTPI2PS instruction. */
+ COSTS_N_INSNS (8), /* cost of CVT(T)PS2PI instruction. */
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
+ {1, 1, 1}, /* latency times throughput of
+ FMA/DOT_PROD_EXPR/SAD_EXPR,
+ it's used to determine unroll
+ factor in the vectorizer. */
+ 1, /* Limit how much the autovectorizer
+ may unroll a loop. */
nocona_memcpy,
nocona_memset,
COSTS_N_INSNS (3), /* cond_taken_branch_cost. */
@@ -3071,6 +3410,7 @@ struct processor_costs atom_cost = {
{16, 16, 16, 32, 64}, /* cost of unaligned stores. */
2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
8, /* cost of moving SSE register to integer. */
+ 8, /* cost of moving integer register to SSE. */
8, 8, /* Gather load static, per_elt. */
8, 8, /* Gather store static, per_elt. */
32, /* size of l1 cache. */
@@ -3095,7 +3435,20 @@ struct processor_costs atom_cost = {
COSTS_N_INSNS (60), /* cost of DIVSD instruction. */
COSTS_N_INSNS (31), /* cost of SQRTSS instruction. */
COSTS_N_INSNS (63), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (6), /* cost of CVTSS2SD etc. */
+ COSTS_N_INSNS (12), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (24), /* cost of 512bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (7), /* cost of CVTSI2SS instruction. */
+ COSTS_N_INSNS (10), /* cost of CVT(T)SS2SI instruction. */
+ COSTS_N_INSNS (6), /* cost of CVTPI2PS instruction. */
+ COSTS_N_INSNS (4), /* cost of CVT(T)PS2PI instruction. */
2, 2, 2, 2, /* reassoc int, fp, vec_int, vec_fp. */
+ {8, 8, 3}, /* latency times throughput of
+ FMA/DOT_PROD_EXPR/SAD_EXPR,
+ it's used to determine unroll
+ factor in the vectorizer. */
+ 2, /* Limit how much the autovectorizer
+ may unroll a loop. */
atom_memcpy,
atom_memset,
COSTS_N_INSNS (3), /* cond_taken_branch_cost. */
@@ -3184,6 +3537,7 @@ struct processor_costs slm_cost = {
{16, 16, 16, 32, 64}, /* cost of unaligned stores. */
2, 4, 8, /* cost of moving XMM,YMM,ZMM register */
8, /* cost of moving SSE register to integer. */
+ 8, /* cost of moving integer register to SSE. */
8, 8, /* Gather load static, per_elt. */
8, 8, /* Gather store static, per_elt. */
32, /* size of l1 cache. */
@@ -3208,7 +3562,20 @@ struct processor_costs slm_cost = {
COSTS_N_INSNS (69), /* cost of DIVSD instruction. */
COSTS_N_INSNS (20), /* cost of SQRTSS instruction. */
COSTS_N_INSNS (35), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */
+ COSTS_N_INSNS (6), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (12), /* cost of 512bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (5), /* cost of CVTSI2SS instruction. */
+ COSTS_N_INSNS (5), /* cost of CVT(T)SS2SI instruction. */
+ COSTS_N_INSNS (4), /* cost of CVTPI2PS instruction. */
+ COSTS_N_INSNS (4), /* cost of CVT(T)PS2PI instruction. */
1, 2, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
+ {8, 8, 3}, /* latency times throughput of
+ FMA/DOT_PROD_EXPR/SAD_EXPR,
+ it's used to determine unroll
+ factor in the vectorizer. */
+ 1, /* Limit how much the autovectorizer
+ may unroll a loop. */
slm_memcpy,
slm_memset,
COSTS_N_INSNS (3), /* cond_taken_branch_cost. */
@@ -3309,6 +3676,7 @@ struct processor_costs tremont_cost = {
{6, 6, 6, 10, 15}, /* cost of unaligned stores. */
2, 3, 4, /* cost of moving XMM,YMM,ZMM register */
6, /* cost of moving SSE register to integer. */
+ 6, /* cost of moving integer register to SSE. */
18, 6, /* Gather load static, per_elt. */
18, 6, /* Gather store static, per_elt. */
32, /* size of l1 cache. */
@@ -3335,7 +3703,20 @@ struct processor_costs tremont_cost = {
COSTS_N_INSNS (17), /* cost of DIVSD instruction. */
COSTS_N_INSNS (14), /* cost of SQRTSS instruction. */
COSTS_N_INSNS (18), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */
+ COSTS_N_INSNS (6), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (12), /* cost of 512bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (6), /* cost of CVTSI2SS instruction. */
+ COSTS_N_INSNS (6), /* cost of CVT(T)SS2SI instruction. */
+ COSTS_N_INSNS (4), /* cost of CVTPI2PS instruction. */
+ COSTS_N_INSNS (4), /* cost of CVT(T)PS2PI instruction. */
1, 4, 3, 3, /* reassoc int, fp, vec_int, vec_fp. */
+ {8, 1, 3}, /* latency times throughput of
+ FMA/DOT_PROD_EXPR/SAD_EXPR,
+ it's used to determine unroll
+ factor in the vectorizer. */
+ 4, /* Limit how much the autovectorizer
+ may unroll a loop. */
tremont_memcpy,
tremont_memset,
COSTS_N_INSNS (4), /* cond_taken_branch_cost. */
@@ -3349,119 +3730,6 @@ struct processor_costs tremont_cost = {
COSTS_N_INSNS (2), /* Branch mispredict scale. */
};
-static stringop_algs intel_memcpy[2] = {
- {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
- {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
- {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
-static stringop_algs intel_memset[2] = {
- {libcall, {{8, loop, false}, {15, unrolled_loop, false},
- {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
- {libcall, {{24, loop, false}, {32, unrolled_loop, false},
- {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
-static const
-struct processor_costs intel_cost = {
- {
- /* Start of register allocator costs. integer->integer move cost is 2. */
- 6, /* cost for loading QImode using movzbl */
- {4, 4, 4}, /* cost of loading integer registers
- in QImode, HImode and SImode.
- Relative to reg-reg move (2). */
- {6, 6, 6}, /* cost of storing integer registers */
- 2, /* cost of reg,reg fld/fst */
- {6, 6, 8}, /* cost of loading fp registers
- in SFmode, DFmode and XFmode */
- {6, 6, 10}, /* cost of storing fp registers
- in SFmode, DFmode and XFmode */
- 2, /* cost of moving MMX register */
- {6, 6}, /* cost of loading MMX registers
- in SImode and DImode */
- {6, 6}, /* cost of storing MMX registers
- in SImode and DImode */
- 2, 2, 2, /* cost of moving XMM,YMM,ZMM register */
- {6, 6, 6, 6, 6}, /* cost of loading SSE registers
- in 32,64,128,256 and 512-bit */
- {6, 6, 6, 6, 6}, /* cost of storing SSE registers
- in 32,64,128,256 and 512-bit */
- 4, 4, /* SSE->integer and integer->SSE moves */
- 4, 4, /* mask->integer and integer->mask moves */
- {4, 4, 4}, /* cost of loading mask register
- in QImode, HImode, SImode. */
- {6, 6, 6}, /* cost if storing mask register
- in QImode, HImode, SImode. */
- 2, /* cost of moving mask register. */
- /* End of register allocator costs. */
- },
-
- COSTS_N_INSNS (1), /* cost of an add instruction */
- COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
- COSTS_N_INSNS (1), /* variable shift costs */
- COSTS_N_INSNS (1), /* constant shift costs */
- {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
- COSTS_N_INSNS (3), /* HI */
- COSTS_N_INSNS (3), /* SI */
- COSTS_N_INSNS (4), /* DI */
- COSTS_N_INSNS (2)}, /* other */
- 0, /* cost of multiply per each bit set */
- {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
- COSTS_N_INSNS (26), /* HI */
- COSTS_N_INSNS (42), /* SI */
- COSTS_N_INSNS (74), /* DI */
- COSTS_N_INSNS (74)}, /* other */
- COSTS_N_INSNS (1), /* cost of movsx */
- COSTS_N_INSNS (1), /* cost of movzx */
- 8, /* "large" insn */
- 17, /* MOVE_RATIO */
- 6, /* CLEAR_RATIO */
- {4, 4, 4}, /* cost of loading integer registers
- in QImode, HImode and SImode.
- Relative to reg-reg move (2). */
- {6, 6, 6}, /* cost of storing integer registers */
- {6, 6, 6, 6, 6}, /* cost of loading SSE register
- in 32bit, 64bit, 128bit, 256bit and 512bit */
- {6, 6, 6, 6, 6}, /* cost of storing SSE register
- in 32bit, 64bit, 128bit, 256bit and 512bit */
- {10, 10, 10, 10, 10}, /* cost of unaligned loads. */
- {10, 10, 10, 10, 10}, /* cost of unaligned loads. */
- 2, 2, 2, /* cost of moving XMM,YMM,ZMM register */
- 4, /* cost of moving SSE register to integer. */
- 6, 6, /* Gather load static, per_elt. */
- 6, 6, /* Gather store static, per_elt. */
- 32, /* size of l1 cache. */
- 256, /* size of l2 cache. */
- 64, /* size of prefetch block */
- 6, /* number of parallel prefetches */
- 3, /* Branch cost */
- COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
- COSTS_N_INSNS (8), /* cost of FMUL instruction. */
- COSTS_N_INSNS (20), /* cost of FDIV instruction. */
- COSTS_N_INSNS (8), /* cost of FABS instruction. */
- COSTS_N_INSNS (8), /* cost of FCHS instruction. */
- COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
-
- COSTS_N_INSNS (1), /* cost of cheap SSE instruction. */
- COSTS_N_INSNS (8), /* cost of ADDSS/SD SUBSS/SD insns. */
- COSTS_N_INSNS (8), /* cost of MULSS instruction. */
- COSTS_N_INSNS (8), /* cost of MULSD instruction. */
- COSTS_N_INSNS (6), /* cost of FMA SS instruction. */
- COSTS_N_INSNS (6), /* cost of FMA SD instruction. */
- COSTS_N_INSNS (20), /* cost of DIVSS instruction. */
- COSTS_N_INSNS (20), /* cost of DIVSD instruction. */
- COSTS_N_INSNS (40), /* cost of SQRTSS instruction. */
- COSTS_N_INSNS (40), /* cost of SQRTSD instruction. */
- 1, 4, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
- intel_memcpy,
- intel_memset,
- COSTS_N_INSNS (3), /* cond_taken_branch_cost. */
- COSTS_N_INSNS (1), /* cond_not_taken_branch_cost. */
- "16", /* Loop alignment. */
- "16:8:8", /* Jump alignment. */
- "0:0:8", /* Label alignment. */
- "16", /* Func alignment. */
- 4, /* Small unroll limit. */
- 2, /* Small unroll factor. */
- COSTS_N_INSNS (2), /* Branch mispredict scale. */
-};
-
/* lujiazui_cost should produce code tuned for ZHAOXIN lujiazui CPU. */
static stringop_algs lujiazui_memcpy[2] = {
{libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false},
@@ -3532,15 +3800,16 @@ struct processor_costs lujiazui_cost = {
{6, 6, 6}, /* cost of loading integer registers
in QImode, HImode and SImode.
Relative to reg-reg move (2). */
- {6, 6, 6}, /* cost of storing integer registers. */
+ {6, 6, 6}, /* cost of storing integer registers. */
{6, 6, 6, 10, 15}, /* cost of loading SSE register
- in 32bit, 64bit, 128bit, 256bit and 512bit. */
+ in 32bit, 64bit, 128bit, 256bit and 512bit. */
{6, 6, 6, 10, 15}, /* cost of storing SSE register
- in 32bit, 64bit, 128bit, 256bit and 512bit. */
+ in 32bit, 64bit, 128bit, 256bit and 512bit. */
{6, 6, 6, 10, 15}, /* cost of unaligned loads. */
{6, 6, 6, 10, 15}, /* cost of unaligned stores. */
- 2, 3, 4, /* cost of moving XMM,YMM,ZMM register. */
- 6, /* cost of moving SSE register to integer. */
+ 2, 3, 4, /* cost of moving XMM,YMM,ZMM register. */
+ 6, /* cost of moving SSE register to integer. */
+ 6, /* cost of moving integer register to SSE. */
18, 6, /* Gather load static, per_elt. */
18, 6, /* Gather store static, per_elt. */
32, /* size of l1 cache. */
@@ -3566,7 +3835,20 @@ struct processor_costs lujiazui_cost = {
COSTS_N_INSNS (17), /* cost of DIVSD instruction. */
COSTS_N_INSNS (32), /* cost of SQRTSS instruction. */
COSTS_N_INSNS (60), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */
+ COSTS_N_INSNS (6), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (12), /* cost of 512bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (3), /* cost of CVTSI2SS instruction. */
+ COSTS_N_INSNS (3), /* cost of CVT(T)SS2SI instruction. */
+ COSTS_N_INSNS (3), /* cost of CVTPI2PS instruction. */
+ COSTS_N_INSNS (3), /* cost of CVT(T)PS2PI instruction. */
1, 4, 3, 3, /* reassoc int, fp, vec_int, vec_fp. */
+ {8, 1, 3}, /* latency times throughput of
+ FMA/DOT_PROD_EXPR/SAD_EXPR,
+ it's used to determine unroll
+ factor in the vectorizer. */
+ 4, /* Limit how much the autovectorizer
+ may unroll a loop. */
lujiazui_memcpy,
lujiazui_memset,
COSTS_N_INSNS (4), /* cond_taken_branch_cost. */
@@ -3658,6 +3940,7 @@ struct processor_costs yongfeng_cost = {
{8, 8, 8, 12, 15}, /* cost of unaligned stores. */
2, 3, 4, /* cost of moving XMM,YMM,ZMM register. */
8, /* cost of moving SSE register to integer. */
+ 8, /* cost of moving integer register to SSE. */
18, 6, /* Gather load static, per_elt. */
18, 6, /* Gather store static, per_elt. */
32, /* size of l1 cache. */
@@ -3682,7 +3965,20 @@ struct processor_costs yongfeng_cost = {
COSTS_N_INSNS (14), /* cost of DIVSD instruction. */
COSTS_N_INSNS (20), /* cost of SQRTSS instruction. */
COSTS_N_INSNS (35), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */
+ COSTS_N_INSNS (6), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (12), /* cost of 512bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (3), /* cost of CVTSI2SS instruction. */
+ COSTS_N_INSNS (3), /* cost of CVT(T)SS2SI instruction. */
+ COSTS_N_INSNS (3), /* cost of CVTPI2PS instruction. */
+ COSTS_N_INSNS (3), /* cost of CVT(T)PS2PI instruction. */
4, 4, 4, 4, /* reassoc int, fp, vec_int, vec_fp. */
+ {8, 1, 3}, /* latency times throughput of
+ FMA/DOT_PROD_EXPR/SAD_EXPR,
+ it's used to determine unroll
+ factor in the vectorizer. */
+ 1, /* Limit how much the autovectorizer
+ may unroll a loop. */
yongfeng_memcpy,
yongfeng_memset,
COSTS_N_INSNS (3), /* cond_taken_branch_cost. */
@@ -3774,6 +4070,7 @@ struct processor_costs shijidadao_cost = {
{8, 8, 8, 12, 15}, /* cost of unaligned stores. */
2, 3, 4, /* cost of moving XMM,YMM,ZMM register. */
8, /* cost of moving SSE register to integer. */
+ 8, /* cost of moving integer register to SSE. */
18, 6, /* Gather load static, per_elt. */
18, 6, /* Gather store static, per_elt. */
32, /* size of l1 cache. */
@@ -3798,7 +4095,20 @@ struct processor_costs shijidadao_cost = {
COSTS_N_INSNS (14), /* cost of DIVSD instruction. */
COSTS_N_INSNS (11), /* cost of SQRTSS instruction. */
COSTS_N_INSNS (18), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */
+ COSTS_N_INSNS (6), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (12), /* cost of 512bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (3), /* cost of CVTSI2SS instruction. */
+ COSTS_N_INSNS (3), /* cost of CVT(T)SS2SI instruction. */
+ COSTS_N_INSNS (3), /* cost of CVTPI2PS instruction. */
+ COSTS_N_INSNS (3), /* cost of CVT(T)PS2PI instruction. */
4, 4, 4, 4, /* reassoc int, fp, vec_int, vec_fp. */
+ {8, 1, 3}, /* latency times throughput of
+ FMA/DOT_PROD_EXPR/SAD_EXPR,
+ it's used to determine unroll
+ factor in the vectorizer. */
+ 1, /* Limit how much the autovectorizer
+ may unroll a loop. */
shijidadao_memcpy,
shijidadao_memset,
COSTS_N_INSNS (3), /* cond_taken_branch_cost. */
@@ -3814,19 +4124,36 @@ struct processor_costs shijidadao_cost = {
-/* Generic should produce code tuned for Core-i7 (and newer chips)
- and btver1 (and newer chips). */
+/* Generic should produce code tuned for Haswell (and newer chips)
+ and znver1 (and newer chips):
+ 1. Don't align memory.
+ 2. For known sizes, prefer a vector loop, or a loop unrolled to 4
+ moves or stores per iteration without aligning the loop, up to
+ 256 bytes.
+ 3. For unknown sizes, use memcpy/memset.
+ 4. Since each loop iteration has 4 stores, and zeroing with an
+ unrolled loop may need 8 stores, change CLEAR_RATIO to 10 so that
+ zeroing of up to 72 bytes is fully unrolled with 9 stores without
+ SSE.
+ */
static stringop_algs generic_memcpy[2] = {
- {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false},
- {-1, libcall, false}}},
- {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false},
- {-1, libcall, false}}}};
+ {libcall,
+ {{256, vector_loop, true},
+ {256, unrolled_loop, true},
+ {-1, libcall, true}}},
+ {libcall,
+ {{256, vector_loop, true},
+ {256, unrolled_loop, true},
+ {-1, libcall, true}}}};
static stringop_algs generic_memset[2] = {
- {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false},
- {-1, libcall, false}}},
- {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false},
- {-1, libcall, false}}}};
+ {libcall,
+ {{256, vector_loop, true},
+ {256, unrolled_loop, true},
+ {-1, libcall, true}}},
+ {libcall,
+ {{256, vector_loop, true},
+ {256, unrolled_loop, true},
+ {-1, libcall, true}}}};
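For reference, a stringop_algs table is consulted by size: the first {max, alg} entry whose max covers the known copy size wins, and max == -1 is the catch-all used for unknown or larger sizes. A hedged sketch of that selection, ignoring the noalign flag carried in the third field (stringop_entry and pick_alg are illustrative names, not GCC's implementation):

struct stringop_entry { long max; const char *alg; };

static const char *
pick_alg (const struct stringop_entry *table, long size)
{
  /* With the generic_memcpy table above: 200 bytes -> "vector_loop",
     anything above 256 bytes falls through to "libcall".  */
  for (; table->max != -1; table++)
    if (size <= table->max)
      return table->alg;
  return table->alg;
}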
static const
struct processor_costs generic_cost = {
{
@@ -3883,7 +4210,7 @@ struct processor_costs generic_cost = {
COSTS_N_INSNS (1), /* cost of movzx */
8, /* "large" insn */
17, /* MOVE_RATIO */
- 6, /* CLEAR_RATIO */
+ 10, /* CLEAR_RATIO */
{6, 6, 6}, /* cost of loading integer registers
in QImode, HImode and SImode.
Relative to reg-reg move (2). */
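The effect of the CLEAR_RATIO bump can be seen on a small aggregate; a hypothetical example consistent with the arithmetic in the comment above:

struct s { char buf[72]; };

void
clear (struct s *p)
{
  /* Zeroing 72 bytes takes 9 eight-byte stores; since 9 < 10, with
     CLEAR_RATIO == 10 this is fully unrolled into plain stores without
     SSE, where the old ratio of 6 would not have allowed it.  */
  __builtin_memset (p, 0, sizeof *p);
}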
@@ -3896,6 +4223,7 @@ struct processor_costs generic_cost = {
{6, 6, 6, 10, 15}, /* cost of unaligned stores. */
2, 3, 4, /* cost of moving XMM,YMM,ZMM register */
6, /* cost of moving SSE register to integer. */
+ 6, /* cost of moving integer register to SSE. */
18, 6, /* Gather load static, per_elt. */
18, 6, /* Gather store static, per_elt. */
32, /* size of l1 cache. */
@@ -3922,7 +4250,20 @@ struct processor_costs generic_cost = {
COSTS_N_INSNS (17), /* cost of DIVSD instruction. */
COSTS_N_INSNS (14), /* cost of SQRTSS instruction. */
COSTS_N_INSNS (18), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (3), /* cost of CVTSS2SD etc. */
+ COSTS_N_INSNS (4), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (5), /* cost of 512bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (6), /* cost of CVTSI2SS instruction. */
+ COSTS_N_INSNS (6), /* cost of CVT(T)SS2SI instruction. */
+ COSTS_N_INSNS (3), /* cost of CVTPI2PS instruction. */
+ COSTS_N_INSNS (3), /* cost of CVT(T)PS2PI instruction. */
1, 4, 3, 3, /* reassoc int, fp, vec_int, vec_fp. */
+ {8, 8, 3}, /* latency times throughput of
+ FMA/DOT_PROD_EXPR/SAD_EXPR,
+ it's used to determine unroll
+ factor in the vectorizer. */
+ 4, /* Limit how much the autovectorizer
+ may unroll a loop. */
generic_memcpy,
generic_memset,
COSTS_N_INSNS (4), /* cond_taken_branch_cost. */
@@ -4022,6 +4363,7 @@ struct processor_costs core_cost = {
{6, 6, 6, 6, 12}, /* cost of unaligned stores. */
2, 2, 4, /* cost of moving XMM,YMM,ZMM register */
2, /* cost of moving SSE register to integer. */
+ 2, /* cost of moving integer register to SSE. */
/* VGATHERDPD is 7 uops, rec throughput 5, while VGATHERDPS is 9 uops,
rec. throughput 6.
So 5 uops statically and one uop per load. */
@@ -4051,7 +4393,20 @@ struct processor_costs core_cost = {
COSTS_N_INSNS (32), /* cost of DIVSD instruction. */
COSTS_N_INSNS (30), /* cost of SQRTSS instruction. */
COSTS_N_INSNS (58), /* cost of SQRTSD instruction. */
+ COSTS_N_INSNS (2), /* cost of CVTSS2SD etc. */
+ COSTS_N_INSNS (2), /* cost of 256bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (2), /* cost of 512bit VCVTPS2PD etc. */
+ COSTS_N_INSNS (6), /* cost of CVTSI2SS instruction. */
+ COSTS_N_INSNS (6), /* cost of CVT(T)SS2SI instruction. */
+ COSTS_N_INSNS (6), /* cost of CVTPI2PS instruction. */
+ COSTS_N_INSNS (7), /* cost of CVT(T)PS2PI instruction. */
1, 4, 2, 2, /* reassoc int, fp, vec_int, vec_fp. */
+ {8, 1, 3}, /* latency times throughput of
+ FMA/DOT_PROD_EXPR/SAD_EXPR,
+ it's used to determine unroll
+ factor in the vectorizer. */
+ 1, /* Limit how much the autovectorizer
+ may unroll a loop. */
core_memcpy,
core_memset,
COSTS_N_INSNS (3), /* cond_taken_branch_cost. */
diff --git a/gcc/config/i386/x86-tune-sched.cc b/gcc/config/i386/x86-tune-sched.cc
index 15d3d91..ff9c268 100644
--- a/gcc/config/i386/x86-tune-sched.cc
+++ b/gcc/config/i386/x86-tune-sched.cc
@@ -45,7 +45,6 @@ ix86_issue_rate (void)
case PROCESSOR_LAKEMONT:
case PROCESSOR_BONNELL:
case PROCESSOR_SILVERMONT:
- case PROCESSOR_INTEL:
case PROCESSOR_K6:
case PROCESSOR_BTVER2:
case PROCESSOR_PENTIUM4:
@@ -80,6 +79,8 @@ ix86_issue_rate (void)
case PROCESSOR_ALDERLAKE:
case PROCESSOR_YONGFENG:
case PROCESSOR_SHIJIDADAO:
+ case PROCESSOR_SIERRAFOREST:
+ case PROCESSOR_INTEL:
case PROCESSOR_GENERIC:
/* For znver5 the decoder can handle 4 or 8 instructions per cycle,
op cache 12 instructions/cycle, dispatch 8 instructions
@@ -99,6 +100,14 @@ ix86_issue_rate (void)
return 5;
case PROCESSOR_SAPPHIRERAPIDS:
+ case PROCESSOR_GRANITERAPIDS:
+ case PROCESSOR_GRANITERAPIDS_D:
+ case PROCESSOR_DIAMONDRAPIDS:
+ case PROCESSOR_GRANDRIDGE:
+ case PROCESSOR_CLEARWATERFOREST:
+ case PROCESSOR_ARROWLAKE:
+ case PROCESSOR_ARROWLAKE_S:
+ case PROCESSOR_PANTHERLAKE:
return 6;
default:
@@ -488,6 +497,7 @@ ix86_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, int cost,
case PROCESSOR_HASWELL:
case PROCESSOR_TREMONT:
case PROCESSOR_ALDERLAKE:
+ case PROCESSOR_INTEL:
case PROCESSOR_GENERIC:
/* The stack engine allows push&pop instructions to execute in parallel. */
if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
@@ -510,7 +520,6 @@ ix86_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, int cost,
break;
case PROCESSOR_SILVERMONT:
- case PROCESSOR_INTEL:
if (!reload_completed)
return cost;
diff --git a/gcc/config/i386/x86-tune.def b/gcc/config/i386/x86-tune.def
index c857e76..a86cbad 100644
--- a/gcc/config/i386/x86-tune.def
+++ b/gcc/config/i386/x86-tune.def
@@ -31,7 +31,7 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
- Updating ix86_issue_rate and ix86_adjust_cost in i386.md
- possibly updating ia32_multipass_dfa_lookahead, ix86_sched_reorder
and ix86_sched_init_global if those tricks are needed.
- - Tunning the flags bellow. Those are split into sections and each
+ - Tuning the flags below; those are split into sections and each
section is very roughly ordered by importance. */
/*****************************************************************************/
@@ -87,9 +87,7 @@ DEF_TUNE (X86_TUNE_SSE_PARTIAL_REG_CONVERTS_DEPENDENCY,
several insns to break false dependency on the dest register for GLC
micro-architecture. */
DEF_TUNE (X86_TUNE_DEST_FALSE_DEP_FOR_GLC,
- "dest_false_dep_for_glc", m_SAPPHIRERAPIDS | m_GRANITERAPIDS
- | m_GRANITERAPIDS_D | m_DIAMONDRAPIDS | m_CORE_HYBRID
- | m_CORE_ATOM)
+ "dest_false_dep_for_glc", m_SAPPHIRERAPIDS | m_ALDERLAKE)
/* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
are resolved on SSE register parts instead of whole registers, so we may
@@ -574,6 +572,11 @@ DEF_TUNE (X86_TUNE_V2DF_REDUCTION_PREFER_HADDPD,
DEF_TUNE (X86_TUNE_SSE_MOVCC_USE_BLENDV,
"sse_movcc_use_blendv", ~m_CORE_ATOM)
+/* X86_TUNE_SSE_REDUCTION_PREFER_PSHUF: Prefer pshuf* to reduce V16QI,
+ V8HI, V4SI, V4SF and V2DI modes when lshr is costlier. */
+DEF_TUNE (X86_TUNE_SSE_REDUCTION_PREFER_PSHUF,
+ "sse_reduction_prefer_pshuf", m_ZNVER4 | m_ZNVER5)
+
/*****************************************************************************/
/* AVX instruction selection tuning (some of SSE flags affects AVX, too) */
/*****************************************************************************/
@@ -636,6 +639,11 @@ DEF_TUNE (X86_TUNE_AVX512_STORE_BY_PIECES, "avx512_store_by_pieces",
DEF_TUNE (X86_TUNE_AVX512_TWO_EPILOGUES, "avx512_two_epilogues",
m_ZNVER4 | m_ZNVER5)
+/* X86_TUNE_AVX512_MASKED_EPILOGUES: Use two masked vector epilogues
+ when they fit. */
+DEF_TUNE (X86_TUNE_AVX512_MASKED_EPILOGUES, "avx512_masked_epilogues",
+ m_ZNVER4 | m_ZNVER5)
+
/*****************************************************************************/
/*****************************************************************************/
/* Historical relics: tuning flags that help specific old CPU designs */
diff --git a/gcc/config/ia64/ia64.cc b/gcc/config/ia64/ia64.cc
index 91b7310..8dab927 100644
--- a/gcc/config/ia64/ia64.cc
+++ b/gcc/config/ia64/ia64.cc
@@ -1241,8 +1241,7 @@ ia64_expand_tls_address (enum tls_model tls_kind, rtx op0, rtx op1,
LCT_CONST, Pmode,
tga_op1, Pmode, tga_op2, Pmode);
- insns = get_insns ();
- end_sequence ();
+ insns = end_sequence ();
if (GET_MODE (op0) != Pmode)
op0 = tga_ret;
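These hunks (and the analogous ones in the other backends below) rely on end_sequence now returning the emitted insns itself, folding the old get_insns/end_sequence pair into one call. A minimal sketch of the assumed combined helper:

rtx_insn *
end_sequence ()
{
  rtx_insn *insns = get_insns ();  /* insns emitted since start_sequence */
  /* ... pop back to the enclosing sequence, as before ...  */
  return insns;
}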
@@ -1265,8 +1264,7 @@ ia64_expand_tls_address (enum tls_model tls_kind, rtx op0, rtx op1,
LCT_CONST, Pmode,
tga_op1, Pmode, tga_op2, Pmode);
- insns = get_insns ();
- end_sequence ();
+ insns = end_sequence ();
tga_eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
UNSPEC_LD_BASE);
@@ -1879,8 +1877,7 @@ ia64_expand_compare (rtx *expr, rtx *op0, rtx *op1)
emit_insn (gen_rtx_SET (cmp, gen_rtx_fmt_ee (ncode, BImode,
ret, const0_rtx)));
- rtx_insn *insns = get_insns ();
- end_sequence ();
+ rtx_insn *insns = end_sequence ();
emit_libcall_block (insns, cmp, cmp,
gen_rtx_fmt_ee (code, BImode, *op0, *op1));
@@ -3174,8 +3171,7 @@ spill_restore_mem (rtx reg, HOST_WIDE_INT cfa_off)
spill_fill_data.init_reg[iter],
disp_rtx));
- seq = get_insns ();
- end_sequence ();
+ seq = end_sequence ();
}
/* Careful for being the first insn in a sequence. */
@@ -11711,8 +11707,7 @@ expand_vec_perm_interleave_2 (struct expand_vec_perm_d *d)
this *will* succeed. For V8QImode or V2SImode it may not. */
start_sequence ();
ok = expand_vec_perm_1 (&dfinal);
- seq = get_insns ();
- end_sequence ();
+ seq = end_sequence ();
if (!ok)
return false;
if (d->testing_p)
diff --git a/gcc/config/loongarch/genopts/isa-evolution.in b/gcc/config/loongarch/genopts/isa-evolution.in
index 50f72d5..836d93a 100644
--- a/gcc/config/loongarch/genopts/isa-evolution.in
+++ b/gcc/config/loongarch/genopts/isa-evolution.in
@@ -2,4 +2,5 @@
2 26 div32 1.1 Support div.w[u] and mod.w[u] instructions with inputs not sign-extended.
2 27 lam-bh 1.1 Support am{swap/add}[_db].{b/h} instructions.
2 28 lamcas 1.1 Support amcas[_db].{b/h/w/d} instructions.
+2 30 scq 1.1 Support sc.q instruction.
3 23 ld-seq-sa 1.1 Do not need load-load barriers (dbar 0x700).
diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md
index 43e3ab0..3d71f30 100644
--- a/gcc/config/loongarch/lasx.md
+++ b/gcc/config/loongarch/lasx.md
@@ -2060,9 +2060,9 @@
[(set_attr "type" "simd_int_arith")
(set_attr "mode" "<MODE>")])
-(define_insn "lasx_xvshuf_<lasxfmt_f>"
+(define_insn "@lasx_xvshuf_<lasxfmt_f>"
[(set (match_operand:LASX_DWH 0 "register_operand" "=f")
- (unspec:LASX_DWH [(match_operand:LASX_DWH 1 "register_operand" "0")
+ (unspec:LASX_DWH [(match_operand:<VIMODE> 1 "register_operand" "0")
(match_operand:LASX_DWH 2 "register_operand" "f")
(match_operand:LASX_DWH 3 "register_operand" "f")]
UNSPEC_LASX_XVSHUF))]
diff --git a/gcc/config/loongarch/loongarch-def.cc b/gcc/config/loongarch/loongarch-def.cc
index 04b277e..dcd8d90 100644
--- a/gcc/config/loongarch/loongarch-def.cc
+++ b/gcc/config/loongarch/loongarch-def.cc
@@ -72,7 +72,7 @@ array_arch<loongarch_isa> loongarch_cpu_default_isa =
.simd_ (ISA_EXT_SIMD_LASX)
.evolution_ (OPTION_MASK_ISA_DIV32 | OPTION_MASK_ISA_LD_SEQ_SA
| OPTION_MASK_ISA_LAM_BH | OPTION_MASK_ISA_LAMCAS
- | OPTION_MASK_ISA_FRECIPE))
+ | OPTION_MASK_ISA_FRECIPE | OPTION_MASK_ISA_SCQ))
.set (ARCH_LA64V1_0,
loongarch_isa ()
.base_ (ISA_BASE_LA64)
@@ -86,7 +86,7 @@ array_arch<loongarch_isa> loongarch_cpu_default_isa =
.simd_ (ISA_EXT_SIMD_LSX)
.evolution_ (OPTION_MASK_ISA_DIV32 | OPTION_MASK_ISA_LD_SEQ_SA
| OPTION_MASK_ISA_LAM_BH | OPTION_MASK_ISA_LAMCAS
- | OPTION_MASK_ISA_FRECIPE));
+ | OPTION_MASK_ISA_FRECIPE | OPTION_MASK_ISA_SCQ));
static inline loongarch_cache la464_cache ()
diff --git a/gcc/config/loongarch/loongarch-def.h b/gcc/config/loongarch/loongarch-def.h
index 0bcd2a7..0a7d0c9 100644
--- a/gcc/config/loongarch/loongarch-def.h
+++ b/gcc/config/loongarch/loongarch-def.h
@@ -78,12 +78,10 @@ extern loongarch_def_array<const char *, N_ISA_EXT_TYPES>
/* Base ABI */
-enum {
- ABI_BASE_LP64D = 0,
- ABI_BASE_LP64F = 1,
- ABI_BASE_LP64S = 2,
- N_ABI_BASE_TYPES = 3
-};
+#define ABI_BASE_LP64D 0
+#define ABI_BASE_LP64F 1
+#define ABI_BASE_LP64S 2
+#define N_ABI_BASE_TYPES 3
extern loongarch_def_array<const char *, N_ABI_BASE_TYPES>
loongarch_abi_base_strings;
diff --git a/gcc/config/loongarch/loongarch-evolution.cc b/gcc/config/loongarch/loongarch-evolution.cc
index de68624..a92a645 100644
--- a/gcc/config/loongarch/loongarch-evolution.cc
+++ b/gcc/config/loongarch/loongarch-evolution.cc
@@ -32,6 +32,7 @@ int la_evo_feature_masks[] = {
OPTION_MASK_ISA_DIV32,
OPTION_MASK_ISA_LAM_BH,
OPTION_MASK_ISA_LAMCAS,
+ OPTION_MASK_ISA_SCQ,
OPTION_MASK_ISA_LD_SEQ_SA,
};
@@ -40,6 +41,7 @@ const char* la_evo_macro_name[] = {
"__loongarch_div32",
"__loongarch_lam_bh",
"__loongarch_lamcas",
+ "__loongarch_scq",
"__loongarch_ld_seq_sa",
};
@@ -48,6 +50,7 @@ int la_evo_version_major[] = {
1, /* DIV32 */
1, /* LAM_BH */
1, /* LAMCAS */
+ 1, /* SCQ */
1, /* LD_SEQ_SA */
};
@@ -56,5 +59,6 @@ int la_evo_version_minor[] = {
1, /* DIV32 */
1, /* LAM_BH */
1, /* LAMCAS */
+ 1, /* SCQ */
1, /* LD_SEQ_SA */
};
diff --git a/gcc/config/loongarch/loongarch-evolution.h b/gcc/config/loongarch/loongarch-evolution.h
index 5f90839..7fb7b0d 100644
--- a/gcc/config/loongarch/loongarch-evolution.h
+++ b/gcc/config/loongarch/loongarch-evolution.h
@@ -36,6 +36,7 @@ static constexpr struct {
{ 2, 1u << 26, OPTION_MASK_ISA_DIV32 },
{ 2, 1u << 27, OPTION_MASK_ISA_LAM_BH },
{ 2, 1u << 28, OPTION_MASK_ISA_LAMCAS },
+ { 2, 1u << 30, OPTION_MASK_ISA_SCQ },
{ 3, 1u << 23, OPTION_MASK_ISA_LD_SEQ_SA },
};
@@ -58,8 +59,9 @@ enum {
EVO_DIV32 = 1,
EVO_LAM_BH = 2,
EVO_LAMCAS = 3,
- EVO_LD_SEQ_SA = 4,
- N_EVO_FEATURES = 5
+ EVO_SCQ = 4,
+ EVO_LD_SEQ_SA = 5,
+ N_EVO_FEATURES = 6
};
/* Condition macros */
@@ -71,6 +73,8 @@ enum {
(la_target.isa.evolution & OPTION_MASK_ISA_LAM_BH)
#define ISA_HAS_LAMCAS \
(la_target.isa.evolution & OPTION_MASK_ISA_LAMCAS)
+#define ISA_HAS_SCQ \
+ (la_target.isa.evolution & OPTION_MASK_ISA_SCQ)
#define ISA_HAS_LD_SEQ_SA \
(la_target.isa.evolution & OPTION_MASK_ISA_LD_SEQ_SA)
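The table entry above registers sc.q as CPUCFG word 2, bit 30. A user-level runtime probe for the bit might look like this (a sketch; assumes the __cpucfg intrinsic from larchintrin.h):

#include <larchintrin.h>

static int
has_scq (void)
{
  /* CPUCFG word 2, bit 30, per the entry added above.  */
  return (__cpucfg (2) >> 30) & 1;
}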
diff --git a/gcc/config/loongarch/loongarch-str.h b/gcc/config/loongarch/loongarch-str.h
index 1546ea3..583cce8 100644
--- a/gcc/config/loongarch/loongarch-str.h
+++ b/gcc/config/loongarch/loongarch-str.h
@@ -70,6 +70,7 @@ along with GCC; see the file COPYING3. If not see
#define OPTSTR_DIV32 "div32"
#define OPTSTR_LAM_BH "lam-bh"
#define OPTSTR_LAMCAS "lamcas"
+#define OPTSTR_SCQ "scq"
#define OPTSTR_LD_SEQ_SA "ld-seq-sa"
#endif /* LOONGARCH_STR_H */
diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
index 7533e53..0935d7b 100644
--- a/gcc/config/loongarch/loongarch.cc
+++ b/gcc/config/loongarch/loongarch.cc
@@ -2948,9 +2948,7 @@ loongarch_call_tls_get_addr (rtx sym, enum loongarch_symbol_type type, rtx v0)
RTL_CONST_CALL_P (insn) = 1;
use_reg (&CALL_INSN_FUNCTION_USAGE (insn), a0);
- insn = get_insns ();
-
- end_sequence ();
+ insn = end_sequence ();
return insn;
}
@@ -4390,6 +4388,7 @@ loongarch_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind,
break;
}
else if (TARGET_RECIP_VEC_DIV
+ && vectype
&& gimple_code (stmt_info->stmt) == GIMPLE_ASSIGN)
{
machine_mode mode = TYPE_MODE (vectype);
@@ -6223,9 +6222,8 @@ loongarch_print_operand_reloc (FILE *file, rtx op, bool hi64_part,
'Q' Print R_LARCH_RELAX for TLS IE.
'r' Print address 12-31bit relocation associated with OP.
'R' Print address 32-51bit relocation associated with OP.
- 'T' Print 'f' for (eq:CC ...), 't' for (ne:CC ...),
- 'z' for (eq:?I ...), 'n' for (ne:?I ...).
- 't' Like 'T', but with the EQ/NE cases reversed
+ 'T' Print a comment marker if %G outputs nothing.
+ 't' Print the register containing the higher 64 bits of a TImode.
'u' Print a LASX register.
'v' Print the insn size suffix b, h, w or d for vector modes V16QI, V8HI,
V4SI, V2SI, and w, d for vector modes V4SF, V2DF respectively.
@@ -6308,6 +6306,13 @@ loongarch_print_operand (FILE *file, rtx op, int letter)
fputs ("dbar\t0x700", file);
break;
+ case 'T':
+ if (!loongarch_cas_failure_memorder_needs_acquire (
+ memmodel_from_int (INTVAL (op)))
+ && ISA_HAS_LD_SEQ_SA)
+ fprintf (file, "%s", ASM_COMMENT_START);
+ break;
+
case 'h':
if (code == HIGH)
op = XEXP (op, 0);
@@ -6386,14 +6391,6 @@ loongarch_print_operand (FILE *file, rtx op, int letter)
false /* lo_reloc */);
break;
- case 't':
- case 'T':
- {
- int truth = (code == NE) == (letter == 'T');
- fputc ("zfnt"[truth * 2 + FCC_REG_P (REGNO (XEXP (op, 0)))], file);
- }
- break;
-
case 'V':
if (CONST_VECTOR_P (op))
{
@@ -6497,6 +6494,16 @@ loongarch_print_operand (FILE *file, rtx op, int letter)
}
break;
+ case 't':
+ if (GET_MODE (op) != TImode
+ || (op != CONST0_RTX (TImode) && code != REG))
+ {
+ output_operand_lossage ("invalid use of '%%%c'", letter);
+ break;
+ }
+ op = loongarch_subword (op, 1);
+ letter = 'z';
+ /* fall through */
default:
switch (code)
{
@@ -8382,7 +8389,7 @@ static bool
loongarch_try_expand_lsx_vshuf_const (struct expand_vec_perm_d *d)
{
int i;
- rtx target, op0, op1, sel, tmp;
+ rtx target, op0, op1;
rtx rperm[MAX_VECT_LEN];
if (GET_MODE_SIZE (d->vmode) == 16)
@@ -8401,47 +8408,23 @@ loongarch_try_expand_lsx_vshuf_const (struct expand_vec_perm_d *d)
for (i = 0; i < d->nelt; i += 1)
rperm[i] = GEN_INT (d->perm[i]);
- if (d->vmode == E_V2DFmode)
- {
- sel = gen_rtx_CONST_VECTOR (E_V2DImode, gen_rtvec_v (d->nelt, rperm));
- tmp = simplify_gen_subreg (E_V2DImode, d->target, d->vmode, 0);
- emit_move_insn (tmp, sel);
- }
- else if (d->vmode == E_V4SFmode)
- {
- sel = gen_rtx_CONST_VECTOR (E_V4SImode, gen_rtvec_v (d->nelt, rperm));
- tmp = simplify_gen_subreg (E_V4SImode, d->target, d->vmode, 0);
- emit_move_insn (tmp, sel);
- }
+ machine_mode sel_mode = related_int_vector_mode (d->vmode)
+ .require ();
+ rtvec sel_v = gen_rtvec_v (d->nelt, rperm);
+
+ /* Although vshuf.* (except vshuf.b) needs sel == target, we cannot
+ load sel into target right now: here we are dealing with
+ pseudo regs, and target may be the same pseudo as op0 or op1,
+ in which case we would clobber an input. Instead, use a new
+ pseudo reg here. The reload pass will look at the constraint
+ of vshuf.* and move sel into target first if needed. */
+ rtx sel = force_reg (sel_mode,
+ gen_rtx_CONST_VECTOR (sel_mode, sel_v));
+
+ if (d->vmode == E_V16QImode)
+ emit_insn (gen_lsx_vshuf_b (target, op1, op0, sel));
else
- {
- sel = gen_rtx_CONST_VECTOR (d->vmode, gen_rtvec_v (d->nelt, rperm));
- emit_move_insn (d->target, sel);
- }
-
- switch (d->vmode)
- {
- case E_V2DFmode:
- emit_insn (gen_lsx_vshuf_d_f (target, target, op1, op0));
- break;
- case E_V2DImode:
- emit_insn (gen_lsx_vshuf_d (target, target, op1, op0));
- break;
- case E_V4SFmode:
- emit_insn (gen_lsx_vshuf_w_f (target, target, op1, op0));
- break;
- case E_V4SImode:
- emit_insn (gen_lsx_vshuf_w (target, target, op1, op0));
- break;
- case E_V8HImode:
- emit_insn (gen_lsx_vshuf_h (target, target, op1, op0));
- break;
- case E_V16QImode:
- emit_insn (gen_lsx_vshuf_b (target, op1, op0, target));
- break;
- default:
- break;
- }
+ emit_insn (gen_lsx_vshuf (d->vmode, target, sel, op1, op0));
return true;
}
@@ -9437,7 +9420,7 @@ loongarch_expand_vec_perm_const (struct expand_vec_perm_d *d)
bool flag = false;
unsigned int i;
unsigned char idx;
- rtx target, op0, op1, sel, tmp;
+ rtx target, op0, op1;
rtx rperm[MAX_VECT_LEN];
unsigned int remapped[MAX_VECT_LEN];
unsigned char perm2[MAX_VECT_LEN];
@@ -9617,63 +9600,23 @@ loongarch_expand_vec_perm_const (struct expand_vec_perm_d *d)
expand_perm_const_end:
if (flag)
{
- /* Copy selector vector from memory to vector register for later insn
- gen function.
- If vector's element in floating point value, we cannot fit
- selector argument into insn gen function directly, because of the
- insn template definition. As a solution, generate a integral mode
- subreg of target, then copy selector vector (that is in integral
- mode) to this subreg. */
- switch (d->vmode)
- {
- case E_V4DFmode:
- sel = gen_rtx_CONST_VECTOR (E_V4DImode, gen_rtvec_v (d->nelt,
- rperm));
- tmp = simplify_gen_subreg (E_V4DImode, d->target, d->vmode, 0);
- emit_move_insn (tmp, sel);
- break;
- case E_V8SFmode:
- sel = gen_rtx_CONST_VECTOR (E_V8SImode, gen_rtvec_v (d->nelt,
- rperm));
- tmp = simplify_gen_subreg (E_V8SImode, d->target, d->vmode, 0);
- emit_move_insn (tmp, sel);
- break;
- default:
- sel = gen_rtx_CONST_VECTOR (d->vmode, gen_rtvec_v (d->nelt,
- rperm));
- emit_move_insn (d->target, sel);
- break;
- }
-
target = d->target;
op0 = d->op0;
op1 = d->one_vector_p ? d->op0 : d->op1;
- /* We FINALLY can generate xvshuf.* insn. */
- switch (d->vmode)
- {
- case E_V4DFmode:
- emit_insn (gen_lasx_xvshuf_d_f (target, target, op1, op0));
- break;
- case E_V4DImode:
- emit_insn (gen_lasx_xvshuf_d (target, target, op1, op0));
- break;
- case E_V8SFmode:
- emit_insn (gen_lasx_xvshuf_w_f (target, target, op1, op0));
- break;
- case E_V8SImode:
- emit_insn (gen_lasx_xvshuf_w (target, target, op1, op0));
- break;
- case E_V16HImode:
- emit_insn (gen_lasx_xvshuf_h (target, target, op1, op0));
- break;
- case E_V32QImode:
- emit_insn (gen_lasx_xvshuf_b (target, op1, op0, target));
- break;
- default:
- gcc_unreachable ();
- break;
- }
+ machine_mode sel_mode = related_int_vector_mode (d->vmode)
+ .require ();
+ rtvec sel_v = gen_rtvec_v (d->nelt, rperm);
+
+ /* See the comment in loongarch_try_expand_lsx_vshuf_const for why
+ we don't simply use a SUBREG to pun target. */
+ rtx sel = force_reg (sel_mode,
+ gen_rtx_CONST_VECTOR (sel_mode, sel_v));
+
+ if (d->vmode == E_V32QImode)
+ emit_insn (gen_lasx_xvshuf_b (target, op1, op0, sel));
+ else
+ emit_insn (gen_lasx_xvshuf (d->vmode, target, sel, op1, op0));
return true;
}
@@ -10852,9 +10795,9 @@ loongarch_expand_vec_cmp (rtx operands[])
to a fixed type. */
static machine_mode
-loongarch_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
+loongarch_promote_function_mode (const_tree type,
machine_mode mode,
- int *punsignedp ATTRIBUTE_UNUSED,
+ int *punsignedp,
const_tree fntype ATTRIBUTE_UNUSED,
int for_return ATTRIBUTE_UNUSED)
{
@@ -11118,17 +11061,21 @@ static bool
loongarch_builtin_support_vector_misalignment (machine_mode mode,
const_tree type,
int misalignment,
- bool is_packed)
+ bool is_packed,
+ bool is_gather_scatter)
{
if ((ISA_HAS_LSX || ISA_HAS_LASX) && STRICT_ALIGNMENT)
{
+ if (is_gather_scatter)
+ return true;
if (optab_handler (movmisalign_optab, mode) == CODE_FOR_nothing)
return false;
if (misalignment == -1)
return false;
}
return default_builtin_support_vector_misalignment (mode, type, misalignment,
- is_packed);
+ is_packed,
+ is_gather_scatter);
}
/* Return a PARALLEL containing NELTS elements, with element I equal
@@ -11216,6 +11163,46 @@ loongarch_c_mode_for_suffix (char suffix)
return VOIDmode;
}
+/* Implement TARGET_C_BITINT_TYPE_INFO.
+ Return true if _BitInt(N) is supported and fill its details into *INFO. */
+bool
+loongarch_bitint_type_info (int n, struct bitint_info *info)
+{
+ if (n <= 8)
+ info->limb_mode = QImode;
+ else if (n <= 16)
+ info->limb_mode = HImode;
+ else if (n <= 32)
+ info->limb_mode = SImode;
+ else if (n <= 64)
+ info->limb_mode = DImode;
+ else if (n <= 128)
+ info->limb_mode = TImode;
+ else
+ info->limb_mode = DImode;
+
+ info->abi_limb_mode = info->limb_mode;
+
+ if (n > 64)
+ info->abi_limb_mode = TImode;
+
+ info->big_endian = false;
+ info->extended = true;
+ return true;
+}
+
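Illustrative consequences of the mapping above (assumed examples, not from the patch):

/* _BitInt(20)  -> limb SImode, ABI limb SImode
   _BitInt(100) -> limb TImode, ABI limb TImode
   _BitInt(200) -> limb DImode, ABI limb TImode: libgcc loops over
                   64-bit limbs, while the ABI passes 128-bit chunks.  */
_BitInt(200) counter;   /* hypothetical declaration */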
+/* Implement TARGET_COMPUTE_PRESSURE_CLASSES. */
+
+static int
+loongarch_compute_pressure_classes (reg_class *classes)
+{
+ int i = 0;
+ classes[i++] = GENERAL_REGS;
+ classes[i++] = FP_REGS;
+ classes[i++] = FCC_REGS;
+ return i;
+}
+
/* Initialize the GCC target structure. */
#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
@@ -11490,6 +11477,12 @@ loongarch_c_mode_for_suffix (char suffix)
#undef TARGET_C_MODE_FOR_SUFFIX
#define TARGET_C_MODE_FOR_SUFFIX loongarch_c_mode_for_suffix
+#undef TARGET_C_BITINT_TYPE_INFO
+#define TARGET_C_BITINT_TYPE_INFO loongarch_bitint_type_info
+
+#undef TARGET_COMPUTE_PRESSURE_CLASSES
+#define TARGET_COMPUTE_PRESSURE_CLASSES loongarch_compute_pressure_classes
+
struct gcc_target targetm = TARGET_INITIALIZER;
#include "gt-loongarch.h"
diff --git a/gcc/config/loongarch/loongarch.h b/gcc/config/loongarch/loongarch.h
index d897763..e8819bf 100644
--- a/gcc/config/loongarch/loongarch.h
+++ b/gcc/config/loongarch/loongarch.h
@@ -270,7 +270,9 @@ along with GCC; see the file COPYING3. If not see
if (GET_MODE_CLASS (MODE) == MODE_INT \
&& GET_MODE_SIZE (MODE) < UNITS_PER_WORD) \
{ \
- if ((MODE) == SImode) \
+ if ((MODE) == SImode \
+ && !(TYPE && TREE_CODE (TYPE) == BITINT_TYPE \
+ && TYPE_PRECISION (TYPE) < 32)) \
(UNSIGNEDP) = 0; \
(MODE) = Pmode; \
}
@@ -823,8 +825,6 @@ typedef struct {
#define CASE_VECTOR_MODE Pmode
-#define CASE_VECTOR_SHORTEN_MODE(MIN, MAX, BODY) Pmode
-
/* Define this as 1 if `char' should by default be signed; else as 0. */
#ifndef DEFAULT_SIGNED_CHAR
#define DEFAULT_SIGNED_CHAR 1
diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
index f7005de..32ef980 100644
--- a/gcc/config/loongarch/loongarch.md
+++ b/gcc/config/loongarch/loongarch.md
@@ -4573,9 +4573,10 @@
"&& true"
[(set (match_dup 3) (match_dup 2))
(set (match_dup 0)
- (unspec:SI [(match_dup 3) (subreg:SI (match_dup 1) 0)] CRC))]
+ (unspec:SI [(match_dup 3) (match_dup 1)] CRC))]
{
operands[3] = gen_reg_rtx (<MODE>mode);
+ operands[1] = lowpart_subreg (SImode, operands[1], DImode);
})
;; With normal or medium code models, if the only use of a pc-relative
diff --git a/gcc/config/loongarch/loongarch.opt b/gcc/config/loongarch/loongarch.opt
index 4d85cf5..fbe61c0 100644
--- a/gcc/config/loongarch/loongarch.opt
+++ b/gcc/config/loongarch/loongarch.opt
@@ -334,6 +334,10 @@ mlamcas
Target Mask(ISA_LAMCAS) Var(la_isa_evolution)
Support amcas[_db].{b/h/w/d} instructions.
+mscq
+Target Mask(ISA_SCQ) Var(la_isa_evolution)
+Support sc.q instruction.
+
mld-seq-sa
Target Mask(ISA_LD_SEQ_SA) Var(la_isa_evolution)
Do not need load-load barriers (dbar 0x700).
diff --git a/gcc/config/loongarch/loongarch.opt.urls b/gcc/config/loongarch/loongarch.opt.urls
index 5f644f6..606a211 100644
--- a/gcc/config/loongarch/loongarch.opt.urls
+++ b/gcc/config/loongarch/loongarch.opt.urls
@@ -90,6 +90,9 @@ UrlSuffix(gcc/LoongArch-Options.html#index-mlam-bh)
mlamcas
UrlSuffix(gcc/LoongArch-Options.html#index-mlamcas)
+mscq
+UrlSuffix(gcc/LoongArch-Options.html#index-mscq)
+
mld-seq-sa
UrlSuffix(gcc/LoongArch-Options.html#index-mld-seq-sa)
diff --git a/gcc/config/loongarch/lsx.md b/gcc/config/loongarch/lsx.md
index 407c868..fb0236b 100644
--- a/gcc/config/loongarch/lsx.md
+++ b/gcc/config/loongarch/lsx.md
@@ -535,9 +535,9 @@
DONE;
})
-(define_insn "lsx_vshuf_<lsxfmt_f>"
+(define_insn "@lsx_vshuf_<lsxfmt_f>"
[(set (match_operand:LSX_DWH 0 "register_operand" "=f")
- (unspec:LSX_DWH [(match_operand:LSX_DWH 1 "register_operand" "0")
+ (unspec:LSX_DWH [(match_operand:<VIMODE> 1 "register_operand" "0")
(match_operand:LSX_DWH 2 "register_operand" "f")
(match_operand:LSX_DWH 3 "register_operand" "f")]
UNSPEC_LSX_VSHUF))]
diff --git a/gcc/config/loongarch/simd.md b/gcc/config/loongarch/simd.md
index dd17cd1..4156b26 100644
--- a/gcc/config/loongarch/simd.md
+++ b/gcc/config/loongarch/simd.md
@@ -773,7 +773,7 @@
(vec_select:<VEC_HALF>
(match_operand:IVEC 2 "register_operand" "f")
(match_operand:IVEC 4 "vect_par_cnst_even_or_odd_half")))
- (any_extend:<WVEC>
+ (any_extend:<WVEC_HALF>
(vec_select:<VEC_HALF>
(match_operand:IVEC 3 "register_operand" "f")
(match_dup 4))))
diff --git a/gcc/config/loongarch/sync.md b/gcc/config/loongarch/sync.md
index fd8d732..2ee400e 100644
--- a/gcc/config/loongarch/sync.md
+++ b/gcc/config/loongarch/sync.md
@@ -21,25 +21,25 @@
(define_c_enum "unspec" [
UNSPEC_COMPARE_AND_SWAP
+ UNSPEC_COMPARE_AND_SWAP_AMCAS
UNSPEC_COMPARE_AND_SWAP_ADD
UNSPEC_COMPARE_AND_SWAP_SUB
- UNSPEC_COMPARE_AND_SWAP_AND
- UNSPEC_COMPARE_AND_SWAP_XOR
- UNSPEC_COMPARE_AND_SWAP_OR
UNSPEC_COMPARE_AND_SWAP_NAND
UNSPEC_SYNC_OLD_OP
UNSPEC_SYNC_EXCHANGE
UNSPEC_ATOMIC_STORE
UNSPEC_ATOMIC_LOAD
UNSPEC_MEMORY_BARRIER
+
+ UNSPEC_TI_FETCH_ADD
+ UNSPEC_TI_FETCH_SUB
+ UNSPEC_TI_FETCH_AND
+ UNSPEC_TI_FETCH_XOR
+ UNSPEC_TI_FETCH_OR
+ UNSPEC_TI_FETCH_NAND_MASK_INVERTED
])
(define_code_iterator any_atomic [plus ior xor and])
-(define_code_attr atomic_optab
- [(plus "add") (ior "or") (xor "xor") (and "and")])
-
-;; This attribute gives the format suffix for atomic memory operations.
-(define_mode_attr amo [(QI "b") (HI "h") (SI "w") (DI "d")])
;; <amop> expands to the name of the atomic operand that implements a
;; particular code.
@@ -107,7 +107,7 @@
(define_insn "atomic_load<mode>"
[(set (match_operand:QHWD 0 "register_operand" "=r")
(unspec_volatile:QHWD
- [(match_operand:QHWD 1 "memory_operand" "+m")
+ [(match_operand:QHWD 1 "memory_operand" "m")
(match_operand:SI 2 "const_int_operand")] ;; model
UNSPEC_ATOMIC_LOAD))]
""
@@ -142,9 +142,50 @@
}
[(set (attr "length") (const_int 12))])
+(define_insn "atomic_loadti_lsx"
+ [(set (match_operand:V2DI 0 "register_operand" "=f")
+ (unspec_volatile:V2DI
+ [(match_operand:TI 1 "memory_operand" "m")
+ (match_operand:SI 2 "const_int_operand")] ;; model
+ UNSPEC_ATOMIC_LOAD))]
+ "ISA_HAS_LSX && TARGET_64BIT"
+{
+ enum memmodel model = memmodel_base (INTVAL (operands[2]));
+
+ switch (model)
+ {
+ case MEMMODEL_SEQ_CST:
+ output_asm_insn ("dbar\t0x11", operands);
+ /* fall through */
+ case MEMMODEL_ACQUIRE:
+ case MEMMODEL_RELAXED:
+ return "vld\t%w0,%1\\n\\t%G2";
+
+ default:
+ gcc_unreachable ();
+ }
+}
+ [(set (attr "length") (const_int 12))])
+
+(define_expand "atomic_loadti"
+ [(match_operand:TI 0 "register_operand" "=r")
+ (match_operand:TI 1 "memory_operand" "m")
+ (match_operand:SI 2 "const_int_operand")]
+ "ISA_HAS_LSX && TARGET_64BIT"
+{
+ rtx vr = gen_reg_rtx (V2DImode);
+
+ emit_insn (gen_atomic_loadti_lsx (vr, operands[1], operands[2]));
+ for (int i = 0; i < 2; i++)
+ emit_insn (
+ gen_lsx_vpickve2gr_d (loongarch_subword (operands[0], i), vr,
+ GEN_INT (i)));
+ DONE;
+})
+
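From the user's side, the expander above services 16-byte atomic loads; a sketch of the expected mapping (assumed codegen, relaxed order):

#include <stdatomic.h>

_Atomic __int128 v;

__int128
load_v (void)
{
  /* Expands to vld plus two vpickve2gr.d moves into GPRs.  */
  return atomic_load_explicit (&v, memory_order_relaxed);
}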
;; Implement atomic stores with amoswap. Fall back to fences for atomic loads.
(define_insn "atomic_store<mode>"
- [(set (match_operand:QHWD 0 "memory_operand" "+m")
+ [(set (match_operand:QHWD 0 "memory_operand" "=m")
(unspec_volatile:QHWD
[(match_operand:QHWD 1 "reg_or_0_operand" "rJ")
(match_operand:SI 2 "const_int_operand")] ;; model
@@ -175,7 +216,67 @@
}
[(set (attr "length") (const_int 12))])
-(define_insn "atomic_<atomic_optab><mode>"
+(define_insn "atomic_storeti_lsx"
+ [(set (match_operand:TI 0 "memory_operand" "=m")
+ (unspec_volatile:TI
+ [(match_operand:V2DI 1 "register_operand" "f")
+ (match_operand:SI 2 "const_int_operand")] ;; model
+ UNSPEC_ATOMIC_STORE))]
+ "ISA_HAS_LSX && TARGET_64BIT"
+{
+ enum memmodel model = memmodel_base (INTVAL (operands[2]));
+
+ switch (model)
+ {
+ case MEMMODEL_SEQ_CST:
+ return "dbar\t0x12\\n\\t"
+ "vst\t%w1,%0\\n\\t"
+ "dbar\t0x18";
+ case MEMMODEL_RELEASE:
+ return "dbar\t0x12\\n\\t"
+ "vst\t%w1,%0";
+ case MEMMODEL_RELAXED:
+ return "vst\t%w1,%0";
+ default:
+ gcc_unreachable ();
+ }
+}
+ [(set (attr "length") (const_int 12))])
+
+(define_insn "atomic_storeti_scq"
+ [(set (match_operand:TI 0 "memory_operand" "=m")
+ (unspec_volatile:TI
+ [(match_operand:TI 1 "register_operand" "r")]
+ UNSPEC_ATOMIC_STORE))
+ (clobber (match_scratch:DI 2 "=&r"))]
+ "TARGET_64BIT && ISA_HAS_SCQ"
+ "1:\\n\\tll.d\t$r0,%0\n\tmove\t%2,%1\n\tsc.q\t%2,%t1,%0\n\tbeqz\t%2,1b"
+ [(set (attr "length") (const_int 16))])
+
+(define_expand "atomic_storeti"
+ [(match_operand:TI 0 "memory_operand" "=m")
+ (match_operand:TI 1 "reg_or_0_operand" "rJ")
+ (match_operand:SI 2 "const_int_operand")]
+ "TARGET_64BIT && (ISA_HAS_LSX || ISA_HAS_SCQ)"
+{
+ if (!ISA_HAS_LSX)
+ {
+ emit_insn (gen_atomic_storeti_scq (operands[0], operands[1]));
+ DONE;
+ }
+
+ rtx vr = gen_reg_rtx (V2DImode), op1 = operands[1];
+ rtvec v = rtvec_alloc (2);
+
+ for (int i = 0; i < 2; i++)
+ RTVEC_ELT (v, i) = loongarch_subword (op1, i);
+
+ emit_insn (gen_vec_initv2didi (vr, gen_rtx_PARALLEL (V2DImode, v)));
+ emit_insn (gen_atomic_storeti_lsx (operands[0], vr, operands[2]));
+ DONE;
+})
+
+(define_insn "atomic_<amop><mode>"
[(set (match_operand:GPR 0 "memory_operand" "+ZB")
(unspec_volatile:GPR
[(any_atomic:GPR (match_dup 0)
@@ -183,7 +284,7 @@
(match_operand:SI 2 "const_int_operand")] ;; model
UNSPEC_SYNC_OLD_OP))]
""
- "am<amop>%A2.<amo>\t$zero,%z1,%0"
+ "am<amop>%A2.<size>\t$zero,%z1,%0"
[(set (attr "length") (const_int 4))])
(define_insn "atomic_add<mode>"
@@ -194,10 +295,10 @@
(match_operand:SI 2 "const_int_operand")] ;; model
UNSPEC_SYNC_OLD_OP))]
"ISA_HAS_LAM_BH"
- "amadd%A2.<amo>\t$zero,%z1,%0"
+ "amadd%A2.<size>\t$zero,%z1,%0"
[(set (attr "length") (const_int 4))])
-(define_insn "atomic_fetch_<atomic_optab><mode>"
+(define_insn "atomic_fetch_<amop><mode>"
[(set (match_operand:GPR 0 "register_operand" "=&r")
(match_operand:GPR 1 "memory_operand" "+ZB"))
(set (match_dup 1)
@@ -207,9 +308,52 @@
(match_operand:SI 3 "const_int_operand")] ;; model
UNSPEC_SYNC_OLD_OP))]
""
- "am<amop>%A3.<amo>\t%0,%z2,%1"
+ "am<amop>%A3.<size>\t%0,%z2,%1"
[(set (attr "length") (const_int 4))])
+(define_insn "atomic_fetch_nand_mask_inverted<mode>"
+ [(set (match_operand:GPR 0 "register_operand" "=&r")
+ (match_operand:GPR 1 "memory_operand" "+ZC"))
+ (set (match_dup 1)
+ (unspec_volatile:GPR
+ [(ior:GPR (not (match_dup 1))
+ (match_operand:GPR 2 "register_operand" "r"))]
+ UNSPEC_SYNC_OLD_OP))
+ (clobber (match_scratch:GPR 3 "=&r"))]
+ ""
+ {
+ return "1:\\n\\t"
+ "ll.<d>\\t%0,%1\\n\\t"
+ "orn\\t%3,%2,%0\\n\\t"
+ "sc.<d>\\t%3,%1\\n\\t"
+ "beqz\\t%3,1b";
+ }
+ [(set (attr "length") (const_int 16))])
+
+(define_mode_iterator ALL_SC [GPR (TI "TARGET_64BIT && ISA_HAS_SCQ")])
+(define_mode_attr _scq [(SI "") (DI "") (TI "_scq")])
+(define_expand "atomic_fetch_nand<mode>"
+ [(match_operand:ALL_SC 0 "register_operand")
+ (match_operand:ALL_SC 1 "memory_operand")
+ (match_operand:ALL_SC 2 "reg_or_0_operand")
+ (match_operand:SI 3 "const_int_operand")]
+ ""
+ {
+ /* ~(atom & mask) = (~mask) | (~atom), so we can hoist (~mask) out of
+ the ll/sc loop and use the orn instruction inside it. */
+ rtx inverted_mask = gen_reg_rtx (<MODE>mode);
+ emit_move_insn (inverted_mask,
+ expand_simple_unop (<MODE>mode, NOT, operands[2],
+ NULL_RTX, false));
+
+ emit_insn (
+ gen_atomic_fetch_nand_mask_inverted<mode><_scq> (operands[0],
+ operands[1],
+ inverted_mask));
+ DONE;
+ })
+
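The identity the expander relies on is plain De Morgan; a quick host-side check:

#include <assert.h>

int
main (void)
{
  unsigned atom = 0x1234u, mask = 0xff00u;
  /* ~(atom & mask) == (~mask) | (~atom), so ~mask can be hoisted
     and orn used inside the ll/sc loop.  */
  assert (~(atom & mask) == (~mask | ~atom));
  return 0;
}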
(define_insn "atomic_exchange<mode>"
[(set (match_operand:GPR 0 "register_operand" "=&r")
(unspec_volatile:GPR
@@ -219,9 +363,44 @@
(set (match_dup 1)
(match_operand:GPR 2 "register_operand" "r"))]
""
- "amswap%A3.<amo>\t%0,%z2,%1"
+ "amswap%A3.<size>\t%0,%z2,%1"
[(set (attr "length") (const_int 4))])
+(define_insn "atomic_exchangeti_scq"
+ [(set (match_operand:TI 0 "register_operand" "=&r")
+ (unspec_volatile:TI
+ [(match_operand:TI 1 "memory_operand" "+ZB")]
+ UNSPEC_SYNC_EXCHANGE))
+ (set (match_dup 1)
+ (match_operand:TI 2 "register_operand" "rJ"))
+ (clobber (match_scratch:DI 3 "=&r"))]
+ "TARGET_64BIT && ISA_HAS_SCQ"
+{
+ output_asm_insn ("1:", operands);
+ output_asm_insn ("ll.d\t%0,%1", operands);
+ if (!ISA_HAS_LD_SEQ_SA)
+ output_asm_insn ("dbar\t0x700", operands);
+ output_asm_insn ("ld.d\t%t0,%b1,8", operands);
+ output_asm_insn ("move\t%3,%z2", operands);
+ output_asm_insn ("sc.q\t%3,%t2,%1", operands);
+ output_asm_insn ("beqz\t%3,1b", operands);
+
+ return "";
+}
+ [(set (attr "length") (const_int 24))])
+
+(define_expand "atomic_exchangeti"
+ [(match_operand:TI 0 "register_operand" "=&r")
+ (match_operand:TI 1 "memory_operand" "+ZB")
+ (match_operand:TI 2 "register_operand" "rJ")
+ (match_operand:SI 3 "const_int_operand")] ;; model
+ "TARGET_64BIT && ISA_HAS_SCQ"
+{
+ emit_insn (gen_atomic_exchangeti_scq (operands[0], operands[1],
+ operands[2]));
+ DONE;
+})
+
(define_insn "atomic_exchange<mode>_short"
[(set (match_operand:SHORT 0 "register_operand" "=&r")
(unspec_volatile:SHORT
@@ -231,7 +410,7 @@
(set (match_dup 1)
(match_operand:SHORT 2 "register_operand" "r"))]
"ISA_HAS_LAM_BH"
- "amswap%A3.<amo>\t%0,%z2,%1"
+ "amswap%A3.<size>\t%0,%z2,%1"
[(set (attr "length") (const_int 4))])
(define_insn "atomic_cas_value_strong<mode>"
@@ -240,13 +419,13 @@
(set (match_dup 1)
(unspec_volatile:GPR [(match_operand:GPR 2 "reg_or_0_operand" "rJ")
(match_operand:GPR 3 "reg_or_0_operand" "rJ")
- (match_operand:SI 4 "const_int_operand")] ;; mod_s
+ (match_operand:SI 4 "const_int_operand")] ;; mod_f
UNSPEC_COMPARE_AND_SWAP))
(clobber (match_scratch:GPR 5 "=&r"))]
""
{
output_asm_insn ("1:", operands);
- output_asm_insn ("ll.<amo>\t%0,%1", operands);
+ output_asm_insn ("ll.<size>\t%0,%1", operands);
/* Like the test case atomic-cas-int.C, in loongarch64, O1 and higher, the
return value of the val_without_const_folding will not be truncated and
@@ -266,9 +445,9 @@
output_asm_insn ("bne\t%0,%z2,2f", operands);
output_asm_insn ("or%i3\t%5,$zero,%3", operands);
- output_asm_insn ("sc.<amo>\t%5,%1", operands);
+ output_asm_insn ("sc.<size>\t%5,%1", operands);
output_asm_insn ("beqz\t%5,1b", operands);
- output_asm_insn ("b\t3f", operands);
+ output_asm_insn ("%T4b\t3f", operands);
output_asm_insn ("2:", operands);
output_asm_insn ("%G4", operands);
output_asm_insn ("3:", operands);
@@ -288,10 +467,10 @@
(set (match_dup 1)
(unspec_volatile:QHWD [(match_operand:QHWD 2 "reg_or_0_operand" "rJ")
(match_operand:QHWD 3 "reg_or_0_operand" "rJ")
- (match_operand:SI 4 "const_int_operand")] ;; mod_s
- UNSPEC_COMPARE_AND_SWAP))]
+ (match_operand:SI 4 "const_int_operand")] ;; mod
+ UNSPEC_COMPARE_AND_SWAP_AMCAS))]
"ISA_HAS_LAMCAS"
- "ori\t%0,%z2,0\n\tamcas%A4.<amo>\t%0,%z3,%1"
+ "ori\t%0,%z2,0\n\tamcas%A4.<size>\t%0,%z3,%1"
[(set (attr "length") (const_int 8))])
(define_expand "atomic_compare_and_swap<mode>"
@@ -318,16 +497,14 @@
&& is_mm_release (memmodel_base (INTVAL (mod_s))))
mod_s = GEN_INT (MEMMODEL_ACQ_REL);
- operands[6] = mod_s;
-
if (ISA_HAS_LAMCAS)
emit_insn (gen_atomic_cas_value_strong<mode>_amcas (operands[1], operands[2],
operands[3], operands[4],
- operands[6]));
+ mod_s));
else
emit_insn (gen_atomic_cas_value_strong<mode> (operands[1], operands[2],
operands[3], operands[4],
- operands[6]));
+ mod_f));
rtx compare = operands[1];
if (operands[3] != const0_rtx)
@@ -349,49 +526,74 @@
DONE;
})
-(define_expand "atomic_test_and_set"
- [(match_operand:QI 0 "register_operand" "") ;; bool output
- (match_operand:QI 1 "memory_operand" "+ZB") ;; memory
- (match_operand:SI 2 "const_int_operand" "")] ;; model
+(define_expand "atomic_fetch_<amop><mode>"
+ [(match_operand:SHORT 0 "register_operand" "") ;; output
+ (any_bitwise (match_operand:SHORT 1 "memory_operand" "+ZB") ;; memory
+ (match_operand:SHORT 2 "reg_or_0_operand" "rJ")) ;; val
+ (match_operand:SI 3 "const_int_operand" "")] ;; model
""
{
- /* We have no QImode atomics, so use the address LSBs to form a mask,
- then use an aligned SImode atomic. */
+ /* We have no QI/HImode bitwise atomics, so use the address LSBs to form
+ a mask, then use an aligned SImode atomic. */
rtx result = operands[0];
rtx mem = operands[1];
- rtx model = operands[2];
+ rtx model = operands[3];
rtx addr = force_reg (Pmode, XEXP (mem, 0));
- rtx tmp_reg = gen_reg_rtx (Pmode);
- rtx zero_reg = gen_rtx_REG (Pmode, 0);
-
+ rtx mask = gen_int_mode (-4, Pmode);
rtx aligned_addr = gen_reg_rtx (Pmode);
- emit_move_insn (tmp_reg, gen_rtx_PLUS (Pmode, zero_reg, GEN_INT (-4)));
- emit_move_insn (aligned_addr, gen_rtx_AND (Pmode, addr, tmp_reg));
+
+ if (!and_operand (mask, Pmode))
+ mask = force_reg (Pmode, mask);
+
+ emit_move_insn (aligned_addr, gen_rtx_AND (Pmode, addr, mask));
rtx aligned_mem = change_address (mem, SImode, aligned_addr);
set_mem_alias_set (aligned_mem, 0);
- rtx offset = gen_reg_rtx (SImode);
- emit_move_insn (offset, gen_rtx_AND (SImode, gen_lowpart (SImode, addr),
- GEN_INT (3)));
-
rtx tmp = gen_reg_rtx (SImode);
- emit_move_insn (tmp, GEN_INT (1));
+ emit_move_insn (tmp, simplify_gen_unary (ZERO_EXTEND, SImode,
+ operands[2], <MODE>mode));
+ /* Note that we have defined SHIFT_COUNT_TRUNCATED to 1, so we don't need
+ to mask addr with 0b11 here. */
rtx shmt = gen_reg_rtx (SImode);
- emit_move_insn (shmt, gen_rtx_ASHIFT (SImode, offset, GEN_INT (3)));
+ emit_move_insn (shmt, gen_rtx_ASHIFT (SImode, gen_lowpart (SImode, addr),
+ GEN_INT (3)));
rtx word = gen_reg_rtx (SImode);
emit_move_insn (word, gen_rtx_ASHIFT (SImode, tmp, shmt));
+ if (<is_and>)
+ {
+ /* word = word | ~(mode_mask << shmt) */
+ rtx tmp = force_reg (SImode,
+ gen_int_mode (GET_MODE_MASK (<MODE>mode),
+ SImode));
+ emit_move_insn (tmp, gen_rtx_ASHIFT (SImode, tmp, shmt));
+ emit_move_insn (word, gen_rtx_IOR (SImode, gen_rtx_NOT (SImode, tmp),
+ word));
+ }
+
tmp = gen_reg_rtx (SImode);
- emit_insn (gen_atomic_fetch_orsi (tmp, aligned_mem, word, model));
+ emit_insn (gen_atomic_fetch_<amop>si (tmp, aligned_mem, word, model));
emit_move_insn (gen_lowpart (SImode, result),
gen_rtx_LSHIFTRT (SImode, tmp, shmt));
DONE;
})
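A host-side illustration (assumed, not GCC code) of the mask/shift math in the expander above, for a QImode fetch_or emulated with an aligned 32-bit access:

#include <stdint.h>

static uint8_t
emul_fetch_or_u8 (uint32_t *aligned, uintptr_t addr, uint8_t val)
{
  unsigned shmt = (addr & 3) * 8;          /* same as (addr << 3) & 31 */
  uint32_t word = (uint32_t) val << shmt;  /* zero-extend, then shift  */
  uint32_t old = *aligned;                 /* atomic_fetch_orsi here   */
  *aligned |= word;
  return old >> shmt;                      /* previous byte value      */
}

For fetch_and, the expander additionally ORs ~(mode_mask << shmt) into word so the untouched bytes stay all-ones and survive the SImode AND.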
+(define_expand "atomic_test_and_set"
+ [(match_operand:QI 0 "register_operand" "") ;; bool output
+ (match_operand:QI 1 "memory_operand" "+ZB") ;; memory
+ (match_operand:SI 2 "const_int_operand" "")] ;; model
+ ""
+{
+ rtx one = force_reg (QImode, gen_int_mode (1, QImode));
+ emit_insn (gen_atomic_fetch_orqi (operands[0], operands[1], one,
+ operands[2]));
+ DONE;
+})
+
(define_insn "atomic_cas_value_cmp_and_7_<mode>"
[(set (match_operand:GPR 0 "register_operand" "=&r")
(match_operand:GPR 1 "memory_operand" "+ZC"))
@@ -400,20 +602,20 @@
(match_operand:GPR 3 "reg_or_0_operand" "rJ")
(match_operand:GPR 4 "reg_or_0_operand" "rJ")
(match_operand:GPR 5 "reg_or_0_operand" "rJ")
- (match_operand:SI 6 "const_int_operand")] ;; model
+ (match_operand:SI 6 "const_int_operand")] ;; mod_f
UNSPEC_COMPARE_AND_SWAP))
(clobber (match_scratch:GPR 7 "=&r"))]
""
{
return "1:\\n\\t"
- "ll.<amo>\\t%0,%1\\n\\t"
+ "ll.<size>\\t%0,%1\\n\\t"
"and\\t%7,%0,%2\\n\\t"
"bne\\t%7,%z4,2f\\n\\t"
"and\\t%7,%0,%z3\\n\\t"
"or%i5\\t%7,%7,%5\\n\\t"
- "sc.<amo>\\t%7,%1\\n\\t"
+ "sc.<size>\\t%7,%1\\n\\t"
"beq\\t$zero,%7,1b\\n\\t"
- "b\\t3f\\n\\t"
+ "%T6b\\t3f\\n\\t"
"2:\\n\\t"
"%G6\\n\\t"
"3:\\n\\t";
@@ -444,18 +646,16 @@
&& is_mm_release (memmodel_base (INTVAL (mod_s))))
mod_s = GEN_INT (MEMMODEL_ACQ_REL);
- operands[6] = mod_s;
-
if (ISA_HAS_LAMCAS)
emit_insn (gen_atomic_cas_value_strong<mode>_amcas (operands[1], operands[2],
operands[3], operands[4],
- operands[6]));
+ mod_s));
else
{
union loongarch_gen_fn_ptrs generator;
generator.fn_7 = gen_atomic_cas_value_cmp_and_7_si;
loongarch_expand_atomic_qihi (generator, operands[1], operands[2],
- operands[3], operands[4], operands[6]);
+ operands[3], operands[4], mod_f);
}
rtx compare = operands[1];
@@ -481,83 +681,96 @@
DONE;
})
-(define_insn "atomic_cas_value_add_7_<mode>"
- [(set (match_operand:GPR 0 "register_operand" "=&r") ;; res
- (match_operand:GPR 1 "memory_operand" "+ZC"))
+(define_insn "atomic_compare_and_swapti_scq"
+ [(set (match_operand:TI 0 "register_operand" "=&r")
+ (match_operand:TI 1 "memory_operand" "+ZB"))
(set (match_dup 1)
- (unspec_volatile:GPR [(match_operand:GPR 2 "reg_or_0_operand" "rJ") ;; mask
- (match_operand:GPR 3 "reg_or_0_operand" "rJ") ;; inverted_mask
- (match_operand:GPR 4 "reg_or_0_operand" "rJ") ;; old val
- (match_operand:GPR 5 "reg_or_0_operand" "rJ") ;; new val
- (match_operand:SI 6 "const_int_operand")] ;; model
- UNSPEC_COMPARE_AND_SWAP_ADD))
- (clobber (match_scratch:GPR 7 "=&r"))
- (clobber (match_scratch:GPR 8 "=&r"))]
- ""
+ (unspec_volatile:TI [(match_operand:TI 2 "reg_or_0_operand" "rJ")
+ (match_operand:TI 3 "reg_or_0_operand" "rJ")
+ (match_operand:SI 4 "const_int_operand")] ;; mod_f
+ UNSPEC_COMPARE_AND_SWAP))
+ (clobber (match_scratch:DI 5 "=&r"))]
+ "TARGET_64BIT && ISA_HAS_SCQ"
{
- return "1:\\n\\t"
- "ll.<amo>\\t%0,%1\\n\\t"
- "and\\t%7,%0,%3\\n\\t"
- "add.w\\t%8,%0,%z5\\n\\t"
- "and\\t%8,%8,%z2\\n\\t"
- "or%i8\\t%7,%7,%8\\n\\t"
- "sc.<amo>\\t%7,%1\\n\\t"
- "beq\\t$zero,%7,1b";
-}
+ output_asm_insn ("1:", operands);
+ output_asm_insn ("ll.d\t%0,%1", operands);
- [(set (attr "length") (const_int 28))])
+ /* Compare the low word */
+ output_asm_insn ("bne\t%0,%z2,2f", operands);
-(define_insn "atomic_cas_value_sub_7_<mode>"
- [(set (match_operand:GPR 0 "register_operand" "=&r") ;; res
- (match_operand:GPR 1 "memory_operand" "+ZC"))
- (set (match_dup 1)
- (unspec_volatile:GPR [(match_operand:GPR 2 "reg_or_0_operand" "rJ") ;; mask
- (match_operand:GPR 3 "reg_or_0_operand" "rJ") ;; inverted_mask
- (match_operand:GPR 4 "reg_or_0_operand" "rJ") ;; old val
- (match_operand:GPR 5 "reg_or_0_operand" "rJ") ;; new val
- (match_operand:SI 6 "const_int_operand")] ;; model
- UNSPEC_COMPARE_AND_SWAP_SUB))
- (clobber (match_scratch:GPR 7 "=&r"))
- (clobber (match_scratch:GPR 8 "=&r"))]
- ""
-{
- return "1:\\n\\t"
- "ll.<amo>\\t%0,%1\\n\\t"
- "and\\t%7,%0,%3\\n\\t"
- "sub.w\\t%8,%0,%z5\\n\\t"
- "and\\t%8,%8,%z2\\n\\t"
- "or%i8\\t%7,%7,%8\\n\\t"
- "sc.<amo>\\t%7,%1\\n\\t"
- "beq\\t$zero,%7,1b";
+ /* Don't reorder the load of the high word before ll.d. As the TImode
+ value must be aligned in memory, the high and low words must be in
+ the same cacheline, so dbar 0x700 is enough. */
+ if (!ISA_HAS_LD_SEQ_SA)
+ output_asm_insn ("dbar\t0x700", operands);
+
+ /* Now load the high word. As the high and low words are in the same
+ cacheline, if another core clobbers the high word before the sc.q
+ instruction executes, the LL bit for the low word will be cleared.
+ Thus a normal load is sufficient. */
+ output_asm_insn ("ld.d\t%t0,%b1,8", operands);
+
+ /* Compare the high word. */
+ output_asm_insn ("bne\t%t0,%t2,2f", operands);
+
+ /* Copy the low word of the new value as it'll be clobbered by sc.q. */
+ output_asm_insn ("move\t%5,%z3", operands);
+
+ /* Store both words if LL bit is still set. */
+ output_asm_insn ("sc.q\t%5,%t3,%1", operands);
+
+ /* Check if sc.q has done the store. */
+ output_asm_insn ("beqz\t%5,1b", operands);
+
+ /* Jump over the mod_f barrier if sc.q has succeeded. */
+ output_asm_insn ("%T4b\t3f", operands);
+
+ /* The barrier for mod_f. */
+ output_asm_insn ("2:", operands);
+ output_asm_insn ("%G4", operands);
+
+ output_asm_insn ("3:", operands);
+ return "";
}
- [(set (attr "length") (const_int 28))])
+ [(set_attr "length" "40")])
-(define_insn "atomic_cas_value_and_7_<mode>"
- [(set (match_operand:GPR 0 "register_operand" "=&r") ;; res
- (match_operand:GPR 1 "memory_operand" "+ZC"))
- (set (match_dup 1)
- (unspec_volatile:GPR [(match_operand:GPR 2 "reg_or_0_operand" "rJ") ;; mask
- (match_operand:GPR 3 "reg_or_0_operand" "rJ") ;; inverted_mask
- (match_operand:GPR 4 "reg_or_0_operand" "rJ") ;; old val
- (match_operand:GPR 5 "reg_or_0_operand" "rJ") ;; new val
- (match_operand:SI 6 "const_int_operand")] ;; model
- UNSPEC_COMPARE_AND_SWAP_AND))
- (clobber (match_scratch:GPR 7 "=&r"))
- (clobber (match_scratch:GPR 8 "=&r"))]
- ""
+(define_expand "atomic_compare_and_swapti"
+ [(match_operand:SI 0 "register_operand" "") ;; bool output
+ (match_operand:TI 1 "register_operand" "") ;; val output
+ (match_operand:TI 2 "memory_operand" "") ;; memory
+ (match_operand:TI 3 "reg_or_0_operand" "") ;; expected value
+ (match_operand:TI 4 "reg_or_0_operand" "") ;; desired value
+ (match_operand:SI 5 "const_int_operand" "") ;; is_weak
+ (match_operand:SI 6 "const_int_operand" "") ;; mod_s
+ (match_operand:SI 7 "const_int_operand" "")] ;; mod_f
+ "TARGET_64BIT && ISA_HAS_SCQ"
{
- return "1:\\n\\t"
- "ll.<amo>\\t%0,%1\\n\\t"
- "and\\t%7,%0,%3\\n\\t"
- "and\\t%8,%0,%z5\\n\\t"
- "and\\t%8,%8,%z2\\n\\t"
- "or%i8\\t%7,%7,%8\\n\\t"
- "sc.<amo>\\t%7,%1\\n\\t"
- "beq\\t$zero,%7,1b";
-}
- [(set (attr "length") (const_int 28))])
+ emit_insn (gen_atomic_compare_and_swapti_scq (operands[1], operands[2],
+ operands[3], operands[4],
+ operands[7]));
+
+ rtx t[2];
-(define_insn "atomic_cas_value_xor_7_<mode>"
+ for (int i = 0; i < 2; i++)
+ {
+ rtx compare = loongarch_subword (operands[1], i);
+ rtx expect = loongarch_subword (operands[3], i);
+
+ t[i] = gen_reg_rtx (DImode);
+
+ if (expect != const0_rtx)
+ emit_insn (gen_xordi3 (t[i], compare, expect));
+ else
+ emit_move_insn (t[i], compare);
+ }
+
+ emit_insn (gen_iordi3 (t[0], t[0], t[1]));
+ emit_insn (gen_rtx_SET (operands[0],
+ gen_rtx_EQ (SImode, t[0], const0_rtx)));
+ DONE;
+})
+
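The boolean output computed at the end of the expander is just a two-word equality test; a host-side sketch (assumed):

#include <stdint.h>

static int
cas_success (uint64_t lo, uint64_t hi, uint64_t exp_lo, uint64_t exp_hi)
{
  /* xordi3 per word, iordi3, then EQ against zero.  */
  return ((lo ^ exp_lo) | (hi ^ exp_hi)) == 0;
}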
+(define_insn "atomic_cas_value_add_7_<mode>"
[(set (match_operand:GPR 0 "register_operand" "=&r") ;; res
(match_operand:GPR 1 "memory_operand" "+ZC"))
(set (match_dup 1)
@@ -566,24 +779,24 @@
(match_operand:GPR 4 "reg_or_0_operand" "rJ") ;; old val
(match_operand:GPR 5 "reg_or_0_operand" "rJ") ;; new val
(match_operand:SI 6 "const_int_operand")] ;; model
- UNSPEC_COMPARE_AND_SWAP_XOR))
+ UNSPEC_COMPARE_AND_SWAP_ADD))
(clobber (match_scratch:GPR 7 "=&r"))
(clobber (match_scratch:GPR 8 "=&r"))]
""
{
return "1:\\n\\t"
- "ll.<amo>\\t%0,%1\\n\\t"
+ "ll.<size>\\t%0,%1\\n\\t"
"and\\t%7,%0,%3\\n\\t"
- "xor\\t%8,%0,%z5\\n\\t"
+ "add.w\\t%8,%0,%z5\\n\\t"
"and\\t%8,%8,%z2\\n\\t"
"or%i8\\t%7,%7,%8\\n\\t"
- "sc.<amo>\\t%7,%1\\n\\t"
+ "sc.<size>\\t%7,%1\\n\\t"
"beq\\t$zero,%7,1b";
}
[(set (attr "length") (const_int 28))])
-(define_insn "atomic_cas_value_or_7_<mode>"
+(define_insn "atomic_cas_value_sub_7_<mode>"
[(set (match_operand:GPR 0 "register_operand" "=&r") ;; res
(match_operand:GPR 1 "memory_operand" "+ZC"))
(set (match_dup 1)
@@ -592,21 +805,20 @@
(match_operand:GPR 4 "reg_or_0_operand" "rJ") ;; old val
(match_operand:GPR 5 "reg_or_0_operand" "rJ") ;; new val
(match_operand:SI 6 "const_int_operand")] ;; model
- UNSPEC_COMPARE_AND_SWAP_OR))
+ UNSPEC_COMPARE_AND_SWAP_SUB))
(clobber (match_scratch:GPR 7 "=&r"))
(clobber (match_scratch:GPR 8 "=&r"))]
""
{
return "1:\\n\\t"
- "ll.<amo>\\t%0,%1\\n\\t"
+ "ll.<size>\\t%0,%1\\n\\t"
"and\\t%7,%0,%3\\n\\t"
- "or\\t%8,%0,%z5\\n\\t"
+ "sub.w\\t%8,%0,%z5\\n\\t"
"and\\t%8,%8,%z2\\n\\t"
"or%i8\\t%7,%7,%8\\n\\t"
- "sc.<amo>\\t%7,%1\\n\\t"
+ "sc.<size>\\t%7,%1\\n\\t"
"beq\\t$zero,%7,1b";
}
-
[(set (attr "length") (const_int 28))])
(define_insn "atomic_cas_value_nand_7_<mode>"
@@ -624,12 +836,12 @@
""
{
return "1:\\n\\t"
- "ll.<amo>\\t%0,%1\\n\\t"
+ "ll.<size>\\t%0,%1\\n\\t"
"and\\t%7,%0,%3\\n\\t"
"and\\t%8,%0,%z5\\n\\t"
"xor\\t%8,%8,%z2\\n\\t"
"or%i8\\t%7,%7,%8\\n\\t"
- "sc.<amo>\\t%7,%1\\n\\t"
+ "sc.<size>\\t%7,%1\\n\\t"
"beq\\t$zero,%7,1b";
}
[(set (attr "length") (const_int 28))])
@@ -648,10 +860,10 @@
""
{
return "1:\\n\\t"
- "ll.<amo>\\t%0,%1\\n\\t"
+ "ll.<size>\\t%0,%1\\n\\t"
"and\\t%7,%0,%z3\\n\\t"
"or%i5\\t%7,%7,%5\\n\\t"
- "sc.<amo>\\t%7,%1\\n\\t"
+ "sc.<size>\\t%7,%1\\n\\t"
"beqz\\t%7,1b\\n\\t";
}
[(set (attr "length") (const_int 20))])
@@ -678,6 +890,101 @@
DONE;
})
+(define_int_iterator UNSPEC_TI_FETCH_DIRECT
+ [UNSPEC_TI_FETCH_ADD
+ UNSPEC_TI_FETCH_SUB
+ UNSPEC_TI_FETCH_AND
+ UNSPEC_TI_FETCH_XOR
+ UNSPEC_TI_FETCH_OR])
+(define_int_iterator UNSPEC_TI_FETCH
+ [UNSPEC_TI_FETCH_DIRECT UNSPEC_TI_FETCH_NAND_MASK_INVERTED])
+(define_int_attr amop_ti_fetch
+ [(UNSPEC_TI_FETCH_ADD "add")
+ (UNSPEC_TI_FETCH_SUB "sub")
+ (UNSPEC_TI_FETCH_AND "and")
+ (UNSPEC_TI_FETCH_XOR "xor")
+ (UNSPEC_TI_FETCH_OR "or")
+ (UNSPEC_TI_FETCH_NAND_MASK_INVERTED "nand_mask_inverted")])
+(define_int_attr size_ti_fetch
+ [(UNSPEC_TI_FETCH_ADD "36")
+ (UNSPEC_TI_FETCH_SUB "36")
+ (UNSPEC_TI_FETCH_AND "28")
+ (UNSPEC_TI_FETCH_XOR "28")
+ (UNSPEC_TI_FETCH_OR "28")
+ (UNSPEC_TI_FETCH_NAND_MASK_INVERTED "28")])
+
+(define_insn "atomic_fetch_<amop_ti_fetch>ti_scq"
+ [(set (match_operand:TI 0 "register_operand" "=&r")
+ (match_operand:TI 1 "memory_operand" "+ZB"))
+ (set (match_dup 1)
+ (unspec_volatile:TI
+ [(match_dup 0)
+ (match_operand:TI 2 "reg_or_0_operand" "rJ")]
+ UNSPEC_TI_FETCH))
+ (clobber (match_scratch:DI 3 "=&r"))
+ (clobber (match_scratch:DI 4 "=&r"))]
+ "TARGET_64BIT && ISA_HAS_SCQ"
+{
+ output_asm_insn ("1:", operands);
+ output_asm_insn ("ll.d\t%0,%1", operands);
+ if (!ISA_HAS_LD_SEQ_SA)
+ output_asm_insn ("dbar\t0x700", operands);
+ output_asm_insn ("ld.d\t%t0,%b1,8", operands);
+
+ switch (<UNSPEC_TI_FETCH>)
+ {
+ case UNSPEC_TI_FETCH_AND:
+ case UNSPEC_TI_FETCH_OR:
+ case UNSPEC_TI_FETCH_XOR:
+ output_asm_insn ("<amop_ti_fetch>\t%3,%0,%z2", operands);
+ output_asm_insn ("<amop_ti_fetch>\t%4,%t0,%t2", operands);
+ break;
+ case UNSPEC_TI_FETCH_NAND_MASK_INVERTED:
+ output_asm_insn ("orn\t%3,%z2,%0", operands);
+ output_asm_insn ("orn\t%4,%t2,%t0", operands);
+ break;
+ case UNSPEC_TI_FETCH_ADD:
+ case UNSPEC_TI_FETCH_SUB:
+ output_asm_insn ("<amop_ti_fetch>.d\t%3,%0,%z2", operands);
+
+ /* Generate carry bit. */
+ output_asm_insn (
+ <UNSPEC_TI_FETCH> == UNSPEC_TI_FETCH_ADD ? "sltu\t%4,%3,%0"
+ : "sltu\t%4,%0,%3",
+ operands);
+
+ output_asm_insn ("<amop_ti_fetch>.d\t%4,%t0,%4", operands);
+ output_asm_insn ("<amop_ti_fetch>.d\t%4,%4,%t2", operands);
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+ output_asm_insn ("sc.q\t%3,%4,%1", operands);
+ output_asm_insn ("beqz\t%3,1b", operands);
+
+ return "";
+}
+ [(set_attr "length" "<size_ti_fetch>")])
+
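The sltu in the add/sub arms generates the cross-word carry or borrow; the equivalent host-side arithmetic (a sketch) is:

#include <stdint.h>

static void
add128 (uint64_t *lo, uint64_t *hi, uint64_t add_lo, uint64_t add_hi)
{
  uint64_t new_lo = *lo + add_lo;
  uint64_t carry = new_lo < *lo;   /* sltu %4,%3,%0 */
  *hi += carry + add_hi;           /* add.d %4,%t0,%4; add.d %4,%4,%t2 */
  *lo = new_lo;
}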
+(define_expand "atomic_fetch_<amop_ti_fetch>ti"
+ [(set (match_operand:TI 0 "register_operand" "=&r")
+ (match_operand:TI 1 "memory_operand" "+ZB"))
+ (set (match_dup 1)
+ (unspec_volatile:TI
+ [(match_dup 0)
+ (match_operand:TI 2 "reg_or_0_operand" "rJ")]
+ UNSPEC_TI_FETCH_DIRECT))
+ (match_operand:SI 3 "const_int_operand")] ;; model
+ "TARGET_64BIT && ISA_HAS_SCQ"
+{
+ /* Model is ignored as sc.q implies a full barrier. */
+ emit_insn (gen_atomic_fetch_<amop_ti_fetch>ti_scq (operands[0],
+ operands[1],
+ operands[2]));
+ DONE;
+})
+
(define_insn "atomic_fetch_add<mode>_short"
[(set (match_operand:SHORT 0 "register_operand" "=&r")
(match_operand:SHORT 1 "memory_operand" "+ZB"))
@@ -688,7 +995,7 @@
(match_operand:SI 3 "const_int_operand")] ;; model
UNSPEC_SYNC_OLD_OP))]
"ISA_HAS_LAM_BH"
- "amadd%A3.<amo>\t%0,%z2,%1"
+ "amadd%A3.<size>\t%0,%z2,%1"
[(set (attr "length") (const_int 4))])
(define_expand "atomic_fetch_add<mode>"
@@ -724,7 +1031,7 @@
(match_operand:SHORT 2 "reg_or_0_operand" "rJ"))
(match_operand:SI 3 "const_int_operand")] ;; model
UNSPEC_SYNC_OLD_OP))]
- ""
+ "!ISA_HAS_LAM_BH"
{
union loongarch_gen_fn_ptrs generator;
generator.fn_7 = gen_atomic_cas_value_sub_7_si;
@@ -733,60 +1040,6 @@
DONE;
})
-(define_expand "atomic_fetch_and<mode>"
- [(set (match_operand:SHORT 0 "register_operand" "=&r")
- (match_operand:SHORT 1 "memory_operand" "+ZB"))
- (set (match_dup 1)
- (unspec_volatile:SHORT
- [(and:SHORT (match_dup 1)
- (match_operand:SHORT 2 "reg_or_0_operand" "rJ"))
- (match_operand:SI 3 "const_int_operand")] ;; model
- UNSPEC_SYNC_OLD_OP))]
- ""
-{
- union loongarch_gen_fn_ptrs generator;
- generator.fn_7 = gen_atomic_cas_value_and_7_si;
- loongarch_expand_atomic_qihi (generator, operands[0], operands[1],
- operands[1], operands[2], operands[3]);
- DONE;
-})
-
-(define_expand "atomic_fetch_xor<mode>"
- [(set (match_operand:SHORT 0 "register_operand" "=&r")
- (match_operand:SHORT 1 "memory_operand" "+ZB"))
- (set (match_dup 1)
- (unspec_volatile:SHORT
- [(xor:SHORT (match_dup 1)
- (match_operand:SHORT 2 "reg_or_0_operand" "rJ"))
- (match_operand:SI 3 "const_int_operand")] ;; model
- UNSPEC_SYNC_OLD_OP))]
- ""
-{
- union loongarch_gen_fn_ptrs generator;
- generator.fn_7 = gen_atomic_cas_value_xor_7_si;
- loongarch_expand_atomic_qihi (generator, operands[0], operands[1],
- operands[1], operands[2], operands[3]);
- DONE;
-})
-
-(define_expand "atomic_fetch_or<mode>"
- [(set (match_operand:SHORT 0 "register_operand" "=&r")
- (match_operand:SHORT 1 "memory_operand" "+ZB"))
- (set (match_dup 1)
- (unspec_volatile:SHORT
- [(ior:SHORT (match_dup 1)
- (match_operand:SHORT 2 "reg_or_0_operand" "rJ"))
- (match_operand:SI 3 "const_int_operand")] ;; model
- UNSPEC_SYNC_OLD_OP))]
- ""
-{
- union loongarch_gen_fn_ptrs generator;
- generator.fn_7 = gen_atomic_cas_value_or_7_si;
- loongarch_expand_atomic_qihi (generator, operands[0], operands[1],
- operands[1], operands[2], operands[3]);
- DONE;
-})
-
(define_expand "atomic_fetch_nand<mode>"
[(set (match_operand:SHORT 0 "register_operand" "=&r")
(match_operand:SHORT 1 "memory_operand" "+ZB"))
diff --git a/gcc/config/m32r/m32r.cc b/gcc/config/m32r/m32r.cc
index a96634c..75db280 100644
--- a/gcc/config/m32r/m32r.cc
+++ b/gcc/config/m32r/m32r.cc
@@ -1150,8 +1150,7 @@ gen_split_move_double (rtx operands[])
else
gcc_unreachable ();
- val = get_insns ();
- end_sequence ();
+ val = end_sequence ();
return val;
}
diff --git a/gcc/config/m32r/m32r.md b/gcc/config/m32r/m32r.md
index 309d895..393e0da 100644
--- a/gcc/config/m32r/m32r.md
+++ b/gcc/config/m32r/m32r.md
@@ -1554,8 +1554,7 @@
start_sequence ();
emit_insn (gen_cmp_ltusi_insn (op1, const1_rtx));
emit_insn (gen_movcc_insn (op0));
- operands[3] = get_insns ();
- end_sequence ();
+ operands[3] = end_sequence ();
}")
(define_insn "seq_insn"
@@ -1607,8 +1606,7 @@
emit_insn (gen_cmp_ltusi_insn (op3, const1_rtx));
emit_insn (gen_movcc_insn (op0));
- operands[4] = get_insns ();
- end_sequence ();
+ operands[4] = end_sequence ();
}")
(define_insn "sne_zero_insn"
diff --git a/gcc/config/m68k/m68k.cc b/gcc/config/m68k/m68k.cc
index d8fa6e0..800a385 100644
--- a/gcc/config/m68k/m68k.cc
+++ b/gcc/config/m68k/m68k.cc
@@ -2763,8 +2763,7 @@ m68k_call_tls_get_addr (rtx x, rtx eqv, enum m68k_reloc reloc)
Pmode, x, Pmode);
m68k_libcall_value_in_a0_p = false;
- insns = get_insns ();
- end_sequence ();
+ insns = end_sequence ();
gcc_assert (can_create_pseudo_p ());
dest = gen_reg_rtx (Pmode);
@@ -2811,8 +2810,7 @@ m68k_call_m68k_read_tp (void)
a0 = emit_library_call_value (m68k_get_m68k_read_tp (), NULL_RTX, LCT_PURE,
Pmode);
m68k_libcall_value_in_a0_p = false;
- insns = get_insns ();
- end_sequence ();
+ insns = end_sequence ();
/* Attach a unique REG_EQUIV, to allow the RTL optimizers to
share the m68k_read_tp result with other IE/LE model accesses. */
@@ -6799,8 +6797,7 @@ m68k_sched_md_init_global (FILE *sched_dump ATTRIBUTE_UNUSED,
start_sequence ();
emit_insn (gen_ib ());
- sched_ib.insn = get_insns ();
- end_sequence ();
+ sched_ib.insn = end_sequence ();
}
/* Scheduling pass is now finished. Free/reset static variables. */
diff --git a/gcc/config/m68k/m68k.md b/gcc/config/m68k/m68k.md
index 35bf1c6..c96937f 100644
--- a/gcc/config/m68k/m68k.md
+++ b/gcc/config/m68k/m68k.md
@@ -4098,8 +4098,7 @@
emit_move_insn (operand_subword (operands[0], 1, 1, DFmode),
operand_subword_force (operands[1], 1, DFmode));
- insns = get_insns ();
- end_sequence ();
+ insns = end_sequence ();
emit_insn (insns);
DONE;
@@ -4132,8 +4131,7 @@
emit_move_insn (operand_subword (operands[0], 2, 1, XFmode),
operand_subword_force (operands[1], 2, XFmode));
- insns = get_insns ();
- end_sequence ();
+ insns = end_sequence ();
emit_insn (insns);
DONE;
@@ -4251,8 +4249,7 @@
emit_move_insn (operand_subword (operands[0], 1, 1, DFmode),
operand_subword_force (operands[1], 1, DFmode));
- insns = get_insns ();
- end_sequence ();
+ insns = end_sequence ();
emit_insn (insns);
DONE;
@@ -4285,8 +4282,7 @@
emit_move_insn (operand_subword (operands[0], 2, 1, XFmode),
operand_subword_force (operands[1], 2, XFmode));
- insns = get_insns ();
- end_sequence ();
+ insns = end_sequence ();
emit_insn (insns);
DONE;
diff --git a/gcc/config/mcore/mcore.cc b/gcc/config/mcore/mcore.cc
index cd5f2c5..c4fc145 100644
--- a/gcc/config/mcore/mcore.cc
+++ b/gcc/config/mcore/mcore.cc
@@ -2984,9 +2984,7 @@ mcore_mark_dllimport (tree decl)
/* ??? At least I think that's why we do this. */
idp = get_identifier (newname);
- newrtl = gen_rtx_MEM (Pmode,
- gen_rtx_SYMBOL_REF (Pmode,
- IDENTIFIER_POINTER (idp)));
+ newrtl = gen_rtx_SYMBOL_REF (Pmode, IDENTIFIER_POINTER (idp));
XEXP (DECL_RTL (decl), 0) = newrtl;
}
diff --git a/gcc/config/microblaze/microblaze.cc b/gcc/config/microblaze/microblaze.cc
index fc223fb..2ab5ada 100644
--- a/gcc/config/microblaze/microblaze.cc
+++ b/gcc/config/microblaze/microblaze.cc
@@ -239,6 +239,10 @@ section *sdata2_section;
#define TARGET_HAVE_TLS true
#endif
+/* MicroBlaze does not do speculative execution. */
+#undef TARGET_HAVE_SPECULATION_SAFE_VALUE
+#define TARGET_HAVE_SPECULATION_SAFE_VALUE speculation_safe_value_not_needed
+
/* Return truth value if a CONST_DOUBLE is ok to be a legitimate constant. */
static bool
microblaze_const_double_ok (rtx op, machine_mode mode)
@@ -585,8 +589,7 @@ microblaze_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
LCT_PURE, /* LCT_CONST? */
Pmode, reg, Pmode);
- insns = get_insns ();
- end_sequence ();
+ insns = end_sequence ();
return insns;
}
diff --git a/gcc/config/mingw/mingw32.h b/gcc/config/mingw/mingw32.h
index 10bcd29..be2461f 100644
--- a/gcc/config/mingw/mingw32.h
+++ b/gcc/config/mingw/mingw32.h
@@ -308,6 +308,15 @@ do { \
#undef TARGET_N_FORMAT_TYPES
#define TARGET_N_FORMAT_TYPES 3
+#undef TARGET_WIN32_TLS
+#define TARGET_WIN32_TLS 1
+
+#undef TARGET_ASM_SELECT_SECTION
+#define TARGET_ASM_SELECT_SECTION mingw_pe_select_section
+
+#undef DEFAULT_TLS_SEG_REG
+#define DEFAULT_TLS_SEG_REG (TARGET_64BIT ? ADDR_SPACE_SEG_GS : ADDR_SPACE_SEG_FS)
+
#define HAVE_ENABLE_EXECUTE_STACK
#undef CHECK_EXECUTE_STACK_ENABLED
#define CHECK_EXECUTE_STACK_ENABLED flag_setstackexecutable
diff --git a/gcc/config/mingw/winnt.cc b/gcc/config/mingw/winnt.cc
index 08a761d..f224966 100644
--- a/gcc/config/mingw/winnt.cc
+++ b/gcc/config/mingw/winnt.cc
@@ -391,6 +391,15 @@ i386_pe_strip_name_encoding_full (const char *str)
return name;
}
+section *
+mingw_pe_select_section (tree decl, int reloc, unsigned HOST_WIDE_INT align)
+{
+ if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL_P (decl))
+ return get_named_section (decl, ".tls$", reloc);
+ else
+ return default_select_section (decl, reloc, align);
+}
+
void
mingw_pe_unique_section (tree decl, int reloc)
{
@@ -415,6 +424,8 @@ mingw_pe_unique_section (tree decl, int reloc)
prefix = ".text$";
else if (decl_readonly_section (decl, reloc))
prefix = ".rdata$";
+ else if (DECL_THREAD_LOCAL_P (decl))
+ prefix = ".tls$";
else
prefix = ".data$";
len = strlen (name) + strlen (prefix);
@@ -489,6 +500,9 @@ mingw_pe_asm_named_section (const char *name, unsigned int flags,
*f++ = 'e';
#endif
+ if (strcmp (name, ".tls$") == 0)
+ *f++ = 'd';
+
if ((flags & (SECTION_CODE | SECTION_WRITE)) == 0)
/* readonly data */
{
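
For illustration, a sketch of what the new ".tls$" handling means for user code; the variable name is hypothetical, and the behavior assumes TARGET_WIN32_TLS as defined in mingw32.h above:

    /* Placed in a ".tls$" section by mingw_pe_select_section rather than
       being lowered through emutls; accesses use the gs/fs-based address
       space selected by DEFAULT_TLS_SEG_REG.  */
    __thread int tls_counter;
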
diff --git a/gcc/config/mingw/winnt.h b/gcc/config/mingw/winnt.h
index fa2d6c0..23f4dc9 100644
--- a/gcc/config/mingw/winnt.h
+++ b/gcc/config/mingw/winnt.h
@@ -31,6 +31,7 @@ extern void mingw_pe_file_end (void);
extern void mingw_pe_maybe_record_exported_symbol (tree, const char *, int);
extern void mingw_pe_record_stub (const char *, bool);
extern unsigned int mingw_pe_section_type_flags (tree, const char *, int);
+extern section *mingw_pe_select_section (tree, int, unsigned HOST_WIDE_INT);
extern void mingw_pe_unique_section (tree, int);
extern bool mingw_pe_valid_dllimport_attribute_p (const_tree);
diff --git a/gcc/config/mips/mips.cc b/gcc/config/mips/mips.cc
index 24a28dc..81eaa3c 100644
--- a/gcc/config/mips/mips.cc
+++ b/gcc/config/mips/mips.cc
@@ -3621,9 +3621,7 @@ mips_call_tls_get_addr (rtx sym, enum mips_symbol_type type, rtx v0)
const0_rtx, NULL_RTX, false);
RTL_CONST_CALL_P (insn) = 1;
use_reg (&CALL_INSN_FUNCTION_USAGE (insn), a0);
- insn = get_insns ();
-
- end_sequence ();
+ insn = end_sequence ();
return insn;
}
@@ -15167,23 +15165,19 @@ mips_ls2_init_dfa_post_cycle_insn (void)
{
start_sequence ();
emit_insn (gen_ls2_alu1_turn_enabled_insn ());
- mips_ls2.alu1_turn_enabled_insn = get_insns ();
- end_sequence ();
+ mips_ls2.alu1_turn_enabled_insn = end_sequence ();
start_sequence ();
emit_insn (gen_ls2_alu2_turn_enabled_insn ());
- mips_ls2.alu2_turn_enabled_insn = get_insns ();
- end_sequence ();
+ mips_ls2.alu2_turn_enabled_insn = end_sequence ();
start_sequence ();
emit_insn (gen_ls2_falu1_turn_enabled_insn ());
- mips_ls2.falu1_turn_enabled_insn = get_insns ();
- end_sequence ();
+ mips_ls2.falu1_turn_enabled_insn = end_sequence ();
start_sequence ();
emit_insn (gen_ls2_falu2_turn_enabled_insn ());
- mips_ls2.falu2_turn_enabled_insn = get_insns ();
- end_sequence ();
+ mips_ls2.falu2_turn_enabled_insn = end_sequence ();
mips_ls2.alu1_core_unit_code = get_cpu_unit_code ("ls2_alu1_core");
mips_ls2.alu2_core_unit_code = get_cpu_unit_code ("ls2_alu2_core");
@@ -19892,8 +19886,7 @@ mips16_split_long_branches (void)
emit_label (new_label);
}
- jump_sequence = get_insns ();
- end_sequence ();
+ jump_sequence = end_sequence ();
emit_insn_after (jump_sequence, jump_insn);
if (new_label)
diff --git a/gcc/config/mips/mips.h b/gcc/config/mips/mips.h
index e224ade..494f14c 100644
--- a/gcc/config/mips/mips.h
+++ b/gcc/config/mips/mips.h
@@ -2363,8 +2363,14 @@ enum reg_class
#define STACK_GROWS_DOWNWARD 1
-#define FRAME_GROWS_DOWNWARD (flag_stack_protect != 0 \
- || (flag_sanitize & SANITIZE_ADDRESS) != 0)
+/* Growing the frame downwards allows us to put spills closest to
+   the stack pointer, which is good as they are likely to be accessed
+   frequently.  We can also arrange for normal stack usage to place
+   scalars last so that they too are close to the stack pointer.  */
+#define FRAME_GROWS_DOWNWARD ((TARGET_MIPS16 \
+ && TARGET_FRAME_GROWS_DOWNWARDS) \
+ || (flag_stack_protect != 0 \
+ || (flag_sanitize & SANITIZE_ADDRESS) != 0))
/* Size of the area allocated in the frame to save the GP. */
diff --git a/gcc/config/mips/mips.opt b/gcc/config/mips/mips.opt
index e245654..f07db5a 100644
--- a/gcc/config/mips/mips.opt
+++ b/gcc/config/mips/mips.opt
@@ -473,6 +473,10 @@ mframe-header-opt
Target Var(flag_frame_header_optimization) Optimization
Optimize frame header.
+mgrow-frame-downwards
+Target Var(TARGET_FRAME_GROWS_DOWNWARDS) Init(1) Undocumented
+Change the behavior to grow the frame downwards.
+
noasmopt
Driver
diff --git a/gcc/config/nds32/nds32-intrinsic.md b/gcc/config/nds32/nds32-intrinsic.md
index e05dce1..85acea3 100644
--- a/gcc/config/nds32/nds32-intrinsic.md
+++ b/gcc/config/nds32/nds32-intrinsic.md
@@ -333,30 +333,31 @@
""
{
rtx system_reg = NULL_RTX;
+ rtx shift_amt = NULL_RTX;
/* Set system register form nds32_intrinsic_register_names[]. */
if ((INTVAL (operands[1]) >= NDS32_INT_H0)
&& (INTVAL (operands[1]) <= NDS32_INT_H15))
{
system_reg = GEN_INT (__NDS32_REG_INT_PEND__);
- operands[2] = GEN_INT (31 - INTVAL (operands[1]));
+ shift_amt = GEN_INT (31 - INTVAL (operands[1]));
}
else if (INTVAL (operands[1]) == NDS32_INT_SWI)
{
system_reg = GEN_INT (__NDS32_REG_INT_PEND__);
- operands[2] = GEN_INT (15);
+ shift_amt = GEN_INT (15);
}
else if ((INTVAL (operands[1]) >= NDS32_INT_H16)
&& (INTVAL (operands[1]) <= NDS32_INT_H31))
{
system_reg = GEN_INT (__NDS32_REG_INT_PEND2__);
- operands[2] = GEN_INT (31 - INTVAL (operands[1]));
+ shift_amt = GEN_INT (31 - INTVAL (operands[1]));
}
else if ((INTVAL (operands[1]) >= NDS32_INT_H32)
&& (INTVAL (operands[1]) <= NDS32_INT_H63))
{
system_reg = GEN_INT (__NDS32_REG_INT_PEND3__);
- operands[2] = GEN_INT (31 - (INTVAL (operands[1]) - 32));
+ shift_amt = GEN_INT (31 - (INTVAL (operands[1]) - 32));
}
else
error ("%<get_pending_int%> not support %<NDS32_INT_ALZ%>,"
@@ -366,7 +367,7 @@
if (system_reg != NULL_RTX)
{
emit_insn (gen_unspec_volatile_mfsr (operands[0], system_reg));
- emit_insn (gen_ashlsi3 (operands[0], operands[0], operands[2]));
+ emit_insn (gen_ashlsi3 (operands[0], operands[0], shift_amt));
emit_insn (gen_lshrsi3 (operands[0], operands[0], GEN_INT (31)));
emit_insn (gen_unspec_dsb ());
}
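
As a worked example of the sequence this expander emits (assuming NDS32_INT_H3 has the value 3): shift_amt becomes 31 - 3 = 28, so after the mfsr the two shifts compute (pend << 28) >> 31, isolating exactly the H3 pending bit. The same computation in C, with a hypothetical register value:

    unsigned int pend = 0x00000008;          /* bit 3 set: H3 pending */
    unsigned int bit  = (pend << 28) >> 31;  /* yields 1 */
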
diff --git a/gcc/config/nvptx/gen-multilib-matches-tests b/gcc/config/nvptx/gen-multilib-matches-tests
index a07f19a..fbfae88 100644
--- a/gcc/config/nvptx/gen-multilib-matches-tests
+++ b/gcc/config/nvptx/gen-multilib-matches-tests
@@ -18,6 +18,7 @@ AEMM .=misa?sm_35
AEMM .=misa?sm_37
AEMM .=misa?sm_52
AEMM .=misa?sm_53
+AEMM .=misa?sm_61
AEMM .=misa?sm_70
AEMM .=misa?sm_75
AEMM .=misa?sm_80
@@ -32,14 +33,15 @@ AEMM .=misa?sm_35
AEMM .=misa?sm_37
AEMM .=misa?sm_52
AEMM .=misa?sm_53
+AEMM .=misa?sm_61
AEMM .=misa?sm_70
AEMM .=misa?sm_75
AEMM .=misa?sm_80
CMMC
-BEGIN '--with-arch=sm_30', '--with-multilib-list=sm_30,sm_35,sm_37,sm_52,sm_53,sm_70,sm_75,sm_80,sm_89'
+BEGIN '--with-arch=sm_30', '--with-multilib-list=sm_30,sm_35,sm_37,sm_52,sm_53,sm_61,sm_70,sm_75,sm_80,sm_89'
SMOID sm_30
-SMOIL sm_30 sm_35 sm_37 sm_52 sm_53 sm_70 sm_75 sm_80 sm_89
+SMOIL sm_30 sm_35 sm_37 sm_52 sm_53 sm_61 sm_70 sm_75 sm_80 sm_89
AEMM .=misa?sm_30
CMMC
@@ -52,6 +54,7 @@ AEMM .=misa?sm_35
AEMM .=misa?sm_37
AEMM .=misa?sm_52
AEMM .=misa?sm_53
+AEMM .=misa?sm_61
AEMM .=misa?sm_70
AEMM .=misa?sm_75
AEMM .=misa?sm_80
@@ -65,6 +68,7 @@ AEMM .=misa?sm_35
AEMM .=misa?sm_37
AEMM .=misa?sm_52
AEMM .=misa?sm_53
+AEMM .=misa?sm_61
AEMM .=misa?sm_70
AEMM .=misa?sm_75
AEMM .=misa?sm_80
@@ -79,6 +83,7 @@ AEMM misa?sm_30=misa?sm_35
AEMM .=misa?sm_37
AEMM .=misa?sm_52
AEMM .=misa?sm_53
+AEMM .=misa?sm_61
AEMM .=misa?sm_70
AEMM .=misa?sm_75
AEMM .=misa?sm_80
@@ -93,6 +98,7 @@ AEMM misa?sm_35=misa?sm_30
AEMM misa?sm_35=misa?sm_37
AEMM .=misa?sm_52
AEMM .=misa?sm_53
+AEMM .=misa?sm_61
AEMM .=misa?sm_70
AEMM misa?sm_75=misa?sm_80
AEMM misa?sm_75=misa?sm_89
@@ -106,6 +112,7 @@ AEMM misa?sm_30=misa?sm_35
AEMM misa?sm_30=misa?sm_37
AEMM misa?sm_30=misa?sm_52
AEMM .=misa?sm_53
+AEMM .=misa?sm_61
AEMM .=misa?sm_70
AEMM .=misa?sm_75
AEMM .=misa?sm_80
@@ -119,19 +126,55 @@ AEMM misa?sm_37=misa?sm_30
AEMM misa?sm_37=misa?sm_35
AEMM misa?sm_37=misa?sm_52
AEMM .=misa?sm_53
+AEMM .=misa?sm_61
AEMM .=misa?sm_70
AEMM .=misa?sm_75
AEMM .=misa?sm_80
AEMM .=misa?sm_89
CMMC
-BEGIN '--with-arch=sm_53', '--with-multilib-list=sm_53=sm_30,sm_35,sm_37,sm_52,sm_70,sm_75,sm_80,sm_89'
+BEGIN '--with-arch=sm_53', '--with-multilib-list=sm_30,sm_35,sm_37,sm_52,sm_61,sm_70,sm_75,sm_80,sm_89'
SMOID sm_53
-SMOIL sm_53 sm_30 sm_35 sm_37 sm_52 sm_70 sm_75 sm_80 sm_89
+SMOIL sm_53 sm_30 sm_35 sm_37 sm_52 sm_61 sm_70 sm_75 sm_80 sm_89
AEMM .=misa?sm_53
CMMC
+BEGIN '--with-arch=sm_61', '--with-multilib-list=sm_61,sm_30'
+SMOID sm_61
+SMOIL sm_61 sm_30
+AEMM misa?sm_30=misa?sm_35
+AEMM misa?sm_30=misa?sm_37
+AEMM misa?sm_30=misa?sm_52
+AEMM misa?sm_30=misa?sm_53
+AEMM .=misa?sm_61
+AEMM .=misa?sm_70
+AEMM .=misa?sm_75
+AEMM .=misa?sm_80
+AEMM .=misa?sm_89
+CMMC
+
+BEGIN '--with-arch=sm_61', '--with-multilib-list=sm_61,sm_37'
+SMOID sm_61
+SMOIL sm_61 sm_37
+AEMM misa?sm_37=misa?sm_30
+AEMM misa?sm_37=misa?sm_35
+AEMM misa?sm_37=misa?sm_52
+AEMM misa?sm_37=misa?sm_53
+AEMM .=misa?sm_61
+AEMM .=misa?sm_70
+AEMM .=misa?sm_75
+AEMM .=misa?sm_80
+AEMM .=misa?sm_89
+CMMC
+
+BEGIN '--with-arch=sm_61', '--with-multilib-list=sm_30,sm_35,sm_37,sm_52,sm_61,sm_70,sm_75,sm_80,sm_89'
+SMOID sm_61
+SMOIL sm_61 sm_30 sm_35 sm_37 sm_52 sm_53 sm_70 sm_75 sm_80 sm_89
+AEMM .=misa?sm_61
+CMMC
+
+
BEGIN '--with-arch=sm_70', '--with-multilib-list=sm_70'
SMOID sm_70
SMOIL sm_70
@@ -140,6 +183,7 @@ AEMM .=misa?sm_35
AEMM .=misa?sm_37
AEMM .=misa?sm_52
AEMM .=misa?sm_53
+AEMM .=misa?sm_61
AEMM .=misa?sm_70
AEMM .=misa?sm_75
AEMM .=misa?sm_80
@@ -153,6 +197,7 @@ AEMM misa?sm_30=misa?sm_35
AEMM misa?sm_30=misa?sm_37
AEMM misa?sm_30=misa?sm_52
AEMM misa?sm_30=misa?sm_53
+AEMM misa?sm_30=misa?sm_61
AEMM .=misa?sm_70
AEMM .=misa?sm_75
AEMM .=misa?sm_80
@@ -166,6 +211,7 @@ AEMM misa?sm_53=misa?sm_30
AEMM misa?sm_53=misa?sm_35
AEMM misa?sm_53=misa?sm_37
AEMM misa?sm_53=misa?sm_52
+AEMM misa?sm_53=misa?sm_61
AEMM .=misa?sm_70
AEMM .=misa?sm_75
AEMM .=misa?sm_80
@@ -178,6 +224,7 @@ SMOIL sm_70 sm_53 sm_30
AEMM misa?sm_30=misa?sm_35
AEMM misa?sm_30=misa?sm_37
AEMM misa?sm_30=misa?sm_52
+AEMM misa?sm_53=misa?sm_61
AEMM .=misa?sm_70
AEMM .=misa?sm_75
AEMM .=misa?sm_80
@@ -192,6 +239,7 @@ AEMM misa?sm_30=misa?sm_35
AEMM misa?sm_30=misa?sm_37
AEMM misa?sm_30=misa?sm_52
AEMM misa?sm_30=misa?sm_53
+AEMM misa?sm_30=misa?sm_61
AEMM misa?sm_30=misa?sm_70
AEMM .=misa?sm_75
AEMM .=misa?sm_80
@@ -205,6 +253,7 @@ AEMM misa?sm_53=misa?sm_30
AEMM misa?sm_53=misa?sm_35
AEMM misa?sm_53=misa?sm_37
AEMM misa?sm_53=misa?sm_52
+AEMM misa?sm_53=misa?sm_61
AEMM misa?sm_53=misa?sm_70
AEMM .=misa?sm_75
AEMM .=misa?sm_80
@@ -217,6 +266,7 @@ SMOIL sm_75 sm_30 sm_53
AEMM misa?sm_30=misa?sm_35
AEMM misa?sm_30=misa?sm_37
AEMM misa?sm_30=misa?sm_52
+AEMM misa?sm_53=misa?sm_61
AEMM misa?sm_53=misa?sm_70
AEMM .=misa?sm_75
AEMM .=misa?sm_80
@@ -232,6 +282,7 @@ AEMM .=misa?sm_35
AEMM .=misa?sm_37
AEMM .=misa?sm_52
AEMM .=misa?sm_53
+AEMM .=misa?sm_61
AEMM .=misa?sm_70
AEMM .=misa?sm_75
AEMM .=misa?sm_80
@@ -245,6 +296,7 @@ AEMM misa?sm_30=misa?sm_35
AEMM misa?sm_30=misa?sm_37
AEMM misa?sm_30=misa?sm_52
AEMM misa?sm_30=misa?sm_53
+AEMM misa?sm_30=misa?sm_61
AEMM misa?sm_30=misa?sm_70
AEMM misa?sm_30=misa?sm_75
AEMM .=misa?sm_80
@@ -259,6 +311,7 @@ AEMM misa?sm_75=misa?sm_35
AEMM misa?sm_75=misa?sm_37
AEMM misa?sm_75=misa?sm_52
AEMM misa?sm_75=misa?sm_53
+AEMM misa?sm_75=misa?sm_61
AEMM misa?sm_75=misa?sm_70
AEMM .=misa?sm_80
AEMM .=misa?sm_89
@@ -273,6 +326,7 @@ AEMM .=misa?sm_35
AEMM .=misa?sm_37
AEMM .=misa?sm_52
AEMM .=misa?sm_53
+AEMM .=misa?sm_61
AEMM .=misa?sm_70
AEMM .=misa?sm_75
AEMM .=misa?sm_80
@@ -286,6 +340,7 @@ AEMM misa?sm_52=misa?sm_30
AEMM misa?sm_52=misa?sm_35
AEMM misa?sm_52=misa?sm_37
AEMM misa?sm_52=misa?sm_53
+AEMM misa?sm_52=misa?sm_61
AEMM misa?sm_52=misa?sm_70
AEMM misa?sm_52=misa?sm_75
AEMM misa?sm_52=misa?sm_80
@@ -293,8 +348,8 @@ AEMM .=misa?sm_89
CMMC
-BEGIN '--with-arch=sm_89', '--with-multilib-list=sm_89,sm_30,sm_35,sm_37,sm_52,sm_53,sm_70,sm_75,sm_80'
+BEGIN '--with-arch=sm_89', '--with-multilib-list=sm_89,sm_30,sm_35,sm_37,sm_52,sm_53,sm_61,sm_70,sm_75,sm_80'
SMOID sm_89
-SMOIL sm_89 sm_30 sm_35 sm_37 sm_52 sm_53 sm_70 sm_75 sm_80
+SMOIL sm_89 sm_30 sm_35 sm_37 sm_52 sm_53 sm_61 sm_70 sm_75 sm_80
AEMM .=misa?sm_89
CMMC
diff --git a/gcc/config/nvptx/mkoffload.cc b/gcc/config/nvptx/mkoffload.cc
index e7ec0ef..bb3f0fc 100644
--- a/gcc/config/nvptx/mkoffload.cc
+++ b/gcc/config/nvptx/mkoffload.cc
@@ -260,8 +260,10 @@ process (FILE *in, FILE *out, uint32_t omp_requires)
unsigned ix;
const char *sm_ver = NULL, *version = NULL;
const char *sm_ver2 = NULL, *version2 = NULL;
- size_t file_cnt = 0;
- size_t *file_idx = XALLOCAVEC (size_t, len);
+ /* To reduce the number of reallocations for 'file_idx', guess 'file_cnt'
+ (very roughly...), based on 'len'. */
+ const size_t file_cnt_guessed = 13 + len / 27720;
+ auto_vec<size_t> file_idx (file_cnt_guessed);
fprintf (out, "#include <stdint.h>\n\n");
@@ -269,9 +271,10 @@ process (FILE *in, FILE *out, uint32_t omp_requires)
terminated by a NUL. */
for (size_t i = 0; i != len;)
{
+ file_idx.safe_push (i);
+
char c;
bool output_fn_ptr = false;
- file_idx[file_cnt++] = i;
fprintf (out, "static const char ptx_code_%u[] =\n\t\"", obj_count++);
while ((c = input[i++]))
@@ -349,6 +352,9 @@ process (FILE *in, FILE *out, uint32_t omp_requires)
}
}
+ const size_t file_cnt = file_idx.length ();
+ gcc_checking_assert (file_cnt == obj_count);
+
/* Create function-pointer array, required for reverse
offload function-pointer lookup. */
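
A minimal sketch of the auto_vec pattern used above, under the assumption that the constructor argument is only a capacity hint (safe_push still grows the vector if the guess is low); 'guess' and the loop body are illustrative:

    auto_vec<size_t> file_idx (guess);   /* reserve; length stays 0 */
    for (size_t i = 0; i != len;)
      {
        file_idx.safe_push (i);
        i += strlen (&input[i]) + 1;     /* skip one NUL-terminated chunk */
      }
    const size_t file_cnt = file_idx.length ();
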
diff --git a/gcc/config/nvptx/nvptx-gen.h b/gcc/config/nvptx/nvptx-gen.h
index 893df41..f5b9899 100644
--- a/gcc/config/nvptx/nvptx-gen.h
+++ b/gcc/config/nvptx/nvptx-gen.h
@@ -26,6 +26,7 @@
#define TARGET_SM37 (ptx_isa_option >= PTX_ISA_SM37)
#define TARGET_SM52 (ptx_isa_option >= PTX_ISA_SM52)
#define TARGET_SM53 (ptx_isa_option >= PTX_ISA_SM53)
+#define TARGET_SM61 (ptx_isa_option >= PTX_ISA_SM61)
#define TARGET_SM70 (ptx_isa_option >= PTX_ISA_SM70)
#define TARGET_SM75 (ptx_isa_option >= PTX_ISA_SM75)
#define TARGET_SM80 (ptx_isa_option >= PTX_ISA_SM80)
diff --git a/gcc/config/nvptx/nvptx-gen.opt b/gcc/config/nvptx/nvptx-gen.opt
index f45e8ef..bbae32d 100644
--- a/gcc/config/nvptx/nvptx-gen.opt
+++ b/gcc/config/nvptx/nvptx-gen.opt
@@ -39,6 +39,9 @@ EnumValue
Enum(ptx_isa) String(sm_53) Value(PTX_ISA_SM53)
EnumValue
+Enum(ptx_isa) String(sm_61) Value(PTX_ISA_SM61)
+
+EnumValue
Enum(ptx_isa) String(sm_70) Value(PTX_ISA_SM70)
EnumValue
diff --git a/gcc/config/nvptx/nvptx-opts.h b/gcc/config/nvptx/nvptx-opts.h
index d886701..07bcd32 100644
--- a/gcc/config/nvptx/nvptx-opts.h
+++ b/gcc/config/nvptx/nvptx-opts.h
@@ -40,6 +40,7 @@ enum ptx_version
PTX_VERSION_3_1,
PTX_VERSION_4_1,
PTX_VERSION_4_2,
+ PTX_VERSION_5_0,
PTX_VERSION_6_0,
PTX_VERSION_6_3,
PTX_VERSION_7_0,
diff --git a/gcc/config/nvptx/nvptx-sm.def b/gcc/config/nvptx/nvptx-sm.def
index 1485f89..9f9e864 100644
--- a/gcc/config/nvptx/nvptx-sm.def
+++ b/gcc/config/nvptx/nvptx-sm.def
@@ -25,6 +25,7 @@ NVPTX_SM (35, NVPTX_SM_SEP)
NVPTX_SM (37, NVPTX_SM_SEP)
NVPTX_SM (52, NVPTX_SM_SEP)
NVPTX_SM (53, NVPTX_SM_SEP)
+NVPTX_SM (61, NVPTX_SM_SEP)
NVPTX_SM (70, NVPTX_SM_SEP)
NVPTX_SM (75, NVPTX_SM_SEP)
NVPTX_SM (80, NVPTX_SM_SEP)
diff --git a/gcc/config/nvptx/nvptx.cc b/gcc/config/nvptx/nvptx.cc
index d1e25b9..a92a1e3 100644
--- a/gcc/config/nvptx/nvptx.cc
+++ b/gcc/config/nvptx/nvptx.cc
@@ -220,6 +220,8 @@ first_ptx_version_supporting_sm (enum ptx_isa sm)
return PTX_VERSION_4_1;
case PTX_ISA_SM53:
return PTX_VERSION_4_2;
+ case PTX_ISA_SM61:
+ return PTX_VERSION_5_0;
case PTX_ISA_SM70:
return PTX_VERSION_6_0;
case PTX_ISA_SM75:
@@ -268,6 +270,8 @@ ptx_version_to_string (enum ptx_version v)
return "4.1";
case PTX_VERSION_4_2:
return "4.2";
+ case PTX_VERSION_5_0:
+ return "5.0";
case PTX_VERSION_6_0:
return "6.0";
case PTX_VERSION_6_3:
@@ -294,6 +298,8 @@ ptx_version_to_number (enum ptx_version v, bool major_p)
return major_p ? 4 : 1;
case PTX_VERSION_4_2:
return major_p ? 4 : 2;
+ case PTX_VERSION_5_0:
+ return major_p ? 5 : 0;
case PTX_VERSION_6_0:
return major_p ? 6 : 0;
case PTX_VERSION_6_3:
@@ -2039,8 +2045,7 @@ nvptx_gen_shuffle (rtx dst, rtx src, rtx idx, nvptx_shuffle_kind kind)
start_sequence ();
emit_insn (nvptx_gen_shuffle (dst_real, src_real, idx, kind));
emit_insn (nvptx_gen_shuffle (dst_imag, src_imag, idx, kind));
- res = get_insns ();
- end_sequence ();
+ res = end_sequence ();
}
break;
case E_SImode:
@@ -2060,8 +2065,7 @@ nvptx_gen_shuffle (rtx dst, rtx src, rtx idx, nvptx_shuffle_kind kind)
emit_insn (nvptx_gen_shuffle (tmp0, tmp0, idx, kind));
emit_insn (nvptx_gen_shuffle (tmp1, tmp1, idx, kind));
emit_insn (nvptx_gen_pack (dst, tmp0, tmp1));
- res = get_insns ();
- end_sequence ();
+ res = end_sequence ();
}
break;
case E_V2SImode:
@@ -2079,8 +2083,7 @@ nvptx_gen_shuffle (rtx dst, rtx src, rtx idx, nvptx_shuffle_kind kind)
emit_insn (nvptx_gen_shuffle (tmp1, tmp1, idx, kind));
emit_insn (gen_movsi (dst0, tmp0));
emit_insn (gen_movsi (dst1, tmp1));
- res = get_insns ();
- end_sequence ();
+ res = end_sequence ();
}
break;
case E_V2DImode:
@@ -2098,8 +2101,7 @@ nvptx_gen_shuffle (rtx dst, rtx src, rtx idx, nvptx_shuffle_kind kind)
emit_insn (nvptx_gen_shuffle (tmp1, tmp1, idx, kind));
emit_insn (gen_movdi (dst0, tmp0));
emit_insn (gen_movdi (dst1, tmp1));
- res = get_insns ();
- end_sequence ();
+ res = end_sequence ();
}
break;
case E_BImode:
@@ -2110,8 +2112,7 @@ nvptx_gen_shuffle (rtx dst, rtx src, rtx idx, nvptx_shuffle_kind kind)
emit_insn (gen_sel_truesi (tmp, src, GEN_INT (1), const0_rtx));
emit_insn (nvptx_gen_shuffle (tmp, tmp, idx, kind));
emit_insn (gen_rtx_SET (dst, gen_rtx_NE (BImode, tmp, const0_rtx)));
- res = get_insns ();
- end_sequence ();
+ res = end_sequence ();
}
break;
case E_QImode:
@@ -2124,8 +2125,7 @@ nvptx_gen_shuffle (rtx dst, rtx src, rtx idx, nvptx_shuffle_kind kind)
emit_insn (nvptx_gen_shuffle (tmp, tmp, idx, kind));
emit_insn (gen_rtx_SET (dst, gen_rtx_fmt_e (TRUNCATE, GET_MODE (dst),
tmp)));
- res = get_insns ();
- end_sequence ();
+ res = end_sequence ();
}
break;
@@ -2188,8 +2188,7 @@ nvptx_gen_shared_bcast (rtx reg, propagate_mask pm, unsigned rep,
emit_insn (nvptx_gen_shared_bcast (tmp, pm, rep, data, vector));
if (pm & PM_write)
emit_insn (gen_rtx_SET (reg, gen_rtx_NE (BImode, tmp, const0_rtx)));
- res = get_insns ();
- end_sequence ();
+ res = end_sequence ();
}
break;
@@ -2225,8 +2224,7 @@ nvptx_gen_shared_bcast (rtx reg, propagate_mask pm, unsigned rep,
emit_insn (res);
emit_insn (gen_adddi3 (data->ptr, data->ptr,
GEN_INT (GET_MODE_SIZE (GET_MODE (reg)))));
- res = get_insns ();
- end_sequence ();
+ res = end_sequence ();
}
else
rep = 1;
@@ -4597,8 +4595,7 @@ nvptx_propagate (bool is_call, basic_block block, rtx_insn *insn,
}
emit_insn (gen_rtx_CLOBBER (GET_MODE (tmp), tmp));
emit_insn (gen_rtx_CLOBBER (GET_MODE (ptr), ptr));
- rtx cpy = get_insns ();
- end_sequence ();
+ rtx cpy = end_sequence ();
insn = emit_insn_after (cpy, insn);
}
@@ -5603,8 +5600,7 @@ workaround_uninit_method_1 (void)
if (nvptx_comment && first != NULL)
emit_insn (gen_comment ("Start: Added by -minit-regs=1"));
emit_move_insn (reg, CONST0_RTX (GET_MODE (reg)));
- rtx_insn *inits = get_insns ();
- end_sequence ();
+ rtx_insn *inits = end_sequence ();
if (dump_file && (dump_flags & TDF_DETAILS))
for (rtx_insn *init = inits; init != NULL; init = NEXT_INSN (init))
@@ -5660,8 +5656,7 @@ workaround_uninit_method_2 (void)
if (nvptx_comment && first != NULL)
emit_insn (gen_comment ("Start: Added by -minit-regs=2:"));
emit_move_insn (reg, CONST0_RTX (GET_MODE (reg)));
- rtx_insn *inits = get_insns ();
- end_sequence ();
+ rtx_insn *inits = end_sequence ();
if (dump_file && (dump_flags & TDF_DETAILS))
for (rtx_insn *init = inits; init != NULL; init = NEXT_INSN (init))
@@ -5731,8 +5726,7 @@ workaround_uninit_method_3 (void)
start_sequence ();
emit_move_insn (reg, CONST0_RTX (GET_MODE (reg)));
- rtx_insn *inits = get_insns ();
- end_sequence ();
+ rtx_insn *inits = end_sequence ();
if (dump_file && (dump_flags & TDF_DETAILS))
for (rtx_insn *init = inits; init != NULL;
@@ -5763,8 +5757,7 @@ workaround_uninit_method_3 (void)
emit_insn (gen_comment ("Start: Added by -minit-regs=3:"));
emit_insn (e->insns.r);
emit_insn (gen_comment ("End: Added by -minit-regs=3:"));
- e->insns.r = get_insns ();
- end_sequence ();
+ e->insns.r = end_sequence ();
}
}
@@ -7905,8 +7898,6 @@ nvptx_asm_output_def_from_decls (FILE *stream, tree name,
#define TARGET_ASM_DECLARE_CONSTANT_NAME nvptx_asm_declare_constant_name
#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
#define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
-#undef TARGET_ASM_NEED_VAR_DECL_BEFORE_USE
-#define TARGET_ASM_NEED_VAR_DECL_BEFORE_USE true
#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG nvptx_reorg
diff --git a/gcc/config/nvptx/nvptx.h b/gcc/config/nvptx/nvptx.h
index 35ef4bd..a2bb2fb 100644
--- a/gcc/config/nvptx/nvptx.h
+++ b/gcc/config/nvptx/nvptx.h
@@ -101,6 +101,7 @@
PTX ISA Version 3.1. */
#define TARGET_PTX_4_1 (ptx_version_option >= PTX_VERSION_4_1)
#define TARGET_PTX_4_2 (ptx_version_option >= PTX_VERSION_4_2)
+#define TARGET_PTX_5_0 (ptx_version_option >= PTX_VERSION_5_0)
#define TARGET_PTX_6_0 (ptx_version_option >= PTX_VERSION_6_0)
#define TARGET_PTX_6_3 (ptx_version_option >= PTX_VERSION_6_3)
#define TARGET_PTX_7_0 (ptx_version_option >= PTX_VERSION_7_0)
diff --git a/gcc/config/nvptx/nvptx.opt b/gcc/config/nvptx/nvptx.opt
index ce9fbc7..9796839 100644
--- a/gcc/config/nvptx/nvptx.opt
+++ b/gcc/config/nvptx/nvptx.opt
@@ -88,10 +88,10 @@ march-map=sm_60
Target RejectNegative Alias(misa=,sm_53)
march-map=sm_61
-Target RejectNegative Alias(misa=,sm_53)
+Target RejectNegative Alias(misa=,sm_61)
march-map=sm_62
-Target RejectNegative Alias(misa=,sm_53)
+Target RejectNegative Alias(misa=,sm_61)
march-map=sm_70
Target RejectNegative Alias(misa=,sm_70)
@@ -120,6 +120,51 @@ Target RejectNegative Alias(misa=,sm_89)
march-map=sm_90a
Target RejectNegative Alias(misa=,sm_89)
+march-map=sm_100
+Target RejectNegative Alias(misa=,sm_89)
+
+march-map=sm_100f
+Target RejectNegative Alias(misa=,sm_89)
+
+march-map=sm_100a
+Target RejectNegative Alias(misa=,sm_89)
+
+march-map=sm_101
+Target RejectNegative Alias(misa=,sm_89)
+
+march-map=sm_101f
+Target RejectNegative Alias(misa=,sm_89)
+
+march-map=sm_101a
+Target RejectNegative Alias(misa=,sm_89)
+
+march-map=sm_103
+Target RejectNegative Alias(misa=,sm_89)
+
+march-map=sm_103f
+Target RejectNegative Alias(misa=,sm_89)
+
+march-map=sm_103a
+Target RejectNegative Alias(misa=,sm_89)
+
+march-map=sm_120
+Target RejectNegative Alias(misa=,sm_89)
+
+march-map=sm_120f
+Target RejectNegative Alias(misa=,sm_89)
+
+march-map=sm_120a
+Target RejectNegative Alias(misa=,sm_89)
+
+march-map=sm_121
+Target RejectNegative Alias(misa=,sm_89)
+
+march-map=sm_121f
+Target RejectNegative Alias(misa=,sm_89)
+
+march-map=sm_121a
+Target RejectNegative Alias(misa=,sm_89)
+
Enum
Name(ptx_version) Type(enum ptx_version)
Known PTX ISA versions (for use with the -mptx= option):
@@ -134,6 +179,9 @@ EnumValue
Enum(ptx_version) String(4.2) Value(PTX_VERSION_4_2)
EnumValue
+Enum(ptx_version) String(5.0) Value(PTX_VERSION_5_0)
+
+EnumValue
Enum(ptx_version) String(6.0) Value(PTX_VERSION_6_0)
EnumValue
diff --git a/gcc/config/or1k/or1k.cc b/gcc/config/or1k/or1k.cc
index aa486aa..868df67 100644
--- a/gcc/config/or1k/or1k.cc
+++ b/gcc/config/or1k/or1k.cc
@@ -460,8 +460,7 @@ or1k_init_pic_reg (void)
cfun->machine->set_got_insn =
emit_insn (gen_set_got_tmp (pic_offset_table_rtx));
- rtx_insn *seq = get_insns ();
- end_sequence ();
+ rtx_insn *seq = end_sequence ();
edge entry_edge = single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun));
insert_insn_on_edge (seq, entry_edge);
@@ -1409,8 +1408,9 @@ static bool
or1k_can_change_mode_class (machine_mode from, machine_mode to,
reg_class_t rclass)
{
+  /* Allow converting special flags to SImode subregs.  */
if (rclass == FLAG_REGS)
- return from == to;
+ return from == to || (from == BImode && to == SImode);
return true;
}
@@ -1654,6 +1654,63 @@ or1k_rtx_costs (rtx x, machine_mode mode, int outer_code, int /* opno */,
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS or1k_rtx_costs
+static bool
+or1k_is_cmov_insn (rtx_insn *seq)
+{
+ rtx_insn *curr_insn = seq;
+ rtx set = NULL_RTX;
+
+ /* The pattern may start with a simple set with register operands. Skip
+ through any of those. */
+ while (curr_insn)
+ {
+ set = single_set (curr_insn);
+ if (!set
+ || !REG_P (SET_DEST (set)))
+ return false;
+
+      /* If it's not a simple reg or immediate, break.  */
+ if (REG_P (SET_SRC (set)) || CONST_INT_P (SET_SRC (set)))
+ curr_insn = NEXT_INSN (curr_insn);
+ else
+ break;
+ }
+
+ if (!curr_insn)
+ return false;
+
+  /* The next instruction should be a compare.  OpenRISC has many operators
+     used for comparison, so skip it and confirm that the following
+     instruction is an IF_THEN_ELSE.  */
+ curr_insn = NEXT_INSN (curr_insn);
+ if (!curr_insn)
+ return false;
+
+ /* And the last instruction should be an IF_THEN_ELSE. */
+ set = single_set (curr_insn);
+ if (!set
+ || !REG_P (SET_DEST (set))
+ || GET_CODE (SET_SRC (set)) != IF_THEN_ELSE)
+ return false;
+
+ return !NEXT_INSN (curr_insn);
+}
+
+/* Implement TARGET_NOCE_CONVERSION_PROFITABLE_P.  Detect whether the
+   conversion resulted in an l.cmov instruction and, if so, consider it
+   more profitable than branch instructions.  */
+
+static bool
+or1k_noce_conversion_profitable_p (rtx_insn *seq,
+ struct noce_if_info *if_info)
+{
+ if (TARGET_CMOV)
+ return or1k_is_cmov_insn (seq);
+
+ return default_noce_conversion_profitable_p (seq, if_info);
+}
+
+#undef TARGET_NOCE_CONVERSION_PROFITABLE_P
+#define TARGET_NOCE_CONVERSION_PROFITABLE_P or1k_noce_conversion_profitable_p
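
For context, the kind of source this hook favors; the example is illustrative and not part of the patch. When TARGET_CMOV is enabled, noce if-conversion can replace the branch below with a compare-and-l.cmov sequence, which or1k_is_cmov_insn then recognizes as profitable:

    int
    imax (int a, int b)
    {
      return a > b ? a : b;   /* becomes set-flag + l.cmov under TARGET_CMOV */
    }
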
/* A subroutine of the atomic operation splitters. Jump to LABEL if
COND is true. Mark the jump as unlikely to be taken. */
diff --git a/gcc/config/or1k/or1k.md b/gcc/config/or1k/or1k.md
index 627e400..bf71253 100644
--- a/gcc/config/or1k/or1k.md
+++ b/gcc/config/or1k/or1k.md
@@ -515,6 +515,31 @@
(ne:SI (reg:BI SR_F_REGNUM) (const_int 0)))]
"")
+;; Allowing "extending" the BImode SR_F to a general register
+;; keeps 'convert_mode_scalar' from trying to do subregging,
+;; which we don't have support for.
+;; We provide both signed and unsigned extend patterns because
+;; signed comparisons require sign extension, but for SR_F
+;; it doesn't matter which one is used.
+
+(define_expand "zero_extendbisi2_sr_f"
+ [(set (match_operand:SI 0 "register_operand" "")
+ (zero_extend:SI (match_operand:BI 1 "sr_f_reg_operand" "")))]
+ ""
+{
+  emit_insn (gen_sne_sr_f (operands[0]));
+ DONE;
+})
+
+(define_expand "extendbisi2_sr_f"
+ [(set (match_operand:SI 0 "register_operand" "")
+ (sign_extend:SI (match_operand:BI 1 "sr_f_reg_operand" "")))]
+ ""
+{
+  emit_insn (gen_sne_sr_f (operands[0]));
+ DONE;
+})
+
(define_insn_and_split "*scc"
[(set (match_operand:SI 0 "register_operand" "=r")
(match_operator:SI 1 "equality_comparison_operator"
@@ -584,7 +609,7 @@
;; Branch instructions
;; -------------------------------------------------------------------------
-(define_expand "cbranchsi4"
+(define_insn_and_split "cbranchsi4"
[(set (pc)
(if_then_else
(match_operator 0 "comparison_operator"
@@ -593,13 +618,27 @@
(label_ref (match_operand 3 "" ""))
(pc)))]
""
+ "#"
+ "&& 1"
+ [(const_int 0)]
{
+ rtx label;
+
+ /* Generate *scc */
or1k_expand_compare (operands);
+ /* Generate *cbranch */
+ label = gen_rtx_LABEL_REF (VOIDmode, operands[3]);
+ emit_jump_insn (gen_rtx_SET (pc_rtx,
+ gen_rtx_IF_THEN_ELSE (VOIDmode,
+ operands[0],
+ label,
+ pc_rtx)));
+ DONE;
})
;; Support FP branching
-(define_expand "cbranch<F:mode>4"
+(define_insn_and_split "cbranch<F:mode>4"
[(set (pc)
(if_then_else
(match_operator 0 "fp_comparison_operator"
@@ -608,8 +647,22 @@
(label_ref (match_operand 3 "" ""))
(pc)))]
"TARGET_HARD_FLOAT"
+ "#"
+ "&& 1"
+ [(const_int 0)]
{
+ rtx label;
+
+ /* Generate *scc */
or1k_expand_compare (operands);
+ /* Generate *cbranch */
+ label = gen_rtx_LABEL_REF (VOIDmode, operands[3]);
+ emit_jump_insn (gen_rtx_SET (pc_rtx,
+ gen_rtx_IF_THEN_ELSE (VOIDmode,
+ operands[0],
+ label,
+ pc_rtx)));
+ DONE;
})
(define_insn "*cbranch"
diff --git a/gcc/config/or1k/or1k.opt b/gcc/config/or1k/or1k.opt
index 00c5560..d252de0 100644
--- a/gcc/config/or1k/or1k.opt
+++ b/gcc/config/or1k/or1k.opt
@@ -69,8 +69,8 @@ are used to perform unordered floating point compare and set flag operations.
mcmodel=
Target RejectNegative Joined Enum(or1k_cmodel_type) Var(or1k_code_model) Init(CMODEL_SMALL)
Specify the code model used for accessing memory addresses. Specifying large
-enables generating binaries with large global offset tables. By default the
-value is small.
+enables generating binaries with large global offset tables and calling
+functions anywhere in an executable. By default the value is small.
Enum
Name(or1k_cmodel_type) Type(enum or1k_cmodel_type)
diff --git a/gcc/config/or1k/predicates.md b/gcc/config/or1k/predicates.md
index 11bb518..7ccfd09 100644
--- a/gcc/config/or1k/predicates.md
+++ b/gcc/config/or1k/predicates.md
@@ -60,8 +60,13 @@
(and (match_operand 0 "register_operand")
(match_test "TARGET_ROR"))))
+(define_predicate "sr_f_reg_operand"
+ (and (match_operand 0 "register_operand")
+ (match_test "REGNO (op) == SR_F_REGNUM")))
+
(define_predicate "call_insn_operand"
- (ior (match_code "symbol_ref")
+ (ior (and (match_code "symbol_ref")
+ (match_test "!TARGET_CMODEL_LARGE"))
(match_operand 0 "register_operand")))
(define_predicate "high_operand"
diff --git a/gcc/config/pa/pa-hpux.h b/gcc/config/pa/pa-hpux.h
index 74e30ed..1439447 100644
--- a/gcc/config/pa/pa-hpux.h
+++ b/gcc/config/pa/pa-hpux.h
@@ -114,3 +114,17 @@ along with GCC; see the file COPYING3. If not see
#undef TARGET_LIBC_HAS_FUNCTION
#define TARGET_LIBC_HAS_FUNCTION no_c99_libc_has_function
+
+/* Assume we have libatomic if sync libcalls are disabled. */
+#undef TARGET_HAVE_LIBATOMIC
+#define TARGET_HAVE_LIBATOMIC (!flag_sync_libcalls)
+
+/* The SYNC operations are implemented as library functions, not
+ INSN patterns. As a result, the HAVE defines for the patterns are
+ not defined. We need to define them to generate the corresponding
+ __GCC_HAVE_SYNC_COMPARE_AND_SWAP_* and __GCC_ATOMIC_*_LOCK_FREE
+ defines. */
+#define HAVE_sync_compare_and_swapqi (flag_sync_libcalls)
+#define HAVE_sync_compare_and_swaphi (flag_sync_libcalls)
+#define HAVE_sync_compare_and_swapsi (flag_sync_libcalls)
+#define HAVE_sync_compare_and_swapdi (flag_sync_libcalls)
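
A hedged illustration of the effect: with HAVE_sync_compare_and_swapsi defined, GCC predefines __GCC_HAVE_SYNC_COMPARE_AND_SWAP_4, so feature-test code like the following compiles even though the operation expands to a libcall:

    #ifdef __GCC_HAVE_SYNC_COMPARE_AND_SWAP_4
    int
    cas_int (int *p, int oldv, int newv)
    {
      return __sync_val_compare_and_swap (p, oldv, newv);
    }
    #endif
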
diff --git a/gcc/config/pa/pa.cc b/gcc/config/pa/pa.cc
index 9542d3b..b63ccf1 100644
--- a/gcc/config/pa/pa.cc
+++ b/gcc/config/pa/pa.cc
@@ -1123,8 +1123,7 @@ legitimize_tls_address (rtx addr)
else
emit_insn (gen_tld_load (tmp, addr));
t1 = hppa_tls_call (tmp);
- insn = get_insns ();
- end_sequence ();
+ insn = end_sequence ();
t2 = gen_reg_rtx (Pmode);
emit_libcall_block (insn, t2, t1,
gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
diff --git a/gcc/config/pru/pru-pragma.cc b/gcc/config/pru/pru-pragma.cc
index c3f3d33..9338780 100644
--- a/gcc/config/pru/pru-pragma.cc
+++ b/gcc/config/pru/pru-pragma.cc
@@ -46,21 +46,24 @@ pru_pragma_ctable_entry (cpp_reader *)
enum cpp_ttype type;
type = pragma_lex (&ctable_index);
- if (type == CPP_NUMBER && tree_fits_uhwi_p (ctable_index))
+ if (type == CPP_NUMBER && tree_fits_shwi_p (ctable_index))
{
type = pragma_lex (&base_addr);
- if (type == CPP_NUMBER && tree_fits_uhwi_p (base_addr))
+ if (type == CPP_NUMBER && tree_fits_shwi_p (base_addr))
{
- unsigned HOST_WIDE_INT i = tree_to_uhwi (ctable_index);
- unsigned HOST_WIDE_INT base = tree_to_uhwi (base_addr);
+ HOST_WIDE_INT i = tree_to_shwi (ctable_index);
+ HOST_WIDE_INT base = sext_hwi (tree_to_shwi (base_addr),
+ POINTER_SIZE);
type = pragma_lex (&base_addr);
if (type != CPP_EOF)
error ("junk at end of %<#pragma CTABLE_ENTRY%>");
- else if (i >= ARRAY_SIZE (pru_ctable))
+ else if (!IN_RANGE (i, 0, ARRAY_SIZE (pru_ctable) - 1))
error ("%<CTABLE_ENTRY%> index %wd is not valid", i);
else if (pru_ctable[i].valid && pru_ctable[i].base != base)
error ("redefinition of %<CTABLE_ENTRY %wd%>", i);
+ else if (!IN_RANGE (base, INT32_MIN, INT32_MAX))
+	error ("%<CTABLE_ENTRY%> base address does not fit in 32 bits");
else
{
if (base & 0xff)
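
Taken together, the checks above accept an entry like the following hypothetical pragma: the index must lie in [0, 31], the base must fit in 32 bits after sign extension, and (per the base & 0xff test) its low 8 bits must be clear:

    #pragma CTABLE_ENTRY 14 0x48040000
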
diff --git a/gcc/config/pru/pru-protos.h b/gcc/config/pru/pru-protos.h
index c73fad8..4750f0e 100644
--- a/gcc/config/pru/pru-protos.h
+++ b/gcc/config/pru/pru-protos.h
@@ -23,7 +23,7 @@
struct pru_ctable_entry {
bool valid;
- unsigned HOST_WIDE_INT base;
+ HOST_WIDE_INT base;
};
extern struct pru_ctable_entry pru_ctable[32];
@@ -66,9 +66,9 @@ pru_regno_ok_for_index_p (int regno, bool strict_p)
return pru_regno_ok_for_base_p (regno, strict_p);
}
-extern int pru_get_ctable_exact_base_index (unsigned HOST_WIDE_INT caddr);
-extern int pru_get_ctable_base_index (unsigned HOST_WIDE_INT caddr);
-extern int pru_get_ctable_base_offset (unsigned HOST_WIDE_INT caddr);
+extern int pru_get_ctable_exact_base_index (HOST_WIDE_INT caddr);
+extern int pru_get_ctable_base_index (HOST_WIDE_INT caddr);
+extern int pru_get_ctable_base_offset (HOST_WIDE_INT caddr);
extern int pru_symref2ioregno (rtx op);
diff --git a/gcc/config/pru/pru.cc b/gcc/config/pru/pru.cc
index 6319108..3fdc56e 100644
--- a/gcc/config/pru/pru.cc
+++ b/gcc/config/pru/pru.cc
@@ -941,10 +941,19 @@ pru_init_libfuncs (void)
/* Long long. */
set_optab_libfunc (ashr_optab, DImode, "__pruabi_asrll");
- set_optab_libfunc (smul_optab, DImode, "__pruabi_mpyll");
set_optab_libfunc (ashl_optab, DImode, "__pruabi_lslll");
set_optab_libfunc (lshr_optab, DImode, "__pruabi_lsrll");
+ if (TARGET_OPT_MUL)
+ {
+ set_optab_libfunc (smul_optab, DImode, "__pruabi_mpyll");
+ }
+ else
+ {
+ set_optab_libfunc (smul_optab, DImode, "__pruabi_softmpyll");
+ set_optab_libfunc (smul_optab, SImode, "__pruabi_softmpyi");
+ }
+
set_optab_libfunc (sdiv_optab, SImode, "__pruabi_divi");
set_optab_libfunc (udiv_optab, SImode, "__pruabi_divu");
set_optab_libfunc (smod_optab, SImode, "__pruabi_remi");
@@ -1040,8 +1049,7 @@ pru_expand_fp_compare (rtx comparison, machine_mode mode)
cmp = emit_library_call_value (libfunc, 0, LCT_CONST, SImode,
op0, op_mode, op1, op_mode);
- insns = get_insns ();
- end_sequence ();
+ insns = end_sequence ();
emit_libcall_block (insns, cmp, cmp,
gen_rtx_fmt_ee (code, SImode, op0, op1));
@@ -1429,7 +1437,7 @@ pru_valid_const_ubyte_offset (machine_mode mode, HOST_WIDE_INT offset)
/* Recognize a CTABLE base address. Return CTABLE entry index, or -1 if
base was not found in the pragma-filled pru_ctable. */
int
-pru_get_ctable_exact_base_index (unsigned HOST_WIDE_INT caddr)
+pru_get_ctable_exact_base_index (HOST_WIDE_INT caddr)
{
unsigned int i;
@@ -1445,7 +1453,7 @@ pru_get_ctable_exact_base_index (unsigned HOST_WIDE_INT caddr)
/* Check if the given address can be addressed via CTABLE_BASE + UBYTE_OFFS,
and return the base CTABLE index if possible. */
int
-pru_get_ctable_base_index (unsigned HOST_WIDE_INT caddr)
+pru_get_ctable_base_index (HOST_WIDE_INT caddr)
{
unsigned int i;
@@ -1462,7 +1470,7 @@ pru_get_ctable_base_index (unsigned HOST_WIDE_INT caddr)
/* Return the offset from some CTABLE base for this address. */
int
-pru_get_ctable_base_offset (unsigned HOST_WIDE_INT caddr)
+pru_get_ctable_base_offset (HOST_WIDE_INT caddr)
{
int i;
@@ -2005,7 +2013,7 @@ pru_print_operand_address (FILE *file, machine_mode mode, rtx op)
case CONST_INT:
{
- unsigned HOST_WIDE_INT caddr = INTVAL (op);
+ HOST_WIDE_INT caddr = INTVAL (op);
int base = pru_get_ctable_base_index (caddr);
int offs = pru_get_ctable_base_offset (caddr);
if (base < 0)
@@ -2919,8 +2927,7 @@ pru_reorg_loop (rtx_insn *insns)
LABEL_NUSES (end->label)++;
/* Emit the whole sequence before the doloop_end. */
- insn = get_insns ();
- end_sequence ();
+ insn = end_sequence ();
emit_insn_before (insn, end->insn);
/* Delete the doloop_end. */
diff --git a/gcc/config/pru/pru.h b/gcc/config/pru/pru.h
index 6c0719b..9d547ed 100644
--- a/gcc/config/pru/pru.h
+++ b/gcc/config/pru/pru.h
@@ -65,6 +65,9 @@
#undef ENDFILE_SPEC
#define ENDFILE_SPEC "%{!mabi=ti:-lgloss} "
+#undef MULTILIB_DEFAULTS
+#define MULTILIB_DEFAULTS { "mloop", "mmul", "mfillzero" }
+
/* TI ABI mandates that ELF symbols do not start with any prefix. */
#undef USER_LABEL_PREFIX
#define USER_LABEL_PREFIX ""
diff --git a/gcc/config/pru/pru.md b/gcc/config/pru/pru.md
index fcd3106..b8ef55b 100644
--- a/gcc/config/pru/pru.md
+++ b/gcc/config/pru/pru.md
@@ -215,7 +215,7 @@
mov\\t%0, %1
ldi\\t%0, %%pmem(%1)
ldi\\t%0, %1
- fill\\t%0, 4
+ * return TARGET_OPT_FILLZERO ? \"fill\\t%0, 4\" : \"ldi32\\t%0, 0xffffffff\";
ldi32\\t%0, %1"
[(set_attr "type" "st,ld,alu,alu,alu,alu,alu")
(set_attr "length" "4,4,4,4,4,4,8")])
@@ -259,9 +259,11 @@
case 1:
return "lb%B1o\\t%b0, %1, %S1";
case 2:
- return "zero\\t%F0, 8";
+ return TARGET_OPT_FILLZERO ? "zero\\t%F0, 8"
+ : "ldi\\t%F0, 0\;ldi\\t%N0, 0";
case 3:
- return "fill\\t%F0, 8";
+ return TARGET_OPT_FILLZERO ? "fill\\t%F0, 8"
+ : "ldi32\\t%F0, 0xffffffff\;mov\\t%N0, %F0";
case 4:
/* careful with overlapping source and destination regs. */
gcc_assert (GP_REG_P (REGNO (operands[0])));
@@ -283,6 +285,83 @@
[(set_attr "type" "st,ld,alu,alu,alu,alu,alu,alu")
(set_attr "length" "4,4,4,4,8,8,8,16")])
+; Break 64-bit register-to-register moves into 32-bit moves.
+; If only a subreg of the destination is used, this split allows
+; the other 32-bit subreg of the DI register to be eliminated.
+(define_split
+ [(set (match_operand:DI 0 "register_operand")
+ (match_operand:DI 1 "register_operand"))]
+ "
+ /* TODO - LRA does not yet handle subregs efficiently.
+ So it is profitable to split only after register allocation is
+ complete.
+ Once https://gcc.gnu.org/pipermail/gcc-patches/2024-May/651366.html
+ is merged, this condition should be removed to allow splitting
+ before LRA. */
+ reload_completed
+ /* Sign-extended paradoxical registers require expansion
+ of the proper pattern. We can do only zero extension here. */
+ && (SUBREG_P (operands[1]) && paradoxical_subreg_p (operands[1])
+ ? SUBREG_PROMOTED_VAR_P (operands[1])
+ && SUBREG_PROMOTED_UNSIGNED_P (operands[1]) > 0
+ : true)"
+ [(set (match_dup 0) (match_dup 1))
+ (set (match_dup 2) (match_dup 3))]
+ "
+ rtx dst_lo = simplify_gen_subreg (SImode, operands[0], DImode, 0);
+ rtx dst_hi = simplify_gen_subreg (SImode, operands[0], DImode, 4);
+ rtx src_lo = simplify_gen_subreg (SImode, operands[1], DImode, 0);
+ rtx src_hi = simplify_gen_subreg (SImode, operands[1], DImode, 4);
+
+ if (SUBREG_P (operands[1]) && paradoxical_subreg_p (operands[1]))
+ {
+ gcc_assert (SUBREG_PROMOTED_VAR_P (operands[1]));
+ gcc_assert (SUBREG_PROMOTED_UNSIGNED_P (operands[1]) > 0);
+
+ operands[0] = dst_lo;
+ operands[1] = src_lo;
+ operands[2] = dst_hi;
+ operands[3] = const0_rtx;
+ }
+ else if (!reg_overlap_mentioned_p (dst_lo, src_hi))
+ {
+ operands[0] = dst_lo;
+ operands[1] = src_lo;
+ operands[2] = dst_hi;
+ operands[3] = src_hi;
+ }
+ else
+ {
+ operands[0] = dst_hi;
+ operands[1] = src_hi;
+ operands[2] = dst_lo;
+ operands[3] = src_lo;
+ }
+ "
+)
+
+; Break loading of non-trivial 64-bit constant integers.  The split
+; will not generate a better code sequence, but it at least allows
+; dropping a non-live 32-bit part of the destination and enables
+; better constant propagation.
+(define_split
+ [(set (match_operand:DI 0 "register_operand")
+ (match_operand:DI 1 "const_int_operand"))]
+ "reload_completed
+ && !satisfies_constraint_Z (operands[1])
+ && !satisfies_constraint_Um (operands[1])
+ && !satisfies_constraint_T (operands[1])"
+
+ [(set (match_dup 0) (match_dup 1))
+ (set (match_dup 2) (match_dup 3))]
+ "
+ operands[2] = simplify_gen_subreg (SImode, operands[0], DImode, 4);
+  operands[3] = simplify_gen_subreg (SImode, operands[1], DImode, 4);
+ operands[0] = simplify_gen_subreg (SImode, operands[0], DImode, 0);
+ operands[1] = simplify_gen_subreg (SImode, operands[1], DImode, 0);
+ "
+)
+
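
As a worked example of this constant split (hypothetical value, little-endian PRU): the two SImode subregs of a DImode constant are its low and high 32-bit halves, so each half can be loaded or dropped independently:

    unsigned long long v = 0x1122334455667788ULL;
    unsigned int lo = (unsigned int) v;          /* 0x55667788, subreg offset 0 */
    unsigned int hi = (unsigned int) (v >> 32);  /* 0x11223344, subreg offset 4 */
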
;
; load_multiple pattern(s).
;
@@ -425,7 +504,7 @@
(define_insn "zero_extendqidi2"
[(set (match_operand:DI 0 "register_operand" "=r,r")
(zero_extend:DI (match_operand:QI 1 "register_operand" "0,r")))]
- ""
+ "TARGET_OPT_FILLZERO"
"@
zero\\t%F0.b1, 7
mov\\t%F0.b0, %1\;zero\\t%F0.b1, 7"
@@ -435,7 +514,7 @@
(define_insn "zero_extendhidi2"
[(set (match_operand:DI 0 "register_operand" "=r,r")
(zero_extend:DI (match_operand:HI 1 "register_operand" "0,r")))]
- ""
+ "TARGET_OPT_FILLZERO"
"@
zero\\t%F0.b2, 6
mov\\t%F0.w0, %1\;zero\\t%F0.b2, 6"
@@ -445,7 +524,7 @@
(define_insn "zero_extendsidi2"
[(set (match_operand:DI 0 "register_operand" "=r,r")
(zero_extend:DI (match_operand:SI 1 "register_operand" "0,r")))]
- ""
+ "TARGET_OPT_FILLZERO"
"@
zero\\t%N0, 4
mov\\t%F0, %1\;zero\\t%N0, 4"
@@ -458,7 +537,7 @@
(define_expand "extend<EQS0:mode><EQDHIDI:mode>2"
[(set (match_operand:EQDHIDI 0 "register_operand" "=r")
(sign_extend:EQDHIDI (match_operand:EQS0 1 "register_operand" "r")))]
- ""
+ "TARGET_OPT_FILLZERO"
{
rtx_code_label *skip_hiset_label;
@@ -667,7 +746,7 @@
(ior:HIDI
(match_operand:HIDI 1 "register_operand" "0")
(match_operand:HIDI 2 "const_fillbytes_operand" "Uf")))]
- ""
+ "TARGET_OPT_FILLZERO"
{
static char line[64];
pru_byterange r;
@@ -690,7 +769,7 @@
(and:HIDI
(match_operand:HIDI 1 "register_operand" "0")
(match_operand:HIDI 2 "const_zerobytes_operand" "Uz")))]
- ""
+ "TARGET_OPT_FILLZERO"
{
static char line[64];
pru_byterange r;
@@ -1037,7 +1116,8 @@
/* Try with the more efficient zero/fill patterns first. */
if (<LOGICAL_BITOP:CODE> == IOR
&& CONST_INT_P (operands[2])
- && const_fillbytes_operand (operands[2], DImode))
+ && const_fillbytes_operand (operands[2], DImode)
+ && TARGET_OPT_FILLZERO)
{
rtx insn = maybe_gen_pru_ior_fillbytes (DImode,
operands[0],
@@ -1053,7 +1133,8 @@
}
if (<LOGICAL_BITOP:CODE> == AND
&& CONST_INT_P (operands[2])
- && const_zerobytes_operand (operands[2], DImode))
+ && const_zerobytes_operand (operands[2], DImode)
+ && TARGET_OPT_FILLZERO)
{
rtx insn = maybe_gen_pru_and_zerobytes (DImode,
operands[0],
@@ -1135,7 +1216,7 @@
[(set (match_operand:SI 0 "pru_muldst_operand" "=Rmd0")
(mult:SI (match_operand:SI 1 "pru_mulsrc0_operand" "%Rms0")
(match_operand:SI 2 "pru_mulsrc1_operand" "Rms1")))]
- ""
+ "TARGET_OPT_MUL"
"nop\;xin\\t0, %0, 4"
[(set_attr "type" "alu")
(set_attr "length" "8")])
diff --git a/gcc/config/pru/pru.opt b/gcc/config/pru/pru.opt
index 8385beb..5206b2a 100644
--- a/gcc/config/pru/pru.opt
+++ b/gcc/config/pru/pru.opt
@@ -39,6 +39,14 @@ mloop
Target Mask(OPT_LOOP)
Allow (or do not allow) gcc to use the LOOP instruction.
+mmul
+Target Mask(OPT_MUL)
+Allow (or do not allow) gcc to use the PRU multiplier unit.
+
+mfillzero
+Target Mask(OPT_FILLZERO)
+Allow (or do not allow) gcc to use the FILL and ZERO instructions.
+
mabi=
Target RejectNegative Joined Enum(pru_abi_t) Var(pru_current_abi) Init(PRU_ABI_GNU) Save
Select target ABI variant.
diff --git a/gcc/config/pru/pru.opt.urls b/gcc/config/pru/pru.opt.urls
index c87affb..5c57892 100644
--- a/gcc/config/pru/pru.opt.urls
+++ b/gcc/config/pru/pru.opt.urls
@@ -12,6 +12,12 @@ UrlSuffix(gcc/PRU-Options.html#index-mno-relax-1)
mloop
UrlSuffix(gcc/PRU-Options.html#index-mloop)
+mmul
+UrlSuffix(gcc/PRU-Options.html#index-mmul)
+
+mfillzero
+UrlSuffix(gcc/PRU-Options.html#index-mfillzero)
+
mabi=
UrlSuffix(gcc/PRU-Options.html#index-mabi-4)
diff --git a/gcc/config/pru/t-multilib b/gcc/config/pru/t-multilib
new file mode 100644
index 0000000..1e3c2b8
--- /dev/null
+++ b/gcc/config/pru/t-multilib
@@ -0,0 +1,29 @@
+# Copyright (C) 2025 Free Software Foundation, Inc.
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify it under
+# the terms of the GNU General Public License as published by the Free
+# Software Foundation; either version 3, or (at your option) any later
+# version.
+#
+# GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+# WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+# for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3. If not see
+# <http://www.gnu.org/licenses/>.
+
+MULTILIB_OPTIONS =
+MULTILIB_OPTIONS += mloop/mno-loop
+MULTILIB_OPTIONS += mmul/mno-mul
+MULTILIB_OPTIONS += mfillzero/mno-fillzero
+
+# Build two variants:
+# - Newer PRU core versions, present in AM335x and later.
+# - Older PRU core versions, present in AM18xx.
+MULTILIB_REQUIRED =
+MULTILIB_REQUIRED += mloop/mmul/mfillzero
+MULTILIB_REQUIRED += mno-loop/mno-mul/mno-fillzero
diff --git a/gcc/config/riscv/arch-canonicalize b/gcc/config/riscv/arch-canonicalize
index fd55255..15a3985 100755
--- a/gcc/config/riscv/arch-canonicalize
+++ b/gcc/config/riscv/arch-canonicalize
@@ -20,77 +20,326 @@
# along with GCC; see the file COPYING3. If not see
# <http://www.gnu.org/licenses/>.
-# TODO: Extract riscv_subset_t from riscv-common.cc and make it can be compiled
-# standalone to replace this script, that also prevents us implementing
-# that twice and keep sync again and again.
-
from __future__ import print_function
import sys
import argparse
import collections
import itertools
+import re
+import os
from functools import reduce
SUPPORTED_ISA_SPEC = ["2.2", "20190608", "20191213"]
-CANONICAL_ORDER = "imafdgqlcbkjtpvn"
+CANONICAL_ORDER = "imafdqlcbkjtpvnh"
LONG_EXT_PREFIXES = ['z', 's', 'h', 'x']
+def parse_define_riscv_ext(content):
+ """Parse DEFINE_RISCV_EXT macros using position-based parsing."""
+ extensions = []
+
+ # Find all DEFINE_RISCV_EXT blocks
+ pattern = r'DEFINE_RISCV_EXT\s*\('
+ matches = []
+
+ pos = 0
+ while True:
+ match = re.search(pattern, content[pos:])
+ if not match:
+ break
+
+ start_pos = pos + match.start()
+ paren_count = 0
+ current_pos = pos + match.end() - 1 # Start at the opening parenthesis
+
+ # Find the matching closing parenthesis
+ while current_pos < len(content):
+ if content[current_pos] == '(':
+ paren_count += 1
+ elif content[current_pos] == ')':
+ paren_count -= 1
+ if paren_count == 0:
+ break
+ current_pos += 1
+
+ if paren_count == 0:
+ # Extract the content inside parentheses
+ macro_content = content[pos + match.end():current_pos]
+ ext_data = parse_macro_arguments(macro_content)
+ if ext_data:
+ extensions.append(ext_data)
+
+ pos = current_pos + 1
+
+ return extensions
+
+def parse_macro_arguments(macro_content):
+ """Parse the arguments of a DEFINE_RISCV_EXT macro."""
+ # Remove comments /* ... */
+ cleaned_content = re.sub(r'/\*[^*]*\*/', '', macro_content)
+
+ # Split arguments by comma, but respect nested structures
+ args = []
+ current_arg = ""
+ paren_count = 0
+ brace_count = 0
+ in_string = False
+ escape_next = False
+
+ for char in cleaned_content:
+ if escape_next:
+ current_arg += char
+ escape_next = False
+ continue
+
+ if char == '\\':
+ escape_next = True
+ current_arg += char
+ continue
+
+ if char == '"' and not escape_next:
+ in_string = not in_string
+ current_arg += char
+ continue
+
+ if in_string:
+ current_arg += char
+ continue
+
+ if char == '(':
+ paren_count += 1
+ elif char == ')':
+ paren_count -= 1
+ elif char == '{':
+ brace_count += 1
+ elif char == '}':
+ brace_count -= 1
+ elif char == ',' and paren_count == 0 and brace_count == 0:
+ args.append(current_arg.strip())
+ current_arg = ""
+ continue
+
+ current_arg += char
+
+ # Add the last argument
+ if current_arg.strip():
+ args.append(current_arg.strip())
+
+ # We need at least 6 arguments to get DEP_EXTS (position 5)
+ if len(args) < 6:
+ return None
+
+ ext_name = args[0].strip()
+ dep_exts_arg = args[5].strip() # DEP_EXTS is at position 5
+
+ # Parse dependency extensions from the DEP_EXTS argument
+ deps = parse_dep_exts(dep_exts_arg)
+
+ return {
+ 'name': ext_name,
+ 'dep_exts': deps
+ }
+
+def parse_dep_exts(dep_exts_str):
+ """Parse the DEP_EXTS argument to extract dependency list with conditions."""
+ # Remove outer parentheses if present
+ dep_exts_str = dep_exts_str.strip()
+ if dep_exts_str.startswith('(') and dep_exts_str.endswith(')'):
+ dep_exts_str = dep_exts_str[1:-1].strip()
+
+ # Remove outer braces if present
+ if dep_exts_str.startswith('{') and dep_exts_str.endswith('}'):
+ dep_exts_str = dep_exts_str[1:-1].strip()
+
+ if not dep_exts_str:
+ return []
+
+ deps = []
+
+ # First, find and process conditional dependencies
+ conditional_pattern = r'\{\s*"([^"]+)"\s*,\s*(\[.*?\]\s*\([^)]*\)\s*->\s*bool.*?)\}'
+ conditional_matches = []
+
+ for match in re.finditer(conditional_pattern, dep_exts_str, re.DOTALL):
+ ext_name = match.group(1)
+ condition_code = match.group(2)
+ deps.append({'ext': ext_name, 'type': 'conditional', 'condition': condition_code})
+ # The conditional_pattern RE matches only the first code block enclosed
+ # in braces.
+ #
+ # Extend the match to the condition block's closing brace, encompassing
+ # all code blocks, by simply trying to match the numbers of opening
+        # and closing braces.  While crude, this avoids writing a
+        # complicated parser here.
+        closing_braces_left = condition_code.count('{') - condition_code.count('}')
+        condition_end = match.end()
+        while closing_braces_left > 0:
+            # Step past the matched brace so repeated searches advance.
+            condition_end = dep_exts_str.find('}', condition_end) + 1
+ closing_braces_left -= 1
+ conditional_matches.append((match.start(), condition_end))
+
+ # Remove conditional dependency blocks from the string
+ remaining_str = dep_exts_str
+ for start, end in reversed(conditional_matches): # Reverse order to maintain indices
+ remaining_str = remaining_str[:start] + remaining_str[end:]
+
+ # Now handle simple quoted strings in the remaining text
+ for match in re.finditer(r'"([^"]+)"', remaining_str):
+ deps.append({'ext': match.group(1), 'type': 'simple'})
+
+ # Remove duplicates while preserving order
+ seen = set()
+ unique_deps = []
+ for dep in deps:
+ key = (dep['ext'], dep['type'])
+ if key not in seen:
+ seen.add(key)
+ unique_deps.append(dep)
+
+ return unique_deps
+
+def evaluate_conditional_dependency(ext, dep, xlen, current_exts):
+ """Evaluate whether a conditional dependency should be included."""
+ ext_name = dep['ext']
+ condition = dep['condition']
+ # Parse the condition based on known patterns
+ if ext_name == 'zcf' and ext in ['zca', 'c', 'zce']:
+ # zcf depends on RV32 and F extension
+ return xlen == 32 and 'f' in current_exts
+ elif ext_name == 'zcd' and ext in ['zca', 'c']:
+ # zcd depends on D extension
+ return 'd' in current_exts
+ elif ext_name == 'c' and ext in ['zca']:
+ # Special case for zca -> c conditional dependency
+ if xlen == 32:
+ if 'd' in current_exts:
+ return 'zcf' in current_exts and 'zcd' in current_exts
+ elif 'f' in current_exts:
+ return 'zcf' in current_exts
+ else:
+ return True
+ elif xlen == 64:
+ if 'd' in current_exts:
+ return 'zcd' in current_exts
+ else:
+ return True
+ return False
+ else:
+ # Report error for unhandled conditional dependencies
+ print(f"ERROR: Unhandled conditional dependency: '{ext_name}' with condition:", file=sys.stderr)
+ print(f" Condition code: {condition[:100]}...", file=sys.stderr)
+ print(f" Current context: xlen={xlen}, exts={sorted(current_exts)}", file=sys.stderr)
+ # For now, return False to be safe
+ return False
+
+def resolve_dependencies(arch_parts, xlen):
+ """Resolve all dependencies including conditional ones."""
+ current_exts = set(arch_parts)
+ implied_deps = set()
+
+ # Keep resolving until no new dependencies are found
+ changed = True
+ while changed:
+ changed = False
+ new_deps = set()
+
+ for ext in current_exts | implied_deps:
+ if ext in IMPLIED_EXT:
+ for dep in IMPLIED_EXT[ext]:
+ if dep['type'] == 'simple':
+ if dep['ext'] not in current_exts and dep['ext'] not in implied_deps:
+ new_deps.add(dep['ext'])
+ changed = True
+ elif dep['type'] == 'conditional':
+ should_include = evaluate_conditional_dependency(ext, dep, xlen, current_exts | implied_deps)
+ if should_include:
+ if dep['ext'] not in current_exts and dep['ext'] not in implied_deps:
+ new_deps.add(dep['ext'])
+ changed = True
+
+ implied_deps.update(new_deps)
+
+ return implied_deps
+
+def parse_def_file(file_path, script_dir, processed_files=None, collect_all=False):
+ """Parse a single .def file and recursively process #include directives."""
+ if processed_files is None:
+ processed_files = set()
+
+ # Avoid infinite recursion
+ if file_path in processed_files:
+ return ({}, set()) if collect_all else {}
+ processed_files.add(file_path)
+
+ implied_ext = {}
+ all_extensions = set() if collect_all else None
+
+ if not os.path.exists(file_path):
+ return (implied_ext, all_extensions) if collect_all else implied_ext
+
+ with open(file_path, 'r') as f:
+ content = f.read()
+
+ # Process #include directives first
+ include_pattern = r'#include\s+"([^"]+)"'
+ includes = re.findall(include_pattern, content)
+
+ for include_file in includes:
+ include_path = os.path.join(script_dir, include_file)
+ if collect_all:
+ included_ext, included_all = parse_def_file(include_path, script_dir, processed_files, collect_all)
+ implied_ext.update(included_ext)
+ all_extensions.update(included_all)
+ else:
+ included_ext = parse_def_file(include_path, script_dir, processed_files, collect_all)
+ implied_ext.update(included_ext)
+
+ # Parse DEFINE_RISCV_EXT blocks using position-based parsing
+ parsed_exts = parse_define_riscv_ext(content)
+
+ for ext_data in parsed_exts:
+ ext_name = ext_data['name']
+ deps = ext_data['dep_exts']
+
+ if collect_all:
+ all_extensions.add(ext_name)
+
+ if deps:
+ implied_ext[ext_name] = deps
+
+ return (implied_ext, all_extensions) if collect_all else implied_ext
+
+def parse_def_files():
+ """Parse RISC-V extension definition files starting from riscv-ext.def."""
+ # Get directory containing this script
+ try:
+ script_dir = os.path.dirname(os.path.abspath(__file__))
+ except NameError:
+ # When __file__ is not defined (e.g., interactive mode)
+ script_dir = os.getcwd()
+
+ # Start with the main definition file
+ main_def_file = os.path.join(script_dir, 'riscv-ext.def')
+ return parse_def_file(main_def_file, script_dir)
+
+def get_all_extensions():
+ """Get all supported extensions and their implied extensions."""
+ # Get directory containing this script
+ try:
+ script_dir = os.path.dirname(os.path.abspath(__file__))
+ except NameError:
+ # When __file__ is not defined (e.g., interactive mode)
+ script_dir = os.getcwd()
+
+ # Start with the main definition file
+ main_def_file = os.path.join(script_dir, 'riscv-ext.def')
+ return parse_def_file(main_def_file, script_dir, collect_all=True)
+
#
# IMPLIED_EXT(ext) -> implied extension list.
+# This is loaded dynamically from .def files
#
-IMPLIED_EXT = {
- "d" : ["f", "zicsr"],
-
- "a" : ["zaamo", "zalrsc"],
- "zabha" : ["zaamo"],
- "zacas" : ["zaamo"],
-
- "f" : ["zicsr"],
- "b" : ["zba", "zbb", "zbs"],
- "zdinx" : ["zfinx", "zicsr"],
- "zfinx" : ["zicsr"],
- "zhinx" : ["zhinxmin", "zfinx", "zicsr"],
- "zhinxmin" : ["zfinx", "zicsr"],
-
- "zk" : ["zkn", "zkr", "zkt"],
- "zkn" : ["zbkb", "zbkc", "zbkx", "zkne", "zknd", "zknh"],
- "zks" : ["zbkb", "zbkc", "zbkx", "zksed", "zksh"],
-
- "v" : ["zvl128b", "zve64d"],
- "zve32x" : ["zvl32b"],
- "zve64x" : ["zve32x", "zvl64b"],
- "zve32f" : ["f", "zve32x"],
- "zve64f" : ["f", "zve32f", "zve64x"],
- "zve64d" : ["d", "zve64f"],
-
- "zvl64b" : ["zvl32b"],
- "zvl128b" : ["zvl64b"],
- "zvl256b" : ["zvl128b"],
- "zvl512b" : ["zvl256b"],
- "zvl1024b" : ["zvl512b"],
- "zvl2048b" : ["zvl1024b"],
- "zvl4096b" : ["zvl2048b"],
- "zvl8192b" : ["zvl4096b"],
- "zvl16384b" : ["zvl8192b"],
- "zvl32768b" : ["zvl16384b"],
- "zvl65536b" : ["zvl32768b"],
-
- "zvkn" : ["zvkned", "zvknhb", "zvkb", "zvkt"],
- "zvknc" : ["zvkn", "zvbc"],
- "zvkng" : ["zvkn", "zvkg"],
- "zvks" : ["zvksed", "zvksh", "zvkb", "zvkt"],
- "zvksc" : ["zvks", "zvbc"],
- "zvksg" : ["zvks", "zvkg"],
- "zvbb" : ["zvkb"],
- "zvbc" : ["zve64x"],
- "zvkb" : ["zve32x"],
- "zvkg" : ["zve32x"],
- "zvkned" : ["zve32x"],
- "zvknha" : ["zve32x"],
- "zvknhb" : ["zve64x"],
- "zvksed" : ["zve32x"],
- "zvksh" : ["zve32x"],
-}
+IMPLIED_EXT = parse_def_files()
def arch_canonicalize(arch, isa_spec):
# TODO: Support extension version.
@@ -123,21 +372,31 @@ def arch_canonicalize(arch, isa_spec):
long_exts += extra_long_ext
#
- # Handle implied extensions.
+ # Handle implied extensions using new conditional logic.
#
- any_change = True
- while any_change:
- any_change = False
- for ext in std_exts + long_exts:
- if ext in IMPLIED_EXT:
- implied_exts = IMPLIED_EXT[ext]
- for implied_ext in implied_exts:
- if implied_ext == 'zicsr' and is_isa_spec_2p2:
- continue
+ # Extract xlen from architecture string
+ # TODO: We should support profile here.
+ if arch.startswith('rv32'):
+ xlen = 32
+ elif arch.startswith('rv64'):
+ xlen = 64
+ else:
+ raise Exception("Unsupported prefix `%s`" % arch)
- if implied_ext not in std_exts + long_exts:
- long_exts.append(implied_ext)
- any_change = True
+ # Get all current extensions
+ current_exts = std_exts + long_exts
+
+ # Resolve dependencies
+ implied_deps = resolve_dependencies(current_exts, xlen)
+
+ # Filter out zicsr for ISA spec 2.2
+ if is_isa_spec_2p2:
+ implied_deps.discard('zicsr')
+
+ # Add implied dependencies to long_exts
+ for dep in implied_deps:
+ if dep not in current_exts:
+ long_exts.append(dep)
# Single letter extension might appear in the long_exts list,
# because we just append extensions list to the arch string.
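
For reference, resolve_dependencies (defined earlier in this patch, outside
this excerpt) computes a transitive closure over IMPLIED_EXT: keep adding
implied extensions until nothing changes. A minimal sketch under that
assumption, ignoring the conditional-dependency evaluation the real function
performs:

    def resolve_dependencies_sketch(current_exts, xlen):
        # Hypothetical illustration only.  IMPLIED_EXT maps an extension
        # to a list of dicts such as {'ext': 'zicsr', 'type': 'simple'}.
        implied = set()
        worklist = list(current_exts)
        while worklist:
            ext = worklist.pop()
            for dep in IMPLIED_EXT.get(ext, []):
                name = dep['ext']
                # The real function also evaluates 'conditional' entries
                # against xlen and the current extension set.
                if name not in current_exts and name not in implied:
                    implied.add(name)
                    worklist.append(name)
        return implied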
@@ -179,17 +438,177 @@ def arch_canonicalize(arch, isa_spec):
return new_arch
-if len(sys.argv) < 2:
- print ("Usage: %s <arch_str> [<arch_str>*]" % sys.argv)
- sys.exit(1)
+def dump_all_extensions():
+ """Dump all extensions and their implied extensions."""
+ implied_ext, all_extensions = get_all_extensions()
+
+ print("All supported RISC-V extensions:")
+ print("=" * 60)
+
+ if not all_extensions:
+ print("No extensions found.")
+ return
-parser = argparse.ArgumentParser()
-parser.add_argument('-misa-spec', type=str,
- default='20191213',
- choices=SUPPORTED_ISA_SPEC)
-parser.add_argument('arch_strs', nargs=argparse.REMAINDER)
+ # Sort all extensions for consistent output
+ sorted_all = sorted(all_extensions)
-args = parser.parse_args()
+ # Print all extensions with their dependencies (if any)
+ for ext_name in sorted_all:
+ if ext_name in implied_ext:
+ deps = implied_ext[ext_name]
+ dep_strs = []
+ for dep in deps:
+ if dep['type'] == 'simple':
+ dep_strs.append(dep['ext'])
+ else:
+ dep_strs.append(f"{dep['ext']}*") # Mark conditional deps with *
+ print(f"{ext_name:15} -> {', '.join(dep_strs)}")
+ else:
+ print(f"{ext_name:15} -> (no dependencies)")
+
+ print(f"\nTotal extensions: {len(all_extensions)}")
+ print(f"Extensions with dependencies: {len(implied_ext)}")
+ print(f"Extensions without dependencies: {len(all_extensions) - len(implied_ext)}")
+
+def run_unit_tests():
+ """Run unit tests using pytest dynamically imported."""
+ try:
+ import pytest
+ except ImportError:
+ print("Error: pytest is required for running unit tests.")
+ print("Please install pytest: pip install pytest")
+ return 1
+
+ # Define test functions
+ def test_basic_arch_parsing():
+ """Test basic architecture string parsing."""
+ result = arch_canonicalize("rv64i", "20191213")
+ assert result == "rv64i"
+
+ def test_simple_extensions():
+ """Test simple extension handling."""
+ result = arch_canonicalize("rv64im", "20191213")
+ assert "zmmul" in result
+
+ def test_implied_extensions():
+ """Test implied extension resolution."""
+ result = arch_canonicalize("rv64imaf", "20191213")
+ assert "zicsr" in result
+
+ def test_conditional_dependencies():
+ """Test conditional dependency evaluation."""
+ # Test RV32 with F extension should include zcf when c is present
+ result = arch_canonicalize("rv32ifc", "20191213")
+ parts = result.split("_")
+ if "c" in parts:
+ assert "zca" in parts
+ if "f" in parts:
+ assert "zcf" in parts
+
+ def test_parse_dep_exts():
+ """Test dependency parsing function."""
+ # Test simple dependency
+ deps = parse_dep_exts('{"ext1", "ext2"}')
+ assert len(deps) == 2
+ assert deps[0]['ext'] == 'ext1'
+ assert deps[0]['type'] == 'simple'
+
+ def test_evaluate_conditional_dependency():
+ """Test conditional dependency evaluation."""
+ # Test zcf condition for RV32 with F
+ dep = {'ext': 'zcf', 'type': 'conditional', 'condition': 'test'}
+ result = evaluate_conditional_dependency('zce', dep, 32, {'f'})
+ assert result is True
+
+ # Test zcf condition for RV64 with F (should be False)
+ result = evaluate_conditional_dependency('zce', dep, 64, {'f'})
+ assert result is False
+
+ def test_parse_define_riscv_ext():
+ """Test DEFINE_RISCV_EXT parsing."""
+ content = '''
+ DEFINE_RISCV_EXT(
+ /* NAME */ test,
+ /* UPPERCASE_NAME */ TEST,
+ /* FULL_NAME */ "Test extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"dep1", "dep2"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ test,
+ /* BITMASK_GROUP_ID */ 0,
+ /* BITMASK_BIT_POSITION*/ 0,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+ '''
+
+ extensions = parse_define_riscv_ext(content)
+ assert len(extensions) == 1
+ assert extensions[0]['name'] == 'test'
+ assert len(extensions[0]['dep_exts']) == 2
-for arch in args.arch_strs:
- print (arch_canonicalize(arch, args.misa_spec))
+ def test_parse_long_condition_block():
+ """Test condition block containing several code blocks."""
+ result = arch_canonicalize("rv32ec", "20191213")
+ assert "rv32ec_zca" in result
+
+ # Collect test functions
+ test_functions = [
+ test_basic_arch_parsing,
+ test_simple_extensions,
+ test_implied_extensions,
+ test_conditional_dependencies,
+ test_parse_dep_exts,
+ test_evaluate_conditional_dependency,
+ test_parse_define_riscv_ext,
+ test_parse_long_condition_block
+ ]
+
+ # Drive each test function directly and tally the results.
+ print("Running unit tests...")
+
+ passed = 0
+ failed = 0
+
+ for test_func in test_functions:
+ try:
+ print(f" Running {test_func.__name__}...", end=" ")
+ test_func()
+ print("PASSED")
+ passed += 1
+ except Exception as e:
+ print(f"FAILED: {e}")
+ failed += 1
+
+ print(f"\nTest Summary: {passed} passed, {failed} failed")
+
+ if failed == 0:
+ print("\nAll tests passed!")
+ return 0
+ else:
+ print(f"\n{failed} test(s) failed!")
+ return 1
+
+if __name__ == "__main__":
+ parser = argparse.ArgumentParser()
+ parser.add_argument('-misa-spec', type=str,
+ default='20191213',
+ choices=SUPPORTED_ISA_SPEC)
+ parser.add_argument('--dump-all', action='store_true',
+ help='Dump all extensions and their implied extensions')
+ parser.add_argument('--selftest', action='store_true',
+ help='Run unit tests using pytest')
+ parser.add_argument('arch_strs', nargs='*',
+ help='Architecture strings to canonicalize')
+
+ args = parser.parse_args()
+
+ if args.dump_all:
+ dump_all_extensions()
+ elif args.selftest:
+ sys.exit(run_unit_tests())
+ elif args.arch_strs:
+ for arch in args.arch_strs:
+ print (arch_canonicalize(arch, args.misa_spec))
+ else:
+ parser.print_help()
+ sys.exit(1)
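
With the new entry point the script (GCC's arch-canonicalize helper; the
file header is above this excerpt) can be driven in three ways. Illustrative
invocations, output omitted:

    ./arch-canonicalize rv64imafdc
    ./arch-canonicalize -misa-spec=20191213 rv32imac_zba
    ./arch-canonicalize --dump-all
    ./arch-canonicalize --selftest

Called with no arguments it now prints the argparse help and exits with
status 1, replacing the old hand-rolled usage message.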
diff --git a/gcc/config/riscv/autovec-opt.md b/gcc/config/riscv/autovec-opt.md
index 0c3b0cc..9695fdc 100644
--- a/gcc/config/riscv/autovec-opt.md
+++ b/gcc/config/riscv/autovec-opt.md
@@ -1673,3 +1673,474 @@
DONE;
}
[(set_attr "type" "vandn")])
+
+
+;; =============================================================================
+;; Combine vec_duplicate + op.vv to op.vx
+;; Include
+;; - vadd.vx
+;; - vsub.vx
+;; - vrsub.vx
+;; - vand.vx
+;; - vor.vx
+;; - vmul.vx
+;; - vdiv.vx
+;; - vdivu.vx
+;; - vrem.vx
+;; - vremu.vx
+;; - vmax.vx
+;; - vmaxu.vx
+;; - vmin.vx
+;; - vminu.vx
+;; - vsadd.vx
+;; - vsaddu.vx
+;; - vssub.vx
+;; - vssubu.vx
+;; - vaadd.vx
+;; - vaaddu.vx
+;; - vmerge.vxm
+;; =============================================================================
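+;;
+;; Illustrative example (not part of the patch): a loop such as
+;;   for (int i = 0; i < n; i++) a[i] = b[i] + x;
+;; is vectorized with a vec_duplicate of x feeding a vadd.vv; the
+;; patterns below let combine fold the broadcast away into vadd.vx.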
+(define_insn_and_split "*<optab>_vx_<mode>"
+ [(set (match_operand:V_VLSI 0 "register_operand")
+ (any_int_binop_no_shift_vdup_v:V_VLSI
+ (vec_duplicate:V_VLSI
+ (match_operand:<VEL> 1 "register_operand"))
+ (match_operand:V_VLSI 2 "<binop_rhs2_predicate>")))]
+ "TARGET_VECTOR && can_create_pseudo_p ()"
+ "#"
+ "&& 1"
+ [(const_int 0)]
+ {
+ riscv_vector::expand_vx_binary_vec_dup_vec (operands[0], operands[2],
+ operands[1], <CODE>,
+ <MODE>mode);
+
+ DONE;
+ }
+ [(set_attr "type" "vialu")])
+
+(define_insn_and_split "*<optab>_vx_<mode>"
+ [(set (match_operand:V_VLSI 0 "register_operand")
+ (any_int_binop_no_shift_v_vdup:V_VLSI
+ (match_operand:V_VLSI 1 "<binop_rhs2_predicate>")
+ (vec_duplicate:V_VLSI
+ (match_operand:<VEL> 2 "register_operand"))))]
+ "TARGET_VECTOR && can_create_pseudo_p ()"
+ "#"
+ "&& 1"
+ [(const_int 0)]
+ {
+ riscv_vector::expand_vx_binary_vec_vec_dup (operands[0], operands[1],
+ operands[2], <CODE>,
+ <MODE>mode);
+
+ DONE;
+ }
+ [(set_attr "type" "vialu")])
+
+(define_insn_and_split "*<sat_op_v_vdup>_vx_<mode>"
+ [(set (match_operand:V_VLSI 0 "register_operand")
+ (if_then_else:V_VLSI
+ (unspec:<VM>
+ [(match_operand:<VM> 1 "vector_mask_operand")
+ (match_operand 5 "vector_length_operand")
+ (match_operand 6 "const_int_operand")
+ (match_operand 7 "const_int_operand")
+ (match_operand 8 "const_int_operand")
+ (match_operand 9 "const_int_operand")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)
+ (reg:SI VXRM_REGNUM)] UNSPEC_VPREDICATE)
+ (unspec:V_VLSI
+ [(match_operand:V_VLSI 3 "register_operand")
+ (vec_duplicate:V_VLSI
+ (match_operand:<VEL> 4 "reg_or_int_operand"))] VSAT_VX_OP_V_VDUP)
+ (unspec:V_VLSI
+ [(match_operand:DI 2 "register_operand")] UNSPEC_VUNDEF)))]
+ "TARGET_VECTOR && can_create_pseudo_p ()"
+ "#"
+ "&& 1"
+ [(const_int 0)]
+ {
+ int vxrm_val = INTVAL (operands[9]);
+ riscv_vector::expand_vx_binary_vxrm_vec_vec_dup (operands[0], operands[3],
+ operands[4],
+ <VSAT_VX_OP_V_VDUP>,
+ vxrm_val, <MODE>mode);
+
+ DONE;
+ }
+ [(set_attr "type" "vaalu")])
+
+(define_insn_and_split "*<sat_op_vdup_v>_vx_<mode>"
+ [(set (match_operand:V_VLSI 0 "register_operand")
+ (if_then_else:V_VLSI
+ (unspec:<VM>
+ [(match_operand:<VM> 1 "vector_mask_operand")
+ (match_operand 5 "vector_length_operand")
+ (match_operand 6 "const_int_operand")
+ (match_operand 7 "const_int_operand")
+ (match_operand 8 "const_int_operand")
+ (match_operand 9 "const_int_operand")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)
+ (reg:SI VXRM_REGNUM)] UNSPEC_VPREDICATE)
+ (unspec:V_VLSI
+ [(vec_duplicate:V_VLSI
+ (match_operand:<VEL> 4 "reg_or_int_operand"))
+ (match_operand:V_VLSI 3 "register_operand")] VSAT_VX_OP_VDUP_V)
+ (unspec:V_VLSI
+ [(match_operand:DI 2 "register_operand")] UNSPEC_VUNDEF)))]
+ "TARGET_VECTOR && can_create_pseudo_p ()"
+ "#"
+ "&& 1"
+ [(const_int 0)]
+ {
+ int vxrm_val = INTVAL (operands[9]);
+ riscv_vector::expand_vx_binary_vxrm_vec_dup_vec (operands[0], operands[3],
+ operands[4],
+ <VSAT_VX_OP_VDUP_V>,
+ vxrm_val, <MODE>mode);
+
+ DONE;
+ }
+ [(set_attr "type" "vaalu")])
+
+(define_insn_and_split "*merge_vx_<mode>"
+ [(set (match_operand:V_VLSI 0 "register_operand")
+ (if_then_else:V_VLSI
+ (match_operand:<VM> 3 "vector_mask_operand")
+ (vec_duplicate:V_VLSI
+ (match_operand:<VEL> 2 "reg_or_int_operand"))
+ (match_operand:V_VLSI 1 "register_operand")))]
+ "TARGET_VECTOR && can_create_pseudo_p ()"
+ "#"
+ "&& 1"
+ [(const_int 0)]
+ {
+ insn_code icode = code_for_pred_merge_scalar (<MODE>mode);
+ riscv_vector::emit_vlmax_insn (icode, riscv_vector::MERGE_OP, operands);
+ DONE;
+ }
+ [(set_attr "type" "vimerge")])
+
+(define_insn_and_split "*vmacc_vx_<mode>"
+ [(set (match_operand:V_VLSI 0 "register_operand")
+ (plus:V_VLSI
+ (mult:V_VLSI
+ (vec_duplicate:V_VLSI
+ (match_operand:<VEL> 1 "register_operand"))
+ (match_operand:V_VLSI 2 "register_operand"))
+ (match_operand:V_VLSI 3 "register_operand")))]
+ "TARGET_VECTOR && can_create_pseudo_p ()"
+ "#"
+ "&& 1"
+ [(const_int 0)]
+ {
+ insn_code icode = code_for_pred_mul_plus_vx (<MODE>mode);
+ rtx ops[] = {operands[0], operands[1], operands[2], operands[3],
+ RVV_VUNDEF(<MODE>mode)};
+ riscv_vector::emit_vlmax_insn (icode, riscv_vector::TERNARY_OP, ops);
+
+ DONE;
+ }
+ [(set_attr "type" "vimuladd")])
+
+(define_insn_and_split "*vnmsac_vx_<mode>"
+ [(set (match_operand:V_VLSI 0 "register_operand")
+ (minus:V_VLSI
+ (match_operand:V_VLSI 3 "register_operand")
+ (mult:V_VLSI
+ (vec_duplicate:V_VLSI
+ (match_operand:<VEL> 1 "register_operand"))
+ (match_operand:V_VLSI 2 "register_operand"))))]
+ "TARGET_VECTOR && can_create_pseudo_p ()"
+ "#"
+ "&& 1"
+ [(const_int 0)]
+ {
+ insn_code icode = code_for_pred_vnmsac_vx (<MODE>mode);
+ rtx ops[] = {operands[0], operands[1], operands[2], operands[3],
+ RVV_VUNDEF(<MODE>mode)};
+ riscv_vector::emit_vlmax_insn (icode, riscv_vector::TERNARY_OP, ops);
+
+ DONE;
+ }
+ [(set_attr "type" "vimuladd")])
+
+
+;; =============================================================================
+;; Combine vec_duplicate + op.vv to op.vf
+;; Include
+;; - vfmadd.vf
+;; - vfmsub.vf
+;; - vfnmadd.vf
+;; - vfnmsub.vf
+;; - vfmacc.vf
+;; - vfmsac.vf
+;; - vfnmacc.vf
+;; - vfnmsac.vf
+;; - vfwmacc.vf
+;; - vfwmsac.vf
+;; =============================================================================
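+;;
+;; Illustrative example (not part of the patch): a loop such as
+;;   for (int i = 0; i < n; i++) a[i] += x * b[i];
+;; is vectorized with a vec_duplicate of x feeding the .vv form of the
+;; fused multiply-add; the patterns below fold that into the .vf form.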
+
+;; vfmadd.vf, vfmsub.vf, vfmacc.vf, vfmsac.vf
+(define_insn_and_split "*<optab>_vf_<mode>"
+ [(set (match_operand:V_VLSF 0 "register_operand")
+ (plus_minus:V_VLSF
+ (mult:V_VLSF
+ (vec_duplicate:V_VLSF
+ (match_operand:<VEL> 1 "register_operand"))
+ (match_operand:V_VLSF 2 "register_operand"))
+ (match_operand:V_VLSF 3 "register_operand")))]
+ "TARGET_VECTOR && can_create_pseudo_p ()"
+ "#"
+ "&& 1"
+ [(const_int 0)]
+ {
+ rtx ops[] = {operands[0], operands[1], operands[2], operands[3],
+ RVV_VUNDEF(<MODE>mode)};
+ riscv_vector::emit_vlmax_insn (code_for_pred_mul_scalar (<CODE>, <MODE>mode),
+ riscv_vector::TERNARY_OP_FRM_DYN, ops);
+ DONE;
+ }
+ [(set_attr "type" "vfmuladd")]
+)
+
+;; vfnmsub.vf, vfnmsac.vf
+(define_insn_and_split "*vfnmsub_<mode>"
+ [(set (match_operand:V_VLSF 0 "register_operand")
+ (minus:V_VLSF
+ (match_operand:V_VLSF 3 "register_operand")
+ (mult:V_VLSF
+ (vec_duplicate:V_VLSF
+ (match_operand:<VEL> 1 "register_operand"))
+ (match_operand:V_VLSF 2 "register_operand"))))]
+ "TARGET_VECTOR && can_create_pseudo_p ()"
+ "#"
+ "&& 1"
+ [(const_int 0)]
+ {
+ rtx ops[] = {operands[0], operands[1], operands[2], operands[3],
+ RVV_VUNDEF(<MODE>mode)};
+ riscv_vector::emit_vlmax_insn (code_for_pred_mul_neg_scalar (PLUS, <MODE>mode),
+ riscv_vector::TERNARY_OP_FRM_DYN, ops);
+ DONE;
+ }
+ [(set_attr "type" "vfmuladd")]
+)
+
+;; vfnmadd.vf, vfnmacc.vf
+(define_insn_and_split "*vfnmadd_<mode>"
+ [(set (match_operand:V_VLSF 0 "register_operand")
+ (minus:V_VLSF
+ (mult:V_VLSF
+ (neg:V_VLSF
+ (match_operand:V_VLSF 2 "register_operand"))
+ (vec_duplicate:V_VLSF
+ (match_operand:<VEL> 1 "register_operand")))
+ (match_operand:V_VLSF 3 "register_operand")))]
+ "TARGET_VECTOR && can_create_pseudo_p ()"
+ "#"
+ "&& 1"
+ [(const_int 0)]
+ {
+ rtx ops[] = {operands[0], operands[1], operands[2], operands[3],
+ RVV_VUNDEF(<MODE>mode)};
+ riscv_vector::emit_vlmax_insn (code_for_pred_mul_neg_scalar (MINUS, <MODE>mode),
+ riscv_vector::TERNARY_OP_FRM_DYN, ops);
+ DONE;
+ }
+ [(set_attr "type" "vfmuladd")]
+)
+
+;; vfwmacc.vf, vfwmsac.vf
+(define_insn_and_split "*vfwmacc_vf_<mode>"
+ [(set (match_operand:VWEXTF 0 "register_operand")
+ (plus_minus:VWEXTF
+ (mult:VWEXTF
+ (float_extend:VWEXTF
+ (match_operand:<V_DOUBLE_TRUNC> 3 "register_operand"))
+ (vec_duplicate:VWEXTF
+ (float_extend:<VEL>
+ (match_operand:<VSUBEL> 2 "register_operand"))))
+ (match_operand:VWEXTF 1 "register_operand")))]
+ "TARGET_VECTOR && can_create_pseudo_p ()"
+ "#"
+ "&& 1"
+ [(const_int 0)]
+ {
+ rtx ops[] = {operands[0], operands[1], operands[2], operands[3]};
+ riscv_vector::emit_vlmax_insn (code_for_pred_widen_mul_scalar (<CODE>, <MODE>mode),
+ riscv_vector::WIDEN_TERNARY_OP_FRM_DYN, ops);
+ DONE;
+ }
+ [(set_attr "type" "vfwmuladd")]
+)
+
+;; Intermediate pattern for vfwmacc.vf and vfwmsac.vf used by combine
+(define_insn_and_split "*extend_vf_<mode>"
+ [(set (match_operand:VWEXTF 0 "register_operand")
+ (vec_duplicate:VWEXTF
+ (float_extend:<VEL>
+ (match_operand:<VSUBEL> 1 "register_operand"))))]
+ "TARGET_VECTOR && can_create_pseudo_p ()"
+ "#"
+ "&& 1"
+ [(const_int 0)]
+ {
+ rtx tmp = gen_reg_rtx (<VEL>mode);
+ emit_insn (gen_extend<vsubel><vel>2 (tmp, operands[1]));
+
+ rtx ops[] = {operands[0], tmp};
+ riscv_vector::expand_broadcast (<MODE>mode, ops);
+ DONE;
+ }
+ [(set_attr "type" "vfwmuladd")]
+)
+
+;; vfwnmacc.vf
+(define_insn_and_split "*vfwnmacc_vf_<mode>"
+ [(set (match_operand:VWEXTF 0 "register_operand")
+ (minus:VWEXTF
+ (mult:VWEXTF
+ (neg:VWEXTF
+ (vec_duplicate:VWEXTF
+ (float_extend:<VEL>
+ (match_operand:<VSUBEL> 2 "register_operand"))))
+ (float_extend:VWEXTF
+ (match_operand:<V_DOUBLE_TRUNC> 3 "register_operand")))
+ (match_operand:VWEXTF 1 "register_operand")))]
+ "TARGET_VECTOR && can_create_pseudo_p ()"
+ "#"
+ "&& 1"
+ [(const_int 0)]
+ {
+ rtx ops[] = {operands[0], operands[1], operands[2], operands[3]};
+ riscv_vector::emit_vlmax_insn (
+ code_for_pred_widen_mul_neg_scalar(MINUS, <MODE>mode),
+ riscv_vector::WIDEN_TERNARY_OP_FRM_DYN, ops);
+ DONE;
+ }
+ [(set_attr "type" "vfwmuladd")]
+)
+
+;; vfwnmsac.vf
+(define_insn_and_split "*vfwnmsac_vf_<mode>"
+ [(set (match_operand:VWEXTF 0 "register_operand")
+ (minus:VWEXTF
+ (match_operand:VWEXTF 1 "register_operand")
+ (mult:VWEXTF
+ (float_extend:VWEXTF
+ (match_operand:<V_DOUBLE_TRUNC> 3 "register_operand"))
+ (vec_duplicate:VWEXTF
+ (float_extend:<VEL>
+ (match_operand:<VSUBEL> 2 "register_operand"))))))]
+ "TARGET_VECTOR && can_create_pseudo_p ()"
+ "#"
+ "&& 1"
+ [(const_int 0)]
+ {
+ rtx ops[] = {operands[0], operands[1], operands[2], operands[3]};
+ riscv_vector::emit_vlmax_insn (
+ code_for_pred_widen_mul_neg_scalar (PLUS, <MODE>mode),
+ riscv_vector::WIDEN_TERNARY_OP_FRM_DYN, ops);
+ DONE;
+ }
+ [(set_attr "type" "vfwmuladd")]
+)
+
+;; vfmul.vf
+(define_insn_and_split "*vfmul_vf_<mode>"
+ [(set (match_operand:V_VLSF 0 "register_operand")
+ (mult:V_VLSF
+ (vec_duplicate:V_VLSF
+ (match_operand:<VEL> 2 "register_operand"))
+ (match_operand:V_VLSF 1 "register_operand")))]
+ "TARGET_VECTOR && can_create_pseudo_p ()"
+ "#"
+ "&& 1"
+ [(const_int 0)]
+ {
+ riscv_vector::emit_vlmax_insn (code_for_pred_scalar (MULT, <MODE>mode),
+ riscv_vector::BINARY_OP_FRM_DYN, operands);
+ DONE;
+ }
+ [(set_attr "type" "vfmul")]
+)
+
+;; vfrdiv.vf
+(define_insn_and_split "*vfrdiv_vf_<mode>"
+ [(set (match_operand:V_VLSF 0 "register_operand")
+ (div:V_VLSF
+ (vec_duplicate:V_VLSF
+ (match_operand:<VEL> 2 "register_operand"))
+ (match_operand:V_VLSF 1 "register_operand")))]
+ "TARGET_VECTOR && can_create_pseudo_p ()"
+ "#"
+ "&& 1"
+ [(const_int 0)]
+ {
+ riscv_vector::emit_vlmax_insn (code_for_pred_reverse_scalar (DIV, <MODE>mode),
+ riscv_vector::BINARY_OP_FRM_DYN, operands);
+ DONE;
+ }
+ [(set_attr "type" "vfdiv")]
+)
+
+;; vfmin.vf
+(define_insn_and_split "*vfmin_vf_<mode>"
+ [(set (match_operand:V_VLSF 0 "register_operand")
+ (smin:V_VLSF
+ (vec_duplicate:V_VLSF
+ (match_operand:<VEL> 2 "register_operand"))
+ (match_operand:V_VLSF 1 "register_operand")))]
+ "TARGET_VECTOR && can_create_pseudo_p ()"
+ "#"
+ "&& 1"
+ [(const_int 0)]
+ {
+ riscv_vector::emit_vlmax_insn (code_for_pred_scalar (SMIN, <MODE>mode),
+ riscv_vector::BINARY_OP, operands);
+ DONE;
+ }
+ [(set_attr "type" "vfminmax")]
+)
+
+(define_insn_and_split "*vfmin_vf_ieee_<mode>"
+ [(set (match_operand:V_VLSF 0 "register_operand")
+ (unspec:V_VLSF [
+ (vec_duplicate:V_VLSF
+ (match_operand:<VEL> 2 "register_operand"))
+ (match_operand:V_VLSF 1 "register_operand")
+ ] UNSPEC_VFMIN))]
+ "TARGET_VECTOR && !HONOR_SNANS (<MODE>mode) && can_create_pseudo_p ()"
+ "#"
+ "&& 1"
+ [(const_int 0)]
+ {
+ riscv_vector::emit_vlmax_insn (code_for_pred_scalar (UNSPEC_VFMIN, <MODE>mode),
+ riscv_vector::BINARY_OP, operands);
+ DONE;
+ }
+ [(set_attr "type" "vfminmax")]
+)
+
+(define_insn_and_split "*vfmin_vf_ieee_<mode>"
+ [(set (match_operand:V_VLSF 0 "register_operand")
+ (unspec:V_VLSF [
+ (match_operand:V_VLSF 1 "register_operand")
+ (vec_duplicate:V_VLSF
+ (match_operand:<VEL> 2 "register_operand"))
+ ] UNSPEC_VFMIN))]
+ "TARGET_VECTOR && !HONOR_SNANS (<MODE>mode) && can_create_pseudo_p ()"
+ "#"
+ "&& 1"
+ [(const_int 0)]
+ {
+ riscv_vector::emit_vlmax_insn (code_for_pred_scalar (UNSPEC_VFMIN, <MODE>mode),
+ riscv_vector::BINARY_OP, operands);
+ DONE;
+ }
+ [(set_attr "type" "vfminmax")]
+)
diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md
index 9e51e3c..48de5ef 100644
--- a/gcc/config/riscv/autovec.md
+++ b/gcc/config/riscv/autovec.md
@@ -1338,7 +1338,7 @@
(define_expand "select_vl<mode>"
[(match_operand:P 0 "register_operand")
(match_operand:P 1 "vector_length_operand")
- (match_operand:P 2 "")]
+ (match_operand:P 2 "immediate_operand")]
"TARGET_VECTOR"
{
riscv_vector::expand_select_vl (operands);
@@ -1359,9 +1359,7 @@
if (operands[2] == const0_rtx)
{
rtx ops[] = {operands[0], operands[0], operands[1]};
- riscv_vector::emit_nonvlmax_insn (code_for_pred_broadcast (<MODE>mode),
- riscv_vector::SCALAR_MOVE_MERGED_OP_TU,
- ops, CONST1_RTX (Pmode));
+ riscv_vector::expand_set_first_tu (<MODE>mode, ops);
}
else
{
@@ -1385,8 +1383,7 @@
VL we need for the slide. */
rtx tmp = gen_reg_rtx (<MODE>mode);
rtx ops1[] = {tmp, operands[1]};
- emit_nonvlmax_insn (code_for_pred_broadcast (<MODE>mode),
- riscv_vector::UNARY_OP, ops1, length);
+ riscv_vector::expand_broadcast (<MODE>mode, ops1, length);
/* Slide exactly one element up leaving the tail elements
unchanged. */
@@ -2489,21 +2486,29 @@
(sign_extend:VWEXTI
(match_operand:<V_DOUBLE_TRUNC> 1 "register_operand"))
(sign_extend:VWEXTI
- (match_operand:<V_DOUBLE_TRUNC> 2 "register_operand"))))))]
+ (match_operand:<V_DOUBLE_TRUNC> 2 "register_operand")))
+ (const_int 1))))]
"TARGET_VECTOR"
-{
- /* First emit a widening addition. */
- rtx tmp1 = gen_reg_rtx (<MODE>mode);
- rtx ops1[] = {tmp1, operands[1], operands[2]};
- insn_code icode = code_for_pred_dual_widen (PLUS, SIGN_EXTEND, <MODE>mode);
- riscv_vector::emit_vlmax_insn (icode, riscv_vector::BINARY_OP, ops1);
+ {
+ insn_code icode = code_for_pred (UNSPEC_VAADD, <V_DOUBLE_TRUNC>mode);
+ riscv_vector::emit_vlmax_insn (icode, riscv_vector::BINARY_OP_VXRM_RDN,
+ operands);
+ DONE;
+ }
+)
- /* Then a narrowing shift. */
- rtx ops2[] = {operands[0], tmp1, const1_rtx};
- icode = code_for_pred_narrow_scalar (ASHIFTRT, <MODE>mode);
- riscv_vector::emit_vlmax_insn (icode, riscv_vector::BINARY_OP, ops2);
- DONE;
-})
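+;; The avg3_floor expanders now map directly onto vaadd with the
+;; round-down (RDN) fixed-point rounding mode; the avg3_ceil expanders
+;; below use round-to-nearest-up (RNU). Either way a single vaadd.vv
+;; replaces the old widen/add/shift sequence.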
+(define_expand "avg<mode>3_floor"
+ [(match_operand:V_VLSI_D 0 "register_operand")
+ (match_operand:V_VLSI_D 1 "register_operand")
+ (match_operand:V_VLSI_D 2 "register_operand")]
+ "TARGET_VECTOR"
+ {
+ insn_code icode = code_for_pred (UNSPEC_VAADD, <MODE>mode);
+ riscv_vector::emit_vlmax_insn (icode, riscv_vector::BINARY_OP_VXRM_RDN,
+ operands);
+ DONE;
+ }
+)
(define_expand "avg<v_double_trunc>3_ceil"
[(set (match_operand:<V_DOUBLE_TRUNC> 0 "register_operand")
@@ -2515,27 +2520,29 @@
(match_operand:<V_DOUBLE_TRUNC> 1 "register_operand"))
(sign_extend:VWEXTI
(match_operand:<V_DOUBLE_TRUNC> 2 "register_operand")))
- (const_int 1)))))]
+ (const_int 1))
+ (const_int 1))))]
"TARGET_VECTOR"
-{
- /* First emit a widening addition. */
- rtx tmp1 = gen_reg_rtx (<MODE>mode);
- rtx ops1[] = {tmp1, operands[1], operands[2]};
- insn_code icode = code_for_pred_dual_widen (PLUS, SIGN_EXTEND, <MODE>mode);
- riscv_vector::emit_vlmax_insn (icode, riscv_vector::BINARY_OP, ops1);
-
- /* Then add 1. */
- rtx tmp2 = gen_reg_rtx (<MODE>mode);
- rtx ops2[] = {tmp2, tmp1, const1_rtx};
- icode = code_for_pred_scalar (PLUS, <MODE>mode);
- riscv_vector::emit_vlmax_insn (icode, riscv_vector::BINARY_OP, ops2);
+ {
+ insn_code icode = code_for_pred (UNSPEC_VAADD, <V_DOUBLE_TRUNC>mode);
+ riscv_vector::emit_vlmax_insn (icode, riscv_vector::BINARY_OP_VXRM_RNU,
+ operands);
+ DONE;
+ }
+)
- /* Finally, a narrowing shift. */
- rtx ops3[] = {operands[0], tmp2, const1_rtx};
- icode = code_for_pred_narrow_scalar (ASHIFTRT, <MODE>mode);
- riscv_vector::emit_vlmax_insn (icode, riscv_vector::BINARY_OP, ops3);
- DONE;
-})
+(define_expand "avg<mode>3_ceil"
+ [(match_operand:V_VLSI_D 0 "register_operand")
+ (match_operand:V_VLSI_D 1 "register_operand")
+ (match_operand:V_VLSI_D 2 "register_operand")]
+ "TARGET_VECTOR"
+ {
+ insn_code icode = code_for_pred (UNSPEC_VAADD, <MODE>mode);
+ riscv_vector::emit_vlmax_insn (icode, riscv_vector::BINARY_OP_VXRM_RNU,
+ operands);
+ DONE;
+ }
+)
;; csrwi vxrm, 2
;; vaaddu.vv vd, vs2, vs1
@@ -2546,7 +2553,8 @@
"TARGET_VECTOR"
{
insn_code icode = code_for_pred (UNSPEC_VAADDU, <MODE>mode);
- riscv_vector::emit_vlmax_insn (icode, riscv_vector::BINARY_OP_VXRM_RDN, operands);
+ riscv_vector::emit_vlmax_insn (icode, riscv_vector::BINARY_OP_VXRM_RDN,
+ operands);
DONE;
})
@@ -2559,7 +2567,8 @@
"TARGET_VECTOR"
{
insn_code icode = code_for_pred (UNSPEC_VAADDU, <MODE>mode);
- riscv_vector::emit_vlmax_insn (icode, riscv_vector::BINARY_OP_VXRM_RNU, operands);
+ riscv_vector::emit_vlmax_insn (icode, riscv_vector::BINARY_OP_VXRM_RNU,
+ operands);
DONE;
})
@@ -2584,7 +2593,8 @@
(match_operand:V_VLSF 1 "register_operand")]
"TARGET_VECTOR && !flag_trapping_math && !flag_rounding_math"
{
- riscv_vector::expand_vec_ceil (operands[0], operands[1], <MODE>mode, <VCONVERT>mode);
+ riscv_vector::expand_vec_ceil (operands[0], operands[1], <MODE>mode,
+ <VCONVERT>mode);
DONE;
}
)
@@ -2594,7 +2604,8 @@
(match_operand:V_VLSF 1 "register_operand")]
"TARGET_VECTOR && !flag_trapping_math && !flag_rounding_math"
{
- riscv_vector::expand_vec_floor (operands[0], operands[1], <MODE>mode, <VCONVERT>mode);
+ riscv_vector::expand_vec_floor (operands[0], operands[1], <MODE>mode,
+ <VCONVERT>mode);
DONE;
}
)
@@ -2604,7 +2615,8 @@
(match_operand:V_VLSF 1 "register_operand")]
"TARGET_VECTOR && !flag_trapping_math && !flag_rounding_math"
{
- riscv_vector::expand_vec_nearbyint (operands[0], operands[1], <MODE>mode, <VCONVERT>mode);
+ riscv_vector::expand_vec_nearbyint (operands[0], operands[1], <MODE>mode,
+ <VCONVERT>mode);
DONE;
}
)
@@ -2614,7 +2626,8 @@
(match_operand:V_VLSF 1 "register_operand")]
"TARGET_VECTOR && !flag_trapping_math && !flag_rounding_math"
{
- riscv_vector::expand_vec_rint (operands[0], operands[1], <MODE>mode, <VCONVERT>mode);
+ riscv_vector::expand_vec_rint (operands[0], operands[1], <MODE>mode,
+ <VCONVERT>mode);
DONE;
}
)
@@ -2624,7 +2637,8 @@
(match_operand:V_VLSF 1 "register_operand")]
"TARGET_VECTOR && !flag_trapping_math && !flag_rounding_math"
{
- riscv_vector::expand_vec_round (operands[0], operands[1], <MODE>mode, <VCONVERT>mode);
+ riscv_vector::expand_vec_round (operands[0], operands[1], <MODE>mode,
+ <VCONVERT>mode);
DONE;
}
)
@@ -2634,7 +2648,8 @@
(match_operand:V_VLSF 1 "register_operand")]
"TARGET_VECTOR && !flag_trapping_math && !flag_rounding_math"
{
- riscv_vector::expand_vec_trunc (operands[0], operands[1], <MODE>mode, <VCONVERT>mode);
+ riscv_vector::expand_vec_trunc (operands[0], operands[1], <MODE>mode,
+ <VCONVERT>mode);
DONE;
}
)
@@ -2644,7 +2659,8 @@
(match_operand:V_VLSF 1 "register_operand")]
"TARGET_VECTOR && !flag_trapping_math && !flag_rounding_math"
{
- riscv_vector::expand_vec_roundeven (operands[0], operands[1], <MODE>mode, <VCONVERT>mode);
+ riscv_vector::expand_vec_roundeven (operands[0], operands[1], <MODE>mode,
+ <VCONVERT>mode);
DONE;
}
)
@@ -2701,7 +2717,8 @@
(match_operand:V_VLS_F_CONVERT_SI 1 "register_operand")]
"TARGET_VECTOR && !flag_trapping_math && !flag_rounding_math"
{
- riscv_vector::expand_vec_lceil (operands[0], operands[1], <MODE>mode, <V_F2SI_CONVERT>mode);
+ riscv_vector::expand_vec_lceil (operands[0], operands[1], <MODE>mode,
+ <V_F2SI_CONVERT>mode);
DONE;
}
)
@@ -2711,7 +2728,8 @@
(match_operand:V_VLS_F_CONVERT_DI 1 "register_operand")]
"TARGET_VECTOR && !flag_trapping_math && !flag_rounding_math"
{
- riscv_vector::expand_vec_lceil (operands[0], operands[1], <MODE>mode, <V_F2DI_CONVERT>mode);
+ riscv_vector::expand_vec_lceil (operands[0], operands[1], <MODE>mode,
+ <V_F2DI_CONVERT>mode);
DONE;
}
)
@@ -2721,7 +2739,8 @@
(match_operand:V_VLS_F_CONVERT_SI 1 "register_operand")]
"TARGET_VECTOR && !flag_trapping_math && !flag_rounding_math"
{
- riscv_vector::expand_vec_lfloor (operands[0], operands[1], <MODE>mode, <V_F2SI_CONVERT>mode);
+ riscv_vector::expand_vec_lfloor (operands[0], operands[1], <MODE>mode,
+ <V_F2SI_CONVERT>mode);
DONE;
}
)
@@ -2731,7 +2750,8 @@
(match_operand:V_VLS_F_CONVERT_DI 1 "register_operand")]
"TARGET_VECTOR && !flag_trapping_math && !flag_rounding_math"
{
- riscv_vector::expand_vec_lfloor (operands[0], operands[1], <MODE>mode, <V_F2DI_CONVERT>mode);
+ riscv_vector::expand_vec_lfloor (operands[0], operands[1], <MODE>mode,
+ <V_F2DI_CONVERT>mode);
DONE;
}
)
@@ -2763,7 +2783,8 @@
(match_operand:V_VLSI 2 "register_operand")]
"TARGET_VECTOR"
{
- riscv_vector::expand_vec_usadd (operands[0], operands[1], operands[2], <MODE>mode);
+ riscv_vector::expand_vec_usadd (operands[0], operands[1], operands[2],
+ <MODE>mode);
DONE;
}
)
@@ -2774,7 +2795,8 @@
(match_operand:V_VLSI 2 "register_operand")]
"TARGET_VECTOR"
{
- riscv_vector::expand_vec_ssadd (operands[0], operands[1], operands[2], <MODE>mode);
+ riscv_vector::expand_vec_ssadd (operands[0], operands[1], operands[2],
+ <MODE>mode);
DONE;
}
)
@@ -2785,7 +2807,8 @@
(match_operand:V_VLSI 2 "register_operand")]
"TARGET_VECTOR"
{
- riscv_vector::expand_vec_ussub (operands[0], operands[1], operands[2], <MODE>mode);
+ riscv_vector::expand_vec_ussub (operands[0], operands[1], operands[2],
+ <MODE>mode);
DONE;
}
)
@@ -2796,7 +2819,8 @@
(match_operand:V_VLSI 2 "register_operand")]
"TARGET_VECTOR"
{
- riscv_vector::expand_vec_sssub (operands[0], operands[1], operands[2], <MODE>mode);
+ riscv_vector::expand_vec_sssub (operands[0], operands[1], operands[2],
+ <MODE>mode);
DONE;
}
)
diff --git a/gcc/config/riscv/bitmanip.md b/gcc/config/riscv/bitmanip.md
index 5ed5e18..21426f4 100644
--- a/gcc/config/riscv/bitmanip.md
+++ b/gcc/config/riscv/bitmanip.md
@@ -1,4 +1,4 @@
-;; Machine description for RISC-V Bit Manipulation operations.
+;; Machine description for RISC-V Bit Manipulation operations.
;; Copyright (C) 2021-2025 Free Software Foundation, Inc.
;; This file is part of GCC.
@@ -68,23 +68,25 @@
[(set (match_operand:DI 0 "register_operand")
(zero_extend:DI (plus:SI (ashift:SI (subreg:SI (match_operand:DI 1 "register_operand") 0)
(match_operand:QI 2 "imm123_operand"))
- (subreg:SI (match_operand:DI 3 "register_operand") 0))))]
+ (subreg:SI (match_operand:DI 3 "register_operand") 0))))
+ (clobber (match_operand:DI 4 "register_operand"))]
"TARGET_64BIT && TARGET_ZBA"
- [(set (match_dup 0) (plus:DI (ashift:DI (match_dup 1) (match_dup 2)) (match_dup 3)))
- (set (match_dup 0) (zero_extend:DI (subreg:SI (match_dup 0) 0)))])
+ [(set (match_dup 4) (plus:DI (ashift:DI (match_dup 1) (match_dup 2)) (match_dup 3)))
+ (set (match_dup 0) (zero_extend:DI (subreg:SI (match_dup 4) 0)))])
(define_split
[(set (match_operand:DI 0 "register_operand")
(zero_extend:DI (plus:SI (subreg:SI (and:DI (ashift:DI (match_operand:DI 1 "register_operand")
(match_operand:QI 2 "imm123_operand"))
(match_operand:DI 3 "consecutive_bits_operand")) 0)
- (subreg:SI (match_operand:DI 4 "register_operand") 0))))]
+ (subreg:SI (match_operand:DI 4 "register_operand") 0))))
+ (clobber (match_operand:DI 5 "register_operand"))]
"TARGET_64BIT && TARGET_ZBA
&& riscv_shamt_matches_mask_p (INTVAL (operands[2]), INTVAL (operands[3]))
/* Ensure the mask includes all the bits in SImode. */
&& ((INTVAL (operands[3]) & (HOST_WIDE_INT_1U << 31)) != 0)"
- [(set (match_dup 0) (plus:DI (ashift:DI (match_dup 1) (match_dup 2)) (match_dup 4)))
- (set (match_dup 0) (zero_extend:DI (subreg:SI (match_dup 0) 0)))])
+ [(set (match_dup 5) (plus:DI (ashift:DI (match_dup 1) (match_dup 2)) (match_dup 4)))
+ (set (match_dup 0) (zero_extend:DI (subreg:SI (match_dup 5) 0)))])
; Make sure that an andi followed by a sh[123]add remains a two instruction
; sequence--and is not torn apart into slli, slri, add.
@@ -195,13 +197,14 @@
(match_operand:QI 2 "imm123_operand"))
(match_operand 3 "consecutive_bits32_operand"))
(match_operand:DI 4 "register_operand"))
- (match_operand 5 "immediate_operand")))]
+ (match_operand 5 "immediate_operand")))
+ (clobber (match_operand:DI 6 "register_operand"))]
"TARGET_64BIT && TARGET_ZBA"
- [(set (match_dup 0)
+ [(set (match_dup 6)
(plus:DI (and:DI (ashift:DI (match_dup 1) (match_dup 2))
(match_dup 3))
(match_dup 4)))
- (set (match_dup 0) (plus:DI (match_dup 0) (match_dup 5)))])
+ (set (match_dup 0) (plus:DI (match_dup 6) (match_dup 5)))])
;; ZBB extension.
@@ -423,39 +426,40 @@
"rolw\t%0,%1,%2"
[(set_attr "type" "bitmanip")])
-(define_insn_and_split "*<bitmanip_optab><GPR:mode>3_mask"
- [(set (match_operand:GPR 0 "register_operand" "= r")
- (bitmanip_rotate:GPR
- (match_operand:GPR 1 "register_operand" " r")
- (match_operator 4 "subreg_lowpart_operator"
- [(and:GPR2
- (match_operand:GPR2 2 "register_operand" "r")
- (match_operand 3 "<GPR:shiftm1>" "<GPR:shiftm1p>"))])))]
+(define_insn "*<bitmanip_optab><mode>3_mask"
+ [(set (match_operand:X 0 "register_operand" "=r")
+ (bitmanip_rotate:X
+ (match_operand:X 1 "register_operand" "r")
+ (match_operator 4 "subreg_lowpart_operator"
+ [(and:X (match_operand:X 2 "register_operand" "r")
+ (match_operand 3 "<X:shiftm1>" "<X:shiftm1p>"))])))]
"TARGET_ZBB || TARGET_ZBKB"
- "#"
- "&& 1"
- [(set (match_dup 0)
- (bitmanip_rotate:GPR (match_dup 1)
- (match_dup 2)))]
- "operands[2] = gen_lowpart (QImode, operands[2]);"
+ "<bitmanip_insn>\t%0,%1,%2"
[(set_attr "type" "bitmanip")
- (set_attr "mode" "<GPR:MODE>")])
+ (set_attr "mode" "<X:MODE>")])
-(define_insn_and_split "*<bitmanip_optab>si3_sext_mask"
- [(set (match_operand:DI 0 "register_operand" "= r")
- (sign_extend:DI (bitmanip_rotate:SI
- (match_operand:SI 1 "register_operand" " r")
- (match_operator 4 "subreg_lowpart_operator"
- [(and:GPR
- (match_operand:GPR 2 "register_operand" "r")
- (match_operand 3 "const_si_mask_operand"))]))))]
+(define_insn "*<bitmanip_optab>3_mask_si"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (bitmanip_rotate:SI
+ (match_operand:SI 1 "register_operand" "r")
+ (match_operator 3 "subreg_lowpart_operator"
+ [(and:X (match_operand:SI 2 "register_operand" "r")
+ (const_int 31))])))]
"TARGET_64BIT && (TARGET_ZBB || TARGET_ZBKB)"
- "#"
- "&& 1"
- [(set (match_dup 0)
- (sign_extend:DI (bitmanip_rotate:SI (match_dup 1)
- (match_dup 2))))]
- "operands[2] = gen_lowpart (QImode, operands[2]);"
+ "<bitmanip_insn>w\t%0,%1,%2"
+ [(set_attr "type" "bitmanip")
+ (set_attr "mode" "SI")])
+
+(define_insn "*<bitmanip_optab>si3_sext_mask"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (sign_extend:DI
+ (bitmanip_rotate:SI
+ (match_operand:SI 1 "register_operand" "r")
+ (match_operator 3 "subreg_lowpart_operator"
+ [(and:X (match_operand:GPR 2 "register_operand" "r")
+ (const_int 31))]))))]
+ "TARGET_64BIT && (TARGET_ZBB || TARGET_ZBKB)"
+ "<bitmanip_insn>w\t%0,%1,%2"
[(set_attr "type" "bitmanip")
(set_attr "mode" "DI")])
@@ -842,44 +846,40 @@
[(set_attr "type" "bitmanip")])
;; In case we have "val & ~IMM" where ~IMM has 2 bits set.
-(define_insn_and_split "*bclri<mode>_nottwobits"
- [(set (match_operand:X 0 "register_operand" "=r")
- (and:X (match_operand:X 1 "register_operand" "r")
- (match_operand:X 2 "const_nottwobits_not_arith_operand" "i")))]
+(define_split
+ [(set (match_operand:X 0 "register_operand")
+ (and:X (match_operand:X 1 "register_operand")
+ (match_operand:X 2 "const_nottwobits_not_arith_operand")))
+ (clobber (match_operand:X 3 "register_operand"))]
"TARGET_ZBS && !paradoxical_subreg_p (operands[1])"
- "#"
- "&& reload_completed"
- [(set (match_dup 0) (and:X (match_dup 1) (match_dup 3)))
- (set (match_dup 0) (and:X (match_dup 0) (match_dup 4)))]
+ [(set (match_dup 3) (and:X (match_dup 1) (match_dup 4)))
+ (set (match_dup 0) (and:X (match_dup 3) (match_dup 5)))]
{
- unsigned HOST_WIDE_INT bits = ~UINTVAL (operands[2]);
- unsigned HOST_WIDE_INT topbit = HOST_WIDE_INT_1U << floor_log2 (bits);
+ unsigned HOST_WIDE_INT bits = ~UINTVAL (operands[2]);
+ unsigned HOST_WIDE_INT topbit = HOST_WIDE_INT_1U << floor_log2 (bits);
- operands[3] = GEN_INT (~bits | topbit);
- operands[4] = GEN_INT (~topbit);
-}
-[(set_attr "type" "bitmanip")])
+ operands[4] = GEN_INT (~bits | topbit);
+ operands[5] = GEN_INT (~topbit);
+})
;; In case of a paradoxical subreg, the sign bit and the high bits are
;; not allowed to be changed
-(define_insn_and_split "*bclridisi_nottwobits"
- [(set (match_operand:DI 0 "register_operand" "=r")
- (and:DI (match_operand:DI 1 "register_operand" "r")
- (match_operand:DI 2 "const_nottwobits_not_arith_operand" "i")))]
+(define_split
+ [(set (match_operand:DI 0 "register_operand")
+ (and:DI (match_operand:DI 1 "register_operand")
+ (match_operand:DI 2 "const_nottwobits_not_arith_operand")))
+ (clobber (match_operand:DI 3 "register_operand"))]
"TARGET_64BIT && TARGET_ZBS
&& clz_hwi (~UINTVAL (operands[2])) > 33"
- "#"
- "&& reload_completed"
- [(set (match_dup 0) (and:DI (match_dup 1) (match_dup 3)))
- (set (match_dup 0) (and:DI (match_dup 0) (match_dup 4)))]
+ [(set (match_dup 3) (and:DI (match_dup 1) (match_dup 4)))
+ (set (match_dup 0) (and:DI (match_dup 3) (match_dup 5)))]
{
- unsigned HOST_WIDE_INT bits = ~UINTVAL (operands[2]);
- unsigned HOST_WIDE_INT topbit = HOST_WIDE_INT_1U << floor_log2 (bits);
+ unsigned HOST_WIDE_INT bits = ~UINTVAL (operands[2]);
+ unsigned HOST_WIDE_INT topbit = HOST_WIDE_INT_1U << floor_log2 (bits);
- operands[3] = GEN_INT (~bits | topbit);
- operands[4] = GEN_INT (~topbit);
-}
-[(set_attr "type" "bitmanip")])
+ operands[4] = GEN_INT (~bits | topbit);
+ operands[5] = GEN_INT (~topbit);
+})
;; An outer AND with a constant where bits 31..63 are 0 can be seen as
;; a virtual zero extension from 31 to 64 bits.
@@ -908,6 +908,24 @@
"bext\t%0,%1,%2"
[(set_attr "type" "bitmanip")])
+;; We do not define SHIFT_COUNT_TRUNCATED, so we need variants that
+;; mask/extend the count if we want to eliminate those ops.
+;;
+;; We could (in theory) use GPR for the various modes, but I haven't
+;; seen those cases appear in practice. Without a testcase I've
+;; elected to keep the X modes, which are easy to reason about.
+(define_insn "*bext<mode>_mask_pos"
+ [(set (match_operand:X 0 "register_operand" "=r")
+ (zero_extract:X (match_operand:X 1 "register_operand" "r")
+ (const_int 1)
+ (and:X
+ (match_operand:X 2 "register_operand" "r")
+ (match_operand 3 "const_int_operand"))))]
+ "(TARGET_ZBS
+ && INTVAL (operands[3]) + 1 == GET_MODE_BITSIZE (<MODE>mode))"
+ "bext\t%0,%1,%2"
+ [(set_attr "type" "bitmanip")])
+
;; This is a bext followed by a seqz. Normally this would be a 3->2 split
;; But the and-not pattern with a constant operand is a define_insn_and_split,
;; so this looks like a 2->2 split, which combine rejects. So implement it
@@ -992,12 +1010,13 @@
[(set (match_operand:X 0 "register_operand")
(and:X (not:X (lshiftrt:X (match_operand:X 1 "register_operand")
(match_operand:QI 2 "register_operand")))
- (const_int 1)))]
+ (const_int 1)))
+ (clobber (match_operand:X 3 "register_operand"))]
"TARGET_ZBS"
- [(set (match_dup 0) (zero_extract:X (match_dup 1)
+ [(set (match_dup 3) (zero_extract:X (match_dup 1)
(const_int 1)
(match_dup 2)))
- (set (match_dup 0) (xor:X (match_dup 0) (const_int 1)))]
+ (set (match_dup 0) (xor:X (match_dup 3) (const_int 1)))]
"operands[2] = gen_lowpart (<MODE>mode, operands[2]);")
;; We can create a polarity-reversed mask (i.e. bit N -> { set = 0, clear = -1 })
@@ -1008,49 +1027,49 @@
(neg:GPR (eq:GPR (zero_extract:GPR (match_operand:GPR 1 "register_operand")
(const_int 1)
(match_operand 2))
- (const_int 0))))]
+ (const_int 0))))
+ (clobber (match_operand:X 3 "register_operand"))]
"TARGET_ZBS"
- [(set (match_dup 0) (zero_extract:GPR (match_dup 1) (const_int 1) (match_dup 2)))
- (set (match_dup 0) (plus:GPR (match_dup 0) (const_int -1)))])
+ [(set (match_dup 3) (zero_extract:GPR (match_dup 1) (const_int 1) (match_dup 2)))
+ (set (match_dup 0) (plus:GPR (match_dup 3) (const_int -1)))])
;; Catch those cases where we can use a bseti/binvi + ori/xori or
;; bseti/binvi + bseti/binvi instead of a lui + addi + or/xor sequence.
(define_insn_and_split "*<or_optab>i<mode>_extrabit"
[(set (match_operand:X 0 "register_operand" "=r")
(any_or:X (match_operand:X 1 "register_operand" "r")
- (match_operand:X 2 "uimm_extra_bit_or_twobits" "i")))]
+ (match_operand:X 2 "uimm_extra_bit_or_twobits" "i")))
+ (clobber (match_scratch:X 3 "=&r"))]
"TARGET_ZBS && !single_bit_mask_operand (operands[2], VOIDmode)"
"#"
"&& reload_completed"
- [(set (match_dup 0) (<or_optab>:X (match_dup 1) (match_dup 3)))
- (set (match_dup 0) (<or_optab>:X (match_dup 0) (match_dup 4)))]
+ [(set (match_dup 3) (<or_optab>:X (match_dup 1) (match_dup 4)))
+ (set (match_dup 0) (<or_optab>:X (match_dup 3) (match_dup 5)))]
{
unsigned HOST_WIDE_INT bits = UINTVAL (operands[2]);
unsigned HOST_WIDE_INT topbit = HOST_WIDE_INT_1U << floor_log2 (bits);
- operands[3] = GEN_INT (bits &~ topbit);
- operands[4] = GEN_INT (topbit);
+ operands[4] = GEN_INT (bits &~ topbit);
+ operands[5] = GEN_INT (topbit);
}
[(set_attr "type" "bitmanip")])
;; Same to use blcri + andi and blcri + bclri
-(define_insn_and_split "*andi<mode>_extrabit"
- [(set (match_operand:X 0 "register_operand" "=r")
- (and:X (match_operand:X 1 "register_operand" "r")
- (match_operand:X 2 "not_uimm_extra_bit_or_nottwobits" "i")))]
+(define_split
+ [(set (match_operand:X 0 "register_operand")
+ (and:X (match_operand:X 1 "register_operand")
+ (match_operand:X 2 "not_uimm_extra_bit_or_nottwobits")))
+ (clobber (match_operand:X 3 "register_operand"))]
"TARGET_ZBS && !not_single_bit_mask_operand (operands[2], VOIDmode)"
- "#"
- "&& reload_completed"
- [(set (match_dup 0) (and:X (match_dup 1) (match_dup 3)))
- (set (match_dup 0) (and:X (match_dup 0) (match_dup 4)))]
+ [(set (match_dup 3) (and:X (match_dup 1) (match_dup 4)))
+ (set (match_dup 0) (and:X (match_dup 3) (match_dup 5)))]
{
unsigned HOST_WIDE_INT bits = UINTVAL (operands[2]);
unsigned HOST_WIDE_INT topbit = HOST_WIDE_INT_1U << floor_log2 (~bits);
- operands[3] = GEN_INT (bits | topbit);
- operands[4] = GEN_INT (~topbit);
-}
-[(set_attr "type" "bitmanip")])
+ operands[4] = GEN_INT (bits | topbit);
+ operands[5] = GEN_INT (~topbit);
+})
;; If we have the ZBA extension, then we can clear the upper half of a 64
;; bit object with a zext.w. So if we have AND where the constant would
@@ -1203,7 +1222,7 @@
we can't keep it in 64 bit variable.)
then use clmul instruction to implement the CRC,
otherwise (TARGET_ZBKB) generate table based using brev. */
- if ((TARGET_ZBKC || TARGET_ZBC) && <ANYI:MODE>mode < word_mode)
+ if ((TARGET_ZBKC || TARGET_ZBC || TARGET_ZVBC) && <ANYI:MODE>mode < word_mode)
expand_reversed_crc_using_clmul (<ANYI:MODE>mode, <ANYI1:MODE>mode,
operands);
else if (TARGET_ZBKB)
@@ -1235,7 +1254,8 @@
(match_operand:SUBX 3)]
UNSPEC_CRC))]
/* We don't support the case when data's size is bigger than CRC's size. */
- "(TARGET_ZBKC || TARGET_ZBC) && <SUBX:MODE>mode >= <SUBX1:MODE>mode"
+ "(TARGET_ZBKC || TARGET_ZBC || TARGET_ZVBC)
+ && <SUBX:MODE>mode >= <SUBX1:MODE>mode"
{
/* If we have the ZBC or ZBKC extension (ie, clmul) and
it is possible to store the quotient within a single variable
@@ -1245,3 +1265,89 @@
expand_crc_using_clmul (<SUBX:MODE>mode, <SUBX1:MODE>mode, operands);
DONE;
})
+
+;; If we have an XOR/IOR with a constant operand (C) and we can
+;; synthesize ~C more efficiently than C, then synthesize ~C and use
+;; xnor/orn instead.
+;;
+;; The same can be done for AND, but mvconst_internal's issues get in
+;; the way. That's future work.
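+;;
+;; The rewrite relies on the identities
+;;   x ^ C == ~(x ^ ~C)    (xnor x, ~C)
+;;   x | C == x | ~(~C)    (orn  x, ~C)
+;; so synthesizing ~C pays off whenever it needs fewer instructions
+;; than synthesizing C.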
+(define_split
+ [(set (match_operand:X 0 "register_operand")
+ (any_or:X (match_operand:X 1 "register_operand")
+ (match_operand:X 2 "const_int_operand")))
+ (clobber (match_operand:X 3 "register_operand"))]
+ "TARGET_ZBB
+ && (riscv_const_insns (operands[2], true)
+ > riscv_const_insns (GEN_INT (~INTVAL (operands[2])), true))"
+ [(const_int 0)]
+{
+ /* Get the inverted constant into the temporary register. */
+ riscv_emit_move (operands[3], GEN_INT (~INTVAL (operands[2])));
+
+ /* For xnor, the NOT operation is in a different position. So
+ we have to customize the split code we generate a bit.
+
+ It is expected that AND will be handled like IOR in the future. */
+ if (<CODE> == XOR)
+ {
+ rtx x = gen_rtx_XOR (<X:MODE>mode, operands[1], operands[3]);
+ x = gen_rtx_NOT (<X:MODE>mode, x);
+ emit_insn (gen_rtx_SET (operands[0], x));
+ }
+ else
+ {
+ rtx x = gen_rtx_NOT (<X:MODE>mode, operands[3]);
+ x = gen_rtx_IOR (<X:MODE>mode, x, operands[1]);
+ emit_insn (gen_rtx_SET (operands[0], x));
+ }
+ DONE;
+})
+
+;; More forms of single bit extraction. The RISC-V port does not
+;; define SHIFT_COUNT_TRUNCATED so we need forms where the bit position
+;; is masked.
+;;
+;; We could in theory use this for rv32 as well, but it probably does
+;; not occur in practice. The bit position would need to be QI/HI mode,
+;; otherwise we would not need the zero extension.
+;;
+;; One could also argue that the zero extension is redundant and should
+;; have been optimized away during RTL simplification.
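+;;
+;; Illustrative source (not from the patch): on rv64 an expression like
+;;   (x >> (n & 63)) & 1
+;; with a 32-bit n leaves exactly the zero-extended, masked bit
+;; position that the first pattern below matches with a single bext.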
+(define_insn "*bextdi_position_ze_masked"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (zero_extract:DI (match_operand:DI 1 "register_operand" "r")
+ (const_int 1)
+ (zero_extend:DI
+ (and:SI (match_operand:SI 2 "register_operand" "r")
+ (const_int 63)))))]
+ "TARGET_64BIT && TARGET_ZBS"
+ "bext\t%0,%1,%2"
+ [(set_attr "type" "bitmanip")])
+
+;; Same as above, but without the extraneous zero_extend.
+(define_insn "*bextdi_position_ze_masked"
+ [(set (match_operand:X 0 "register_operand" "=r")
+ (zero_extract:X
+ (match_operand:X 1 "register_operand" "r")
+ (const_int 1)
+ (and:X (match_operand:SI 2 "register_operand" "r")
+ (match_operand:SI 3 "bitpos_mask_operand" "n"))))]
+ "TARGET_64BIT && TARGET_ZBS"
+ "bext\t%0,%1,%2"
+ [(set_attr "type" "bitmanip")])
+
+;; This has shown up in testing. In particular we end up with an
+;; immediate input. We can load that into a register and target
+;; one of the above bext patterns.
+(define_split
+ [(set (match_operand:X 0 "register_operand")
+ (and:X (lshiftrt:X (match_operand 1 "immediate_operand")
+ (match_operand:QI 2 "register_operand"))
+ (const_int 1)))
+ (clobber (match_operand:X 3 "register_operand"))]
+ ""
+ [(set (match_dup 3) (match_dup 1))
+ (set (match_dup 0) (zero_extract:X (match_dup 3)
+ (const_int 1)
+ (zero_extend:X (match_dup 2))))])
diff --git a/gcc/config/riscv/constraints.md b/gcc/config/riscv/constraints.md
index ba3c6e6..979e0df 100644
--- a/gcc/config/riscv/constraints.md
+++ b/gcc/config/riscv/constraints.md
@@ -43,6 +43,10 @@
(define_register_constraint "cf" "TARGET_HARD_FLOAT ? RVC_FP_REGS : (TARGET_ZFINX ? RVC_GR_REGS : NO_REGS)"
"RVC floating-point registers (f8-f15), if available, reuse GPR as FPR when use zfinx.")
+(define_register_constraint "cR" "RVC_GR_REGS"
+ "Even-odd RVC general purpose register (x8-x15)."
+ "regno % 2 == 0")
+
;; General constraints
(define_constraint "I"
@@ -233,10 +237,11 @@
(and (match_code "const_vector")
(match_test "rtx_equal_p (op, riscv_vector::gen_scalar_move_mask (GET_MODE (op)))")))
-(define_memory_constraint "Wdm"
+(define_constraint "Wdm"
"Vector duplicate memory operand"
- (and (match_code "mem")
- (match_code "reg" "0")))
+ (and (match_test "strided_load_broadcast_p ()")
+ (and (match_code "mem")
+ (match_code "reg" "0"))))
;; Vendor ISA extension constraints.
@@ -311,3 +316,21 @@
"Shifting immediate for SIMD shufflei3."
(and (match_code "const_int")
(match_test "IN_RANGE (ival, -64, -1)")))
+
+(define_constraint "Ou01"
+ "A 1-bit unsigned immediate."
+ (and (match_code "const_int")
+ (match_test "IN_RANGE (ival, 0, 1)")))
+
+(define_constraint "Ou02"
+ "A 2-bit unsigned immediate."
+ (and (match_code "const_int")
+ (match_test "IN_RANGE (ival, 0, 3)")))
+
+(define_constraint "Q"
+ "An address operand that is valid for a prefetch instruction"
+ (match_operand 0 "prefetch_operand"))
+
+(define_address_constraint "ZD"
+ "An address operand that is valid for a mips prefetch instruction"
+ (match_test "riscv_prefetch_offset_address_p (op, mode)"))
diff --git a/gcc/config/riscv/gen-riscv-ext-opt.cc b/gcc/config/riscv/gen-riscv-ext-opt.cc
new file mode 100644
index 0000000..17b8f5b
--- /dev/null
+++ b/gcc/config/riscv/gen-riscv-ext-opt.cc
@@ -0,0 +1,105 @@
+#include <vector>
+#include <string>
+#include <set>
+#include <stdio.h>
+#include "riscv-opts.h"
+
+struct version_t
+{
+ int major;
+ int minor;
+ version_t (int major, int minor,
+ enum riscv_isa_spec_class spec = ISA_SPEC_CLASS_NONE)
+ : major (major), minor (minor)
+ {}
+ bool operator<(const version_t &other) const
+ {
+ if (major != other.major)
+ return major < other.major;
+ return minor < other.minor;
+ }
+
+ bool operator== (const version_t &other) const
+ {
+ return major == other.major && minor == other.minor;
+ }
+};
+
+static void
+print_ext_doc_entry (const std::string &ext_name, const std::string &full_name,
+ const std::string &desc,
+ const std::vector<version_t> &supported_versions)
+{
+ // Implementation of the function to print the documentation entry
+ // for the extension.
+ std::set<version_t> unique_versions;
+ for (const auto &version : supported_versions)
+ unique_versions.insert (version);
+ printf ("@item %s\n", ext_name.c_str ());
+ printf ("@tab");
+ for (const auto &version : unique_versions)
+ {
+ printf (" %d.%d", version.major, version.minor);
+ }
+ printf ("\n");
+ printf ("@tab %s", full_name.c_str ());
+ if (desc.size ())
+ printf (", %s", desc.c_str ());
+ printf ("\n\n");
+}
+
+int
+main ()
+{
+ puts ("; Target options for the RISC-V port of the compiler");
+ puts (";");
+ puts ("; Copyright (C) 2025 Free Software Foundation, Inc.");
+ puts (";");
+ puts ("; This file is part of GCC.");
+ puts (";");
+ puts (
+ "; GCC is free software; you can redistribute it and/or modify it under");
+ puts (
+ "; the terms of the GNU General Public License as published by the Free");
+ puts (
+ "; Software Foundation; either version 3, or (at your option) any later");
+ puts ("; version.");
+ puts (";");
+ puts ("; GCC is distributed in the hope that it will be useful, but WITHOUT");
+ puts ("; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY");
+ puts ("; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public");
+ puts ("; License for more details.");
+ puts (";");
+ puts ("; You should have received a copy of the GNU General Public License");
+ puts ("; along with GCC; see the file COPYING3. If not see ");
+ puts ("; <http://www.gnu.org/licenses/>.");
+
+ puts ("; This file is generated automatically using");
+ puts ("; gcc/config/riscv/gen-riscv-ext-opt.cc from:");
+ puts ("; gcc/config/riscv/riscv-ext.def");
+ puts ("");
+ puts ("; Please *DO NOT* edit manually.");
+
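+ /* Expand riscv-ext.def twice with a local DEFINE_RISCV_EXT: first to
+ collect the distinct per-flag-group TargetVariable names, then (below)
+ to emit one Mask() record per extension. */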
+ std::set<std::string> all_vars;
+#define DEFINE_RISCV_EXT(NAME, UPPERCASE_NAME, FULL_NAME, DESC, URL, DEP_EXTS, \
+ SUPPORTED_VERSIONS, FLAG_GROUP, BITMASK_GROUP_ID, \
+ BITMASK_BIT_POSITION, EXTRA_EXTENSION_FLAGS) \
+ all_vars.insert ("riscv_" #FLAG_GROUP "_subext");
+#include "riscv-ext.def"
+#undef DEFINE_RISCV_EXT
+
+ for (auto var : all_vars)
+ {
+ puts ("TargetVariable");
+ printf ("int %s\n\n", var.c_str ());
+ }
+
+#define DEFINE_RISCV_EXT(NAME, UPPERCASE_NAME, FULL_NAME, DESC, URL, DEP_EXTS, \
+ SUPPORTED_VERSIONS, FLAG_GROUP, BITMASK_GROUP_ID, \
+ BITMASK_BIT_POSITION, EXTRA_EXTENSION_FLAGS) \
+ puts ("Mask(" #UPPERCASE_NAME ") Var(riscv_" #FLAG_GROUP "_subext)\n");
+#include "riscv-ext.def"
+#undef DEFINE_RISCV_EXT
+
+ return 0;
+}
diff --git a/gcc/config/riscv/gen-riscv-ext-texi.cc b/gcc/config/riscv/gen-riscv-ext-texi.cc
new file mode 100644
index 0000000..c29a375
--- /dev/null
+++ b/gcc/config/riscv/gen-riscv-ext-texi.cc
@@ -0,0 +1,88 @@
+#include <vector>
+#include <string>
+#include <set>
+#include <stdio.h>
+#include "riscv-opts.h"
+
+struct version_t
+{
+ int major_version;
+ int minor_version;
+ version_t (int major, int minor,
+ enum riscv_isa_spec_class spec = ISA_SPEC_CLASS_NONE)
+ : major_version (major), minor_version (minor)
+ {}
+ bool operator<(const version_t &other) const
+ {
+ if (major_version != other.major_version)
+ return major_version < other.major_version;
+ return minor_version < other.minor_version;
+ }
+
+ bool operator== (const version_t &other) const
+ {
+ return major_version == other.major_version && minor_version == other.minor_version;
+ }
+};
+
+static void
+print_ext_doc_entry (const std::string &ext_name, const std::string &full_name,
+ const std::string &desc,
+ const std::vector<version_t> &supported_versions)
+{
+ // Implementation of the function to print the documentation entry
+ // for the extension.
+ std::set<version_t> unique_versions;
+ for (const auto &version : supported_versions)
+ unique_versions.insert (version);
+ printf ("@item %s\n", ext_name.c_str ());
+ printf ("@tab");
+ for (const auto &version : unique_versions)
+ {
+ printf (" %d.%d", version.major_version, version.minor_version);
+ }
+ printf ("\n");
+ printf ("@tab %s", full_name.c_str ());
+ if (desc.size ())
+ printf (", %s", desc.c_str ());
+ printf ("\n\n");
+}
+
+int
+main ()
+{
+ puts ("@c Copyright (C) 2025 Free Software Foundation, Inc.");
+ puts ("@c This is part of the GCC manual.");
+ puts ("@c For copying conditions, see the file gcc/doc/include/fdl.texi.");
+ puts ("");
+ puts ("@c This file is generated automatically using");
+ puts ("@c gcc/config/riscv/gen-riscv-ext-texi.cc from:");
+ puts ("@c gcc/config/riscv/riscv-ext.def");
+ puts ("@c gcc/config/riscv/riscv-opts.h");
+ puts ("");
+ puts ("@c Please *DO NOT* edit manually.");
+ puts ("");
+ puts ("@multitable @columnfractions .10 .10 .80");
+ puts ("@headitem Extension Name @tab Supported Version @tab Description");
+ puts ("");
+
+ /* The g extension is special in that it has no clear version... */
+ puts ("@item g");
+ puts ("@tab -");
+ puts (
+ "@tab General-purpose computing base extension, @samp{g} will expand to");
+ puts ("@samp{i}, @samp{m}, @samp{a}, @samp{f}, @samp{d}, @samp{zicsr} and");
+ puts ("@samp{zifencei}.");
+ puts ("");
+
+#define DEFINE_RISCV_EXT(NAME, UPPERCASE_NAME, FULL_NAME, DESC, URL, DEP_EXTS, \
+ SUPPORTED_VERSIONS, FLAG_GROUP, BITMASK_GROUP_ID, \
+ BITMASK_BIT_POSITION, EXTRA_EXTENSION_FLAGS) \
+ print_ext_doc_entry (#NAME, FULL_NAME, DESC, \
+ std::vector<version_t> SUPPORTED_VERSIONS);
+#include "riscv-ext.def"
+#undef DEFINE_RISCV_EXT
+
+ puts ("@end multitable");
+ return 0;
+}
diff --git a/gcc/config/riscv/gen-riscv-mcpu-texi.cc b/gcc/config/riscv/gen-riscv-mcpu-texi.cc
new file mode 100644
index 0000000..9681438
--- /dev/null
+++ b/gcc/config/riscv/gen-riscv-mcpu-texi.cc
@@ -0,0 +1,43 @@
+#include <string>
+#include <vector>
+#include <stdio.h>
+
+int
+main ()
+{
+ puts ("@c Copyright (C) 2025 Free Software Foundation, Inc.");
+ puts ("@c This is part of the GCC manual.");
+ puts ("@c For copying conditions, see the file gcc/doc/include/fdl.texi.");
+ puts ("");
+ puts ("@c This file is generated automatically using");
+ puts ("@c gcc/config/riscv/gen-riscv-mcpu-texi.cc from:");
+ puts ("@c gcc/config/riscv/riscv-cores.def");
+ puts ("");
+ puts ("@c Please *DO NOT* edit manually.");
+ puts ("");
+ puts ("@samp{Core Name}");
+ puts ("");
+ puts ("@opindex mcpu");
+ puts ("@item -mcpu=@var{processor-string}");
+ puts ("Use architecture of and optimize the output for the given processor, specified");
+ puts ("by particular CPU name. Permissible values for this option are:");
+ puts ("");
+ puts ("");
+
+ std::vector<std::string> coreNames;
+
+#define RISCV_CORE(CORE_NAME, ARCH, MICRO_ARCH) \
+ coreNames.push_back (CORE_NAME);
+#include "riscv-cores.def"
+#undef RISCV_CORE
+
+ for (size_t i = 0; i < coreNames.size(); ++i) {
+ if (i == coreNames.size() - 1) {
+ printf("@samp{%s}.\n", coreNames[i].c_str());
+ } else {
+ printf("@samp{%s},\n\n", coreNames[i].c_str());
+ }
+ }
+
+ return 0;
+}
diff --git a/gcc/config/riscv/gen-riscv-mtune-texi.cc b/gcc/config/riscv/gen-riscv-mtune-texi.cc
new file mode 100644
index 0000000..1bdfe2a
--- /dev/null
+++ b/gcc/config/riscv/gen-riscv-mtune-texi.cc
@@ -0,0 +1,41 @@
+#include <string>
+#include <vector>
+#include <stdio.h>
+
+int
+main ()
+{
+ puts ("@c Copyright (C) 2025 Free Software Foundation, Inc.");
+ puts ("@c This is part of the GCC manual.");
+ puts ("@c For copying conditions, see the file gcc/doc/include/fdl.texi.");
+ puts ("");
+ puts ("@c This file is generated automatically using");
+ puts ("@c gcc/config/riscv/gen-riscv-mtune-texi.cc from:");
+ puts ("@c gcc/config/riscv/riscv-cores.def");
+ puts ("");
+ puts ("@c Please *DO NOT* edit manually.");
+ puts ("");
+ puts ("@samp{Tune Name}");
+ puts ("");
+ puts ("@opindex mtune");
+ puts ("@item -mtune=@var{processor-string}");
+ puts ("Optimize the output for the given processor, specified by microarchitecture or");
+ puts ("particular CPU name. Permissible values for this option are:");
+ puts ("");
+ puts ("");
+
+ std::vector<std::string> tuneNames;
+
+#define RISCV_TUNE(TUNE_NAME, PIPELINE_MODEL, TUNE_INFO) \
+ tuneNames.push_back (TUNE_NAME);
+#include "riscv-cores.def"
+#undef RISCV_TUNE
+
+ for (size_t i = 0; i < tuneNames.size(); ++i) {
+ printf("@samp{%s},\n\n", tuneNames[i].c_str());
+ }
+
+ puts ("and all valid options for @option{-mcpu=}.");
+
+ return 0;
+}
diff --git a/gcc/config/riscv/generic-vector-ooo.md b/gcc/config/riscv/generic-vector-ooo.md
index cb71941..773003b 100644
--- a/gcc/config/riscv/generic-vector-ooo.md
+++ b/gcc/config/riscv/generic-vector-ooo.md
@@ -17,6 +17,9 @@
;; <http://www.gnu.org/licenses/>.
;; Vector load/store
+;; The insn reservations include "generic" as we won't have an in-order
+;; generic definition for vector instructions.
+
(define_automaton "vector_ooo")
;; Separate issue queue for vector instructions.
@@ -29,115 +32,141 @@
(define_cpu_unit "vxu_ooo_multicycle" "vector_ooo")
(define_insn_reservation "vec_load" 6
- (eq_attr "type" "vlde,vldm,vlds,vldux,vldox,vldff,vldr")
+ (and (eq_attr "tune" "generic_ooo,generic")
+ (eq_attr "type" "vlde,vldm,vlds,vldux,vldox,vldff,vldr"))
"vxu_ooo_issue,vxu_ooo_alu")
(define_insn_reservation "vec_store" 6
- (eq_attr "type" "vste,vstm,vsts,vstux,vstox,vstr")
+ (and (eq_attr "tune" "generic_ooo,generic")
+ (eq_attr "type" "vste,vstm,vsts,vstux,vstox,vstr"))
"vxu_ooo_issue,vxu_ooo_alu")
;; Vector segment loads/stores.
(define_insn_reservation "vec_loadstore_seg" 10
- (eq_attr "type" "vlsegde,vlsegds,vlsegdux,vlsegdox,vlsegdff,\
- vssegte,vssegts,vssegtux,vssegtox")
+ (and (eq_attr "tune" "generic_ooo,generic")
+ (eq_attr "type" "vlsegde,vlsegds,vlsegdux,vlsegdox,vlsegdff,\
+ vssegte,vssegts,vssegtux,vssegtox"))
"vxu_ooo_issue,vxu_ooo_alu")
;; Regular vector operations and integer comparisons.
(define_insn_reservation "vec_alu" 3
- (eq_attr "type" "vialu,viwalu,vext,vicalu,vshift,vnshift,viminmax,vicmp,\
- vimov,vsalu,vaalu,vsshift,vnclip,vmov,vfmov,vector,\
- vandn,vbrev,vbrev8,vrev8,vclz,vctz,vrol,vror,vwsll")
+ (and (eq_attr "tune" "generic_ooo,generic")
+ (eq_attr "type" "vialu,viwalu,vext,vicalu,vshift,vnshift,viminmax,vicmp,\
+ vimov,vsalu,vaalu,vsshift,vnclip,vmov,vfmov,vector,\
+ vandn,vbrev,vbrev8,vrev8,vclz,vctz,vrol,vror,vwsll"))
"vxu_ooo_issue,vxu_ooo_alu")
;; Vector float comparison, conversion etc.
(define_insn_reservation "vec_fcmp" 3
- (eq_attr "type" "vfrecp,vfminmax,vfcmp,vfsgnj,vfclass,vfcvtitof,\
- vfcvtftoi,vfwcvtitof,vfwcvtftoi,vfwcvtftof,vfncvtitof,\
- vfncvtftoi,vfncvtftof,vfncvtbf16,vfwcvtbf16")
+ (and (eq_attr "tune" "generic_ooo,generic")
+ (eq_attr "type" "vfrecp,vfminmax,vfcmp,vfsgnj,vfclass,vfcvtitof,\
+ vfcvtftoi,vfwcvtitof,vfwcvtftoi,vfwcvtftof,vfncvtitof,\
+ vfncvtftoi,vfncvtftof,vfncvtbf16,vfwcvtbf16"))
"vxu_ooo_issue,vxu_ooo_alu")
;; Vector integer multiplication.
(define_insn_reservation "vec_imul" 4
- (eq_attr "type" "vimul,viwmul,vimuladd,viwmuladd,vsmul,vclmul,vclmulh,\
- vghsh,vgmul")
+ (and (eq_attr "tune" "generic_ooo,generic")
+ (eq_attr "type" "vimul,viwmul,vimuladd,viwmuladd,vsmul,vclmul,vclmulh,\
+ vghsh,vgmul"))
"vxu_ooo_issue,vxu_ooo_alu")
;; Vector float addition.
(define_insn_reservation "vec_fadd" 4
- (eq_attr "type" "vfalu,vfwalu")
+ (and (eq_attr "tune" "generic_ooo,generic")
+ (eq_attr "type" "vfalu,vfwalu"))
"vxu_ooo_issue,vxu_ooo_alu")
;; Vector float multiplication and FMA.
(define_insn_reservation "vec_fmul" 6
- (eq_attr "type" "vfmul,vfwmul,vfmuladd,vfwmuladd,vfwmaccbf16,sf_vqmacc,sf_vfnrclip")
+ (and (eq_attr "tune" "generic_ooo,generic")
+ (eq_attr "type" "vfmul,vfwmul,vfmuladd,vfwmuladd,vfwmaccbf16,sf_vqmacc,sf_vfnrclip"))
"vxu_ooo_issue,vxu_ooo_alu")
;; Vector crypto, assumed to be a generic operation for now.
(define_insn_reservation "vec_crypto" 4
- (eq_attr "type" "crypto,vclz,vctz,vcpop")
+ (and (eq_attr "tune" "generic_ooo,generic")
+ (eq_attr "type" "crypto,vclz,vctz,vcpop"))
"vxu_ooo_issue,vxu_ooo_alu")
;; Vector crypto, AES
(define_insn_reservation "vec_crypto_aes" 4
- (eq_attr "type" "vaesef,vaesem,vaesdf,vaesdm,vaeskf1,vaeskf2,vaesz")
+ (and (eq_attr "tune" "generic_ooo,generic")
+ (eq_attr "type" "vaesef,vaesem,vaesdf,vaesdm,vaeskf1,vaeskf2,vaesz"))
"vxu_ooo_issue,vxu_ooo_alu")
;; Vector crypto, sha
(define_insn_reservation "vec_crypto_sha" 4
- (eq_attr "type" "vsha2ms,vsha2ch,vsha2cl")
+ (and (eq_attr "tune" "generic_ooo,generic")
+ (eq_attr "type" "vsha2ms,vsha2ch,vsha2cl"))
"vxu_ooo_issue,vxu_ooo_alu")
;; Vector crypto, SM3/4
(define_insn_reservation "vec_crypto_sm" 4
- (eq_attr "type" "vsm4k,vsm4r,vsm3me,vsm3c")
+ (and (eq_attr "tune" "generic_ooo,generic")
+ (eq_attr "type" "vsm4k,vsm4r,vsm3me,vsm3c"))
"vxu_ooo_issue,vxu_ooo_alu")
;; Vector permute.
(define_insn_reservation "vec_perm" 3
- (eq_attr "type" "vimerge,vfmerge,vslideup,vslidedown,vislide1up,\
- vislide1down,vfslide1up,vfslide1down,vgather,vcompress")
+ (and (eq_attr "tune" "generic_ooo,generic")
+ (eq_attr "type" "vimerge,vfmerge,vslideup,vslidedown,vislide1up,\
+ vislide1down,vfslide1up,vfslide1down,vgather,vcompress"))
"vxu_ooo_issue,vxu_ooo_alu")
;; Vector reduction.
(define_insn_reservation "vec_reduction" 8
- (eq_attr "type" "vired,viwred,vfredu,vfwredu")
+ (and (eq_attr "tune" "generic_ooo,generic")
+ (eq_attr "type" "vired,viwred,vfredu,vfwredu"))
"vxu_ooo_issue,vxu_ooo_multicycle")
;; Vector ordered reduction, assume the latency number is for
;; a 128-bit vector. It is scaled in riscv_sched_adjust_cost
;; for larger vectors.
(define_insn_reservation "vec_ordered_reduction" 10
- (eq_attr "type" "vfredo,vfwredo")
+ (and (eq_attr "tune" "generic_ooo,generic")
+ (eq_attr "type" "vfredo,vfwredo"))
"vxu_ooo_issue,vxu_ooo_multicycle*3")
;; Vector integer division, assume not pipelined.
(define_insn_reservation "vec_idiv" 16
- (eq_attr "type" "vidiv")
+ (and (eq_attr "tune" "generic_ooo,generic")
+ (eq_attr "type" "vidiv"))
"vxu_ooo_issue,vxu_ooo_multicycle*3")
;; Vector float divisions and sqrt, assume not pipelined.
(define_insn_reservation "vec_float_divsqrt" 16
- (eq_attr "type" "vfdiv,vfsqrt")
+ (and (eq_attr "tune" "generic_ooo,generic")
+ (eq_attr "type" "vfdiv,vfsqrt"))
"vxu_ooo_issue,vxu_ooo_multicycle*3")
;; Vector mask operations.
(define_insn_reservation "vec_mask" 2
- (eq_attr "type" "vmalu,vmpop,vmffs,vmsfs,vmiota,vmidx,vimovvx,vimovxv,\
- vfmovvf,vfmovfv")
+ (and (eq_attr "tune" "generic_ooo,generic")
+ (eq_attr "type" "vmalu,vmpop,vmffs,vmsfs,vmiota,vmidx,vimovvx,vimovxv,\
+ vfmovvf,vfmovfv"))
"vxu_ooo_issue,vxu_ooo_alu")
;; Vector vsetvl.
(define_insn_reservation "vec_vesetvl" 1
- (eq_attr "type" "vsetvl,vsetvl_pre")
+ (and (eq_attr "tune" "generic_ooo,generic")
+ (eq_attr "type" "vsetvl,vsetvl_pre"))
"vxu_ooo_issue")
;; Vector rounding mode setters, assume pipeline barrier.
(define_insn_reservation "vec_setrm" 20
- (eq_attr "type" "wrvxrm,wrfrm")
+ (and (eq_attr "tune" "generic_ooo,generic")
+ (eq_attr "type" "wrvxrm,wrfrm"))
"vxu_ooo_issue,vxu_ooo_issue*3")
;; Vector read vlen/vlenb.
(define_insn_reservation "vec_readlen" 4
- (eq_attr "type" "rdvlenb,rdvl")
+ (and (eq_attr "tune" "generic_ooo,generic")
+ (eq_attr "type" "rdvlenb,rdvl"))
"vxu_ooo_issue,vxu_ooo_issue")
+;; Vector sf_vcp.
+(define_insn_reservation "vec_sf_vcp" 2
+ (and (eq_attr "tune" "generic_ooo,generic")
+ (eq_attr "type" "sf_vc,sf_vc_se"))
+ "vxu_ooo_issue")
diff --git a/gcc/config/riscv/genrvv-type-indexer.cc b/gcc/config/riscv/genrvv-type-indexer.cc
index 6de23cb6..f296089 100644
--- a/gcc/config/riscv/genrvv-type-indexer.cc
+++ b/gcc/config/riscv/genrvv-type-indexer.cc
@@ -23,8 +23,14 @@ along with GCC; see the file COPYING3. If not see
#include <assert.h>
#include <math.h>
-#define BOOL_SIZE_LIST {1, 2, 4, 8, 16, 32, 64}
-#define EEW_SIZE_LIST {8, 16, 32, 64}
+#define BOOL_SIZE_LIST \
+ { \
+ 1, 2, 4, 8, 16, 32, 64 \
+ }
+#define EEW_SIZE_LIST \
+ { \
+ 8, 16, 32, 64 \
+ }
#define LMUL1_LOG2 0
std::string
@@ -167,7 +173,7 @@ floattype (unsigned sew, int lmul_log2)
std::string
expand_floattype (unsigned sew, int lmul_log2, unsigned nf)
{
- if (sew != 8 || nf!= 1
+ if (sew != 8 || nf != 1
|| (!valid_type (sew * 4, lmul_log2 + 2, /*float_t*/ true)))
return "INVALID";
@@ -297,11 +303,13 @@ main (int argc, const char **argv)
for (unsigned eew : EEW_SIZE_LIST)
fprintf (fp, " /*SIGNED_EEW%d_LMUL1_INTERPRET*/ %s,\n", eew,
- inttype (eew, LMUL1_LOG2, /* unsigned_p */false).c_str ());
+ inttype (eew, LMUL1_LOG2, /* unsigned_p */ false).c_str ());
for (unsigned eew : EEW_SIZE_LIST)
fprintf (fp, " /*UNSIGNED_EEW%d_LMUL1_INTERPRET*/ %s,\n", eew,
- inttype (eew, LMUL1_LOG2, /* unsigned_p */true).c_str ());
+ inttype (eew, LMUL1_LOG2, /* unsigned_p */ true).c_str ());
+
+ fprintf (fp, " /*X2*/ INVALID,\n");
for (unsigned lmul_log2_offset : {1, 2, 3, 4, 5, 6})
{
@@ -426,6 +434,10 @@ main (int argc, const char **argv)
fprintf (fp, " /*UNSIGNED_EEW%d_LMUL1_INTERPRET*/ INVALID,\n",
eew);
+ fprintf (
+ fp, " /*X2*/ %s,\n",
+ inttype (sew * 2, lmul_log2 + 1, /*unsigned_p*/ true).c_str ());
+
for (unsigned lmul_log2_offset : {1, 2, 3, 4, 5, 6})
{
unsigned multiple_of_lmul = 1 << lmul_log2_offset;
@@ -501,6 +513,8 @@ main (int argc, const char **argv)
for (unsigned eew : EEW_SIZE_LIST)
fprintf (fp, " /*UNSIGNED_EEW%d_LMUL1_INTERPRET*/ INVALID,\n", eew);
+ fprintf (fp, " /*X2*/ INVALID,\n");
+
for (unsigned lmul_log2_offset : {1, 2, 3, 4, 5, 6})
{
unsigned multiple_of_lmul = 1 << lmul_log2_offset;
@@ -588,6 +602,8 @@ main (int argc, const char **argv)
fprintf (fp, " /*UNSIGNED_EEW%d_LMUL1_INTERPRET*/ INVALID,\n",
eew);
+ fprintf (fp, " /*X2*/ INVALID,\n");
+
for (unsigned lmul_log2_offset : {1, 2, 3, 4, 5, 6})
{
unsigned multiple_of_lmul = 1 << lmul_log2_offset;
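
The brace-wrapped rewrite of BOOL_SIZE_LIST and EEW_SIZE_LIST keeps each macro
usable directly as the range of a range-based for, which is how the generator
iterates the EEW sizes. A stand-alone sketch of the idiom (not the generator
itself):

#include <cstdio>
#include <initializer_list>

#define EEW_SIZE_LIST \
  { \
    8, 16, 32, 64 \
  }

int
main ()
{
  // The macro expands to a braced list, which binds to a
  // std::initializer_list<int> in the range-based for.
  for (unsigned eew : EEW_SIZE_LIST)
    std::printf ("EEW%u\n", eew);
  return 0;
}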
diff --git a/gcc/config/riscv/gnu.h b/gcc/config/riscv/gnu.h
new file mode 100644
index 0000000..047399b
--- /dev/null
+++ b/gcc/config/riscv/gnu.h
@@ -0,0 +1,59 @@
+/* Definitions for RISC-V GNU/Hurd systems with ELF format.
+ Copyright (C) 1998-2025 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#define TARGET_OS_CPP_BUILTINS() \
+ do { \
+ GNU_USER_TARGET_OS_CPP_BUILTINS(); \
+ } while (0)
+
+#define GNU_USER_DYNAMIC_LINKER "/lib/ld-riscv" XLEN_SPEC "-" ABI_SPEC ".so.1"
+
+#define ICACHE_FLUSH_FUNC "__riscv_flush_icache"
+
+#define CPP_SPEC "%{pthread:-D_REENTRANT}"
+
+#define LD_EMUL_SUFFIX \
+ "%{mabi=lp64d:}" \
+ "%{mabi=lp64f:_lp64f}" \
+ "%{mabi=lp64:_lp64}" \
+ "%{mabi=ilp32d:}" \
+ "%{mabi=ilp32f:_ilp32f}" \
+ "%{mabi=ilp32:_ilp32}"
+
+#define LINK_SPEC "\
+-melf" XLEN_SPEC DEFAULT_ENDIAN_SPEC "riscv" LD_EMUL_SUFFIX " \
+%{mno-relax:--no-relax} \
+-X \
+%{mbig-endian:-EB} \
+%{mlittle-endian:-EL} \
+%{shared} \
+ %{!shared: \
+ %{!static: \
+ %{!static-pie: \
+ %{rdynamic:-export-dynamic} \
+ -dynamic-linker " GNU_USER_DYNAMIC_LINKER "}} \
+ %{static:-static} %{static-pie:-static -pie --no-dynamic-linker -z text}}"
+
+#define STARTFILE_PREFIX_SPEC \
+ "/lib" XLEN_SPEC "/" ABI_SPEC "/ " \
+ "/usr/lib" XLEN_SPEC "/" ABI_SPEC "/ " \
+ "/lib/ " \
+ "/usr/lib/ "
+
+#define RISCV_USE_CUSTOMISED_MULTI_LIB select_by_abi
diff --git a/gcc/config/riscv/iterators.md b/gcc/config/riscv/iterators.md
index 214c20b..584b345 100644
--- a/gcc/config/riscv/iterators.md
+++ b/gcc/config/riscv/iterators.md
@@ -262,6 +262,9 @@
(define_code_attr fix_uns [(fix "fix") (unsigned_fix "fixuns")])
+(define_code_attr OPTAB [(ior "IOR")
+ (xor "XOR")])
+
;; -------------------------------------------------------------------
;; Code Attributes
diff --git a/gcc/config/riscv/mips-insn.md b/gcc/config/riscv/mips-insn.md
new file mode 100644
index 0000000..ad46026
--- /dev/null
+++ b/gcc/config/riscv/mips-insn.md
@@ -0,0 +1,35 @@
+;; Machine description for MIPS custom instructions.
+;; Copyright (C) 2025 Free Software Foundation, Inc.
+
+;; This file is part of GCC.
+
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+(define_insn "*mov<GPR:mode><X:mode>cc_bitmanip"
+ [(set (match_operand:GPR 0 "register_operand" "=r")
+ (if_then_else:GPR (any_eq:X (match_operand:X 1 "register_operand" "r")
+ (match_operand:X 2 "const_0_operand" "J"))
+ (match_operand:GPR 3 "reg_or_0_operand" "rJ")
+ (match_operand:GPR 4 "reg_or_0_operand" "rJ")))]
+ "TARGET_XMIPSCMOV"
+{
+ enum rtx_code code = <CODE>;
+ if (code == NE)
+ return "mips.ccmov\t%0,%1,%z3,%z4";
+ else
+ return "mips.ccmov\t%0,%1,%z4,%z3";
+}
+[(set_attr "type" "condmove")
+ (set_attr "mode" "<GPR:MODE>")])
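
The output template selects the operand order at output time: mips.ccmov
itself implements the select-on-not-equal-to-zero form, so the EQ case is
expressed by swapping the two data operands. A behavioural sketch in plain
C++ (the helper name is made up; this is not GCC code):

#include <cstdint>
#include <cstdio>

// Hypothetical helper mirroring mips.ccmov rd,rs,rt,ru:
// rd = (rs != 0) ? rt : ru.
static int64_t
ccmov (int64_t cond, int64_t if_nonzero, int64_t if_zero)
{
  return cond != 0 ? if_nonzero : if_zero;
}

int
main ()
{
  int64_t x = 5, a = 1, b = 2;
  // NE form: (x != 0) ? a : b  ->  ccmov (x, a, b)
  // EQ form: (x == 0) ? a : b  ->  ccmov (x, b, a), operands swapped
  std::printf ("%lld %lld\n", (long long) ccmov (x, a, b),
               (long long) ccmov (x, b, a));
  return 0;
}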
diff --git a/gcc/config/riscv/mips-p8700.md b/gcc/config/riscv/mips-p8700.md
new file mode 100644
index 0000000..fac9abb
--- /dev/null
+++ b/gcc/config/riscv/mips-p8700.md
@@ -0,0 +1,167 @@
+;; DFA-based pipeline description for MIPS P8700.
+;;
+;; Copyright (C) 2025 Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published
+;; by the Free Software Foundation; either version 3, or (at your
+;; option) any later version.
+
+;; GCC is distributed in the hope that it will be useful, but WITHOUT
+;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+;; License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+(define_automaton "mips_p8700_agen_alq_pipe, mips_p8700_mdu_pipe, mips_p8700_fpu_pipe")
+
+;; The address generation queue (AGQ) has AL2, CTISTD and LDSTA pipes
+(define_cpu_unit "mips_p8700_agq, mips_p8700_al2, mips_p8700_ctistd, mips_p8700_lsu"
+ "mips_p8700_agen_alq_pipe")
+
+(define_cpu_unit "mips_p8700_gpmul, mips_p8700_gpdiv" "mips_p8700_mdu_pipe")
+
+;; The arithmetic-logic-unit queue (ALQ) has an ALU pipe
+(define_cpu_unit "mips_p8700_alq, mips_p8700_alu" "mips_p8700_agen_alq_pipe")
+
+;; The floating-point-unit queue (FPQ) has short and long pipes
+(define_cpu_unit "mips_p8700_fpu_short, mips_p8700_fpu_long" "mips_p8700_fpu_pipe")
+
+;; Long FPU pipeline.
+(define_cpu_unit "mips_p8700_fpu_apu" "mips_p8700_fpu_pipe")
+
+;; Insns unsupported by the P8700 are mapped to dummy reservations.
+(define_reservation "mips_p8700_dummies"
+ "mips_p8700_agq | mips_p8700_al2 | mips_p8700_ctistd | mips_p8700_lsu |
+ mips_p8700_fpu_short | mips_p8700_fpu_long")
+
+(define_reservation "mips_p8700_agq_al2" "mips_p8700_agq, mips_p8700_al2")
+(define_reservation "mips_p8700_agq_ctistd" "mips_p8700_agq, mips_p8700_ctistd")
+(define_reservation "mips_p8700_agq_lsu" "mips_p8700_agq, mips_p8700_lsu")
+(define_reservation "mips_p8700_alq_alu" "mips_p8700_alq, mips_p8700_alu")
+
+;;
+;; FPU pipe
+;;
+
+(define_insn_reservation "mips_p8700_fpu_fadd" 4
+ (and (eq_attr "tune" "mips_p8700")
+ (eq_attr "type" "fadd"))
+ "mips_p8700_fpu_long, mips_p8700_fpu_apu")
+
+(define_insn_reservation "mips_p8700_fpu_fabs" 2
+ (and (eq_attr "tune" "mips_p8700")
+ (eq_attr "type" "fcmp,fmove"))
+ "mips_p8700_fpu_short, mips_p8700_fpu_apu")
+
+(define_insn_reservation "mips_p8700_fpu_fload" 8
+ (and (eq_attr "tune" "mips_p8700")
+ (eq_attr "type" "fpload"))
+ "mips_p8700_agq_lsu")
+
+(define_insn_reservation "mips_p8700_fpu_fstore" 1
+ (and (eq_attr "tune" "mips_p8700")
+ (eq_attr "type" "fpstore"))
+ "mips_p8700_agq_lsu")
+
+(define_insn_reservation "mips_p8700_fpu_fmadd" 8
+ (and (eq_attr "tune" "mips_p8700")
+ (eq_attr "type" "fmadd"))
+ "mips_p8700_fpu_long, mips_p8700_fpu_apu")
+
+(define_insn_reservation "mips_p8700_fpu_fmul" 5
+ (and (eq_attr "tune" "mips_p8700")
+ (eq_attr "type" "fmul"))
+ "mips_p8700_fpu_long, mips_p8700_fpu_apu")
+
+(define_insn_reservation "mips_p8700_fpu_div" 17
+ (and (eq_attr "tune" "mips_p8700")
+ (eq_attr "type" "fdiv,fsqrt"))
+ "mips_p8700_fpu_long, mips_p8700_fpu_apu*17")
+
+(define_insn_reservation "mips_p8700_fpu_fcvt" 4
+ (and (eq_attr "tune" "mips_p8700")
+ (eq_attr "type" "fcvt,fcvt_i2f,fcvt_f2i"))
+ "mips_p8700_fpu_long, mips_p8700_fpu_apu")
+
+(define_insn_reservation "mips_p8700_fpu_fmtc" 7
+ (and (eq_attr "tune" "mips_p8700")
+ (eq_attr "type" "mtc"))
+ "mips_p8700_agq_lsu")
+
+(define_insn_reservation "mips_p8700_fpu_fmfc" 7
+ (and (eq_attr "tune" "mips_p8700")
+ (eq_attr "type" "mfc"))
+ "mips_p8700_agq_lsu")
+
+;;
+;; Integer pipe
+;;
+
+(define_insn_reservation "mips_p8700_int_load" 4
+ (and (eq_attr "tune" "mips_p8700")
+ (eq_attr "type" "load"))
+ "mips_p8700_agq_lsu")
+
+(define_insn_reservation "mips_p8700_int_store" 3
+ (and (eq_attr "tune" "mips_p8700")
+ (eq_attr "type" "store"))
+ "mips_p8700_agq_lsu")
+
+(define_insn_reservation "mips_p8700_int_arith_1" 1
+ (and (eq_attr "tune" "mips_p8700")
+ (eq_attr "type" "unknown,const,arith,shift,slt,multi,auipc,logical,move,bitmanip,min,max,minu,maxu,clz,ctz,rotate,atomic,condmove,crypto,mvpair,zicond"))
+ "mips_p8700_alq_alu | mips_p8700_agq_al2")
+
+(define_insn_reservation "mips_p8700_int_nop" 0
+ (and (eq_attr "tune" "mips_p8700")
+ (eq_attr "type" "nop"))
+ "mips_p8700_alq_alu | mips_p8700_agq_al2")
+
+(define_insn_reservation "mips_p8700_dsp_mult" 4
+ (and (eq_attr "tune" "mips_p8700")
+ (eq_attr "type" "imul,cpop,clmul"))
+ "mips_p8700_gpmul")
+
+(define_insn_reservation "mips_p8700_int_div" 8
+ (and (eq_attr "tune" "mips_p8700")
+ (eq_attr "type" "idiv"))
+ "mips_p8700_gpdiv*5")
+
+(define_insn_reservation "mips_p8700_int_branch" 1
+ (and (eq_attr "tune" "mips_p8700")
+ (eq_attr "type" "branch,jump,ret,sfb_alu,trap"))
+ "mips_p8700_agq_ctistd")
+
+(define_insn_reservation "mips_p8700_int_call" 2
+ (and (eq_attr "tune" "mips_p8700")
+ (eq_attr "type" "call,jalr"))
+ "mips_p8700_agq_ctistd")
+
+;; Dummy reservation for insns and placeholders that have no mapping to P8700 hardware.
+(define_insn_reservation "mips_p8700_unknown" 1
+ (and (eq_attr "tune" "mips_p8700")
+ (eq_attr "type" "rdvlenb,rdvl,wrvxrm,wrfrm,
+ rdfrm,vsetvl,vsetvl_pre,vlde,vste,vldm,vstm,vlds,vsts,
+ vldux,vldox,vstux,vstox,vldff,vldr,vstr,
+ vlsegde,vssegte,vlsegds,vssegts,vlsegdux,vlsegdox,vssegtux,vssegtox,vlsegdff,
+ vialu,viwalu,vext,vicalu,vshift,vnshift,vicmp,viminmax,
+ vimul,vidiv,viwmul,vimuladd,sf_vqmacc,viwmuladd,vimerge,vimov,
+ vsalu,vaalu,vsmul,vsshift,vnclip,sf_vfnrclip,
+ vfalu,vfwalu,vfmul,vfdiv,vfwmul,vfmuladd,vfwmuladd,vfsqrt,vfrecp,
+ vfcmp,vfminmax,vfsgnj,vfclass,vfmerge,vfmov,
+ vfcvtitof,vfcvtftoi,vfwcvtitof,vfwcvtftoi,
+ vfwcvtftof,vfncvtitof,vfncvtftoi,vfncvtftof,
+ vired,viwred,vfredu,vfredo,vfwredu,vfwredo,
+ vmalu,vmpop,vmffs,vmsfs,vmiota,vmidx,vimovvx,vimovxv,vfmovvf,vfmovfv,
+ vslideup,vslidedown,vislide1up,vislide1down,vfslide1up,vfslide1down,
+ vgather,vcompress,vmov,vector,vandn,vbrev,vbrev8,vrev8,vclz,vctz,vcpop,vrol,vror,vwsll,
+ vclmul,vclmulh,vghsh,vgmul,vaesef,vaesem,vaesdf,vaesdm,vaeskf1,vaeskf2,vaesz,
+ vsha2ms,vsha2ch,vsha2cl,vsm4k,vsm4r,vsm3me,vsm3c,vfncvtbf16,vfwcvtbf16,vfwmaccbf16,
+ sf_vc,sf_vc_se,ghost"))
+ "mips_p8700_dummies")
diff --git a/gcc/config/riscv/pipeline-checker b/gcc/config/riscv/pipeline-checker
new file mode 100755
index 0000000..815698b
--- /dev/null
+++ b/gcc/config/riscv/pipeline-checker
@@ -0,0 +1,191 @@
+#!/usr/bin/env python3
+
+# RISC-V pipeline model checker.
+# Copyright (C) 2025 Free Software Foundation, Inc.
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GCC is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3. If not see
+# <http://www.gnu.org/licenses/>.
+
+import re
+import sys
+import argparse
+from pathlib import Path
+from typing import Any, List
+
+def remove_line_comments(text: str) -> str:
+ # Remove ';;' and everything after it on each line
+ cleaned_lines = []
+ for line in text.splitlines():
+ comment_index = line.find(';;')
+ if comment_index != -1:
+ line = line[:comment_index]
+ cleaned_lines.append(line)
+ return '\n'.join(cleaned_lines)
+
+
+def tokenize_sexpr(s: str) -> List[str]:
+ # Tokenize input string, including support for balanced {...} C blocks
+ tokens = []
+ i = 0
+ while i < len(s):
+ c = s[i]
+ if c.isspace():
+ i += 1
+ elif c == '(' or c == ')':
+ tokens.append(c)
+ i += 1
+ elif c == '"':
+ # Parse quoted string
+ j = i + 1
+ while j < len(s) and s[j] != '"':
+ if s[j] == '\\':
+ j += 1 # Skip escape
+ j += 1
+ tokens.append(s[i:j+1])
+ i = j + 1
+ elif c == '{':
+ # Parse balanced C block
+ depth = 1
+ j = i + 1
+ while j < len(s) and depth > 0:
+ if s[j] == '{':
+ depth += 1
+ elif s[j] == '}':
+ depth -= 1
+ j += 1
+ tokens.append(s[i:j]) # Include enclosing braces
+ i = j
+ else:
+ # Parse atom
+ j = i
+ while j < len(s) and not s[j].isspace() and s[j] not in '()"{}':
+ j += 1
+ tokens.append(s[i:j])
+ i = j
+ return tokens
+
+
+def parse_sexpr(tokens: List[str]) -> Any:
+ # Recursively parse tokenized S-expression
+ token = tokens.pop(0)
+ if token == '(':
+ lst = []
+ while tokens[0] != ')':
+ lst.append(parse_sexpr(tokens))
+ tokens.pop(0) # Discard closing parenthesis
+ return lst
+ elif token.startswith('"') and token.endswith('"'):
+ return token[1:-1] # Remove surrounding quotes
+ elif token.startswith('{') and token.endswith('}'):
+ return token # Keep C code block as-is
+ else:
+ return token
+
+
+def find_define_attr_type(ast: Any) -> List[List[str]]:
+ # Traverse AST to find all (define_attr "type" ...) entries
+ result = []
+ if isinstance(ast, list):
+ if ast and ast[0] == 'define_attr' and len(ast) >= 2 and ast[1] == 'type':
+ result.append(ast)
+ for elem in ast:
+ result.extend(find_define_attr_type(elem))
+ return result
+
+
+def parse_md_file(path: Path):
+ # Read file, remove comments, and parse all top-level S-expressions
+ with open(path, encoding='utf-8') as f:
+ raw_content = f.read()
+ clean_content = remove_line_comments(raw_content)
+ tokens = tokenize_sexpr(clean_content)
+ items = []
+ while tokens:
+ items.append(parse_sexpr(tokens))
+ return items
+
+def parsing_str_set(s: str) -> set:
+ s = s.replace('\\','').split(',')
+ s = set(map(lambda x: x.strip(), s))
+ return s
+
+def get_available_types(md_file_path: str):
+ # Parse the base MD file and collect every value of define_attr "type"
+ ast = parse_md_file(Path(md_file_path))
+
+ # Get all types from the first define_attr "type" entry
+ define_attr_types = find_define_attr_type(ast)
+ types = parsing_str_set(define_attr_types[0][2])
+ return types
+
+def get_consumed_type(entry: List[str]) -> set:
+ # Extract the consumed type from a define_insn_reservation entry
+ current_type = entry[0]
+ if current_type in ['and', 'or']:
+ return get_consumed_type(entry[1]) | get_consumed_type(entry[2])
+ elif current_type == 'eq_attr' and entry[1] == 'type':
+ return parsing_str_set(entry[2])
+ return set()
+
+def check_pipemodel(md_file_path: str):
+ # Load the pipeline model MD file and collect the insn types it consumes
+ ast = parse_md_file(Path(md_file_path))
+
+ consumed_type = set()
+
+ for entry in ast:
+ entry_type = entry[0]
+ if entry_type not in ["define_insn_reservation"]:
+ continue
+ consumed_type |= get_consumed_type(entry[3])
+ return consumed_type
+
+
+def main():
+ parser = argparse.ArgumentParser(description='Check GCC pipeline model for instruction type coverage')
+ parser.add_argument('pipeline_model', help='Pipeline model file to check')
+ parser.add_argument('--base-md',
+ help='Base machine description file (default: riscv.md in script directory)',
+ default=None)
+ parser.add_argument('-v', '--verbose',
+ help='Show detailed type information',
+ action='store_true')
+ args = parser.parse_args()
+
+ # Set default base-md path if not provided
+ if args.base_md is None:
+ script_dir = Path(__file__).parent
+ base_md_path = script_dir / "riscv.md"
+ else:
+ base_md_path = Path(args.base_md)
+ available_types = get_available_types(str(base_md_path))
+ consumed_type = check_pipemodel(args.pipeline_model)
+
+ if args.verbose:
+ print("Available types:\n", available_types)
+ print("Consumed types:\n", consumed_type)
+
+ if not available_types.issubset(consumed_type):
+ print("Error: Some types are not consumed by the pipemodel")
+ print("Missing types:\n", available_types - consumed_type)
+ sys.exit(1)
+ else:
+ print("All available types are consumed by the pipemodel.")
+
+
+if __name__ == '__main__':
+ main()
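
The heart of the checker is a set-coverage test: every value of the "type"
attribute declared in riscv.md must be consumed by some
define_insn_reservation in the pipeline model under test. A rough C++ sketch
of that final comparison (the type names below are hypothetical):

#include <algorithm>
#include <cstdio>
#include <iterator>
#include <set>
#include <string>

int
main ()
{
  // Hypothetical type sets; the real script derives these from riscv.md
  // and the pipeline model passed on the command line.
  std::set<std::string> available = {"load", "store", "vfdiv"};
  std::set<std::string> consumed = {"load", "store"};

  // Types declared in riscv.md but covered by no insn reservation.
  std::set<std::string> missing;
  std::set_difference (available.begin (), available.end (),
                       consumed.begin (), consumed.end (),
                       std::inserter (missing, missing.begin ()));
  if (missing.empty ())
    std::puts ("All available types are consumed by the pipemodel.");
  else
    for (const std::string &t : missing)
      std::printf ("missing type: %s\n", t.c_str ());
  return 0;
}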
diff --git a/gcc/config/riscv/predicates.md b/gcc/config/riscv/predicates.md
index f26bafc..bdb3d22 100644
--- a/gcc/config/riscv/predicates.md
+++ b/gcc/config/riscv/predicates.md
@@ -27,6 +27,36 @@
(ior (match_operand 0 "const_arith_operand")
(match_operand 0 "register_operand")))
+(define_predicate "prefetch_const_operand"
+ (and (match_code "const_int")
+ (match_test "(IN_RANGE (INTVAL (op), 0, 511))")))
+
+;; REG or REG+D where D fits in a simm12 and has the low 5 bits
+;; clear. The REG+D form can be reloaded into a temporary if frame
+;; pointer elimination exposes an invalid offset.
+(define_predicate "zicbop_prefetch_operand"
+ (ior (match_operand 0 "register_operand")
+ (and (match_test "const_arith_operand (op, VOIDmode)")
+ (match_test "(INTVAL (op) & 0x1f) == 0"))
+ (and (match_code "plus")
+ (match_test "register_operand (XEXP (op, 0), word_mode)")
+ (match_test "const_arith_operand (XEXP (op, 1), VOIDmode)")
+ (match_test "(INTVAL (XEXP (op, 1)) & 0x1f) == 0"))))
+
+;; REG or REG+D where D fits in a uimm9
+(define_predicate "mips_prefetch_operand"
+ (ior (match_operand 0 "register_operand")
+ (match_test "prefetch_const_operand (op, VOIDmode)")
+ (and (match_code "plus")
+ (match_test "register_operand (XEXP (op, 0), word_mode)")
+ (match_test "prefetch_const_operand (XEXP (op, 1), VOIDmode)"))))
+
+;; MIPS-specific or standard RISC-V extension
+(define_predicate "prefetch_operand"
+ (if_then_else (match_test "TARGET_XMIPSCBOP")
+ (match_operand 0 "mips_prefetch_operand")
+ (match_operand 0 "zicbop_prefetch_operand")))
+
(define_predicate "lui_operand"
(and (match_code "const_int")
(match_test "LUI_OPERAND (INTVAL (op))")))
@@ -380,14 +410,6 @@
(and (match_code "const_int")
(match_test "SINGLE_BIT_MASK_OPERAND (UINTVAL (op))")))
-;; Register, small constant or single bit constant for use in
-;; bseti/binvi.
-(define_predicate "arith_or_zbs_operand"
- (ior (match_operand 0 "const_arith_operand")
- (match_operand 0 "register_operand")
- (and (match_test "TARGET_ZBS")
- (match_operand 0 "single_bit_mask_operand"))))
-
(define_predicate "not_single_bit_mask_operand"
(and (match_code "const_int")
(match_test "SINGLE_BIT_MASK_OPERAND (~UINTVAL (op))")))
@@ -514,6 +536,10 @@
(define_predicate "vector_broadcast_mask_operand"
(ior (match_operand 0 "vector_least_significant_set_mask_operand")
+ (match_operand 0 "vector_all_trues_mask_operand")))
+
+(define_predicate "strided_broadcast_mask_operand"
+ (ior (match_operand 0 "vector_least_significant_set_mask_operand")
(ior (match_operand 0 "register_operand")
(match_operand 0 "vector_all_trues_mask_operand"))))
@@ -613,7 +639,16 @@
;; The scalar operand can be directly broadcast by RVV instructions.
(define_predicate "direct_broadcast_operand"
- (match_test "riscv_vector::can_be_broadcasted_p (op)"))
+ (match_test "riscv_vector::can_be_broadcast_p (op)"))
+
+;; A strided broadcast is just a fallback pattern that loads from
+;; memory.
+(define_predicate "strided_broadcast_operand"
+ (match_test "riscv_vector::strided_broadcast_p (op)"))
+
+(define_predicate "any_broadcast_operand"
+ (ior (match_operand 0 "direct_broadcast_operand")
+ (match_operand 0 "strided_broadcast_operand")))
;; A CONST_INT operand that has exactly two bits cleared.
(define_predicate "const_nottwobits_operand"
@@ -685,3 +720,11 @@
(and (match_operand 0 "register_operand")
(match_test "REGNO (op) == RETURN_ADDR_REGNUM
|| REGNO (op) == T0_REGNUM")))
+
+(define_predicate "bitpos_mask_operand"
+ (and (match_code "const_int")
+ (match_test "TARGET_64BIT ? INTVAL (op) == 63 : INTVAL (op) == 31")))
+
+(define_predicate "reg_or_const_int_operand"
+ (ior (match_operand 0 "const_int_operand")
+ (match_operand 0 "register_operand")))
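
The two prefetch predicates encode different immediate-legality rules: the
Zicbop form takes a signed 12-bit displacement whose low 5 bits are clear,
while the MIPS xmipscbop form takes an unsigned 9-bit displacement. A
stand-alone sketch of the two checks (assuming simm12 means the usual
[-2048, 2047] I-type range):

#include <cstdint>
#include <cstdio>
#include <initializer_list>

// Zicbop: signed 12-bit displacement with the low 5 bits clear.
static bool
zicbop_prefetch_offset_ok (int64_t d)
{
  return d >= -2048 && d <= 2047 && (d & 0x1f) == 0;
}

// MIPS xmipscbop: unsigned 9-bit displacement, i.e. 0..511.
static bool
mips_prefetch_offset_ok (int64_t d)
{
  return d >= 0 && d <= 511;
}

int
main ()
{
  for (int64_t d : {0, 32, 40, 511, 512, -64})
    std::printf ("%5lld: zicbop=%d mips=%d\n", (long long) d,
                 zicbop_prefetch_offset_ok (d), mips_prefetch_offset_ok (d));
  return 0;
}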
diff --git a/gcc/config/riscv/riscv-avlprop.cc b/gcc/config/riscv/riscv-avlprop.cc
index bb4aceb..e31fdeb 100644
--- a/gcc/config/riscv/riscv-avlprop.cc
+++ b/gcc/config/riscv/riscv-avlprop.cc
@@ -156,6 +156,7 @@ get_insn_vtype_mode (rtx_insn *rinsn)
extract_insn_cached (rinsn);
int mode_idx = get_attr_mode_idx (rinsn);
gcc_assert (mode_idx != INVALID_ATTRIBUTE);
+ gcc_assert (mode_idx < recog_data.n_operands);
return GET_MODE (recog_data.operand[mode_idx]);
}
@@ -205,6 +206,7 @@ simplify_replace_vlmax_avl (rtx_insn *rinsn, rtx new_avl)
{
int index = get_attr_avl_type_idx (rinsn);
gcc_assert (index != INVALID_ATTRIBUTE);
+ gcc_assert (index < recog_data.n_operands);
validate_change_or_fail (rinsn, recog_data.operand_loc[index],
get_avl_type_rtx (avl_type::NONVLMAX), false);
}
@@ -361,6 +363,8 @@ pass_avlprop::get_vlmax_ta_preferred_avl (insn_info *insn) const
is not depend on. */
extract_insn_cached (use_insn->rtl ());
int merge_op_idx = get_attr_merge_op_idx (use_insn->rtl ());
+ gcc_assert (merge_op_idx == INVALID_ATTRIBUTE
+ || merge_op_idx < recog_data.n_operands);
if (merge_op_idx != INVALID_ATTRIBUTE
&& !satisfies_constraint_vu (recog_data.operand[merge_op_idx])
&& refers_to_regno_p (set->regno (),
@@ -508,7 +512,7 @@ pass_avlprop::execute (function *fn)
simplify_replace_vlmax_avl (rinsn, prop.second);
}
- if (rvv_vector_bits == RVV_VECTOR_BITS_ZVL)
+ if (rvv_vector_bits == RVV_VECTOR_BITS_ZVL && !TARGET_XTHEADVECTOR)
{
/* Simplify VLMAX AVL into immediate AVL.
E.g. Simplify this following case:
diff --git a/gcc/config/riscv/riscv-c.cc b/gcc/config/riscv/riscv-c.cc
index 7912b10..d2c0af3 100644
--- a/gcc/config/riscv/riscv-c.cc
+++ b/gcc/config/riscv/riscv-c.cc
@@ -36,10 +36,10 @@ along with GCC; see the file COPYING3. If not see
struct pragma_intrinsic_flags
{
- int intrinsic_target_flags;
+ int intrinsic_riscv_isa_flags;
int intrinsic_riscv_vector_elen_flags;
- int intrinsic_riscv_zvl_flags;
+ int intrinsic_riscv_zvl_subext;
int intrinsic_riscv_zvb_subext;
int intrinsic_riscv_zvk_subext;
};
@@ -47,16 +47,16 @@ struct pragma_intrinsic_flags
static void
riscv_pragma_intrinsic_flags_pollute (struct pragma_intrinsic_flags *flags)
{
- flags->intrinsic_target_flags = target_flags;
+ flags->intrinsic_riscv_isa_flags = riscv_isa_flags;
flags->intrinsic_riscv_vector_elen_flags = riscv_vector_elen_flags;
- flags->intrinsic_riscv_zvl_flags = riscv_zvl_flags;
+ flags->intrinsic_riscv_zvl_subext = riscv_zvl_subext;
flags->intrinsic_riscv_zvb_subext = riscv_zvb_subext;
flags->intrinsic_riscv_zvk_subext = riscv_zvk_subext;
- target_flags = target_flags
+ riscv_isa_flags = riscv_isa_flags
| MASK_VECTOR;
- riscv_zvl_flags = riscv_zvl_flags
+ riscv_zvl_subext = riscv_zvl_subext
| MASK_ZVL32B
| MASK_ZVL64B
| MASK_ZVL128B
@@ -97,10 +97,10 @@ riscv_pragma_intrinsic_flags_pollute (struct pragma_intrinsic_flags *flags)
static void
riscv_pragma_intrinsic_flags_restore (struct pragma_intrinsic_flags *flags)
{
- target_flags = flags->intrinsic_target_flags;
+ riscv_isa_flags = flags->intrinsic_riscv_isa_flags;
riscv_vector_elen_flags = flags->intrinsic_riscv_vector_elen_flags;
- riscv_zvl_flags = flags->intrinsic_riscv_zvl_flags;
+ riscv_zvl_subext = flags->intrinsic_riscv_zvl_subext;
riscv_zvb_subext = flags->intrinsic_riscv_zvb_subext;
riscv_zvk_subext = flags->intrinsic_riscv_zvk_subext;
}
@@ -239,26 +239,22 @@ riscv_cpu_cpp_builtins (cpp_reader *pfile)
size_t max_ext_len = 0;
/* Figure out the max length of extension name for reserving buffer. */
- for (const riscv_subset_t *subset = subset_list->begin ();
- subset != subset_list->end ();
- subset = subset->next)
- max_ext_len = MAX (max_ext_len, subset->name.length ());
+ for (auto &subset : *subset_list)
+ max_ext_len = MAX (max_ext_len, subset.name.length ());
char *buf = (char *)alloca (max_ext_len + 10 /* For __riscv_ and '\0'. */);
- for (const riscv_subset_t *subset = subset_list->begin ();
- subset != subset_list->end ();
- subset = subset->next)
+ for (auto &subset : *subset_list)
{
- int version_value = riscv_ext_version_value (subset->major_version,
- subset->minor_version);
+ int version_value = riscv_ext_version_value (subset.major_version,
+ subset.minor_version);
/* Special rule for zicsr and zifencei, it's used for ISA spec 2.2 or
earlier. */
- if ((subset->name == "zicsr" || subset->name == "zifencei")
+ if ((subset.name == "zicsr" || subset.name == "zifencei")
&& version_value == 0)
version_value = riscv_ext_version_value (2, 0);
- sprintf (buf, "__riscv_%s", subset->name.c_str ());
+ sprintf (buf, "__riscv_%s", subset.name.c_str ());
builtin_define_with_int_value (buf, version_value);
}
}
@@ -279,7 +275,8 @@ riscv_pragma_intrinsic (cpp_reader *)
const char *name = TREE_STRING_POINTER (x);
if (strcmp (name, "vector") == 0
- || strcmp (name, "xtheadvector") == 0)
+ || strcmp (name, "xtheadvector") == 0
+ || strcmp (name, "xsfvcp") == 0)
{
struct pragma_intrinsic_flags backup_flags;
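
The pollute/restore pair implements a plain save-override-restore protocol
around the pragma: riscv_isa_flags and the various subext masks are saved,
forced on so the full set of intrinsics can be registered, then put back. A
compressed sketch of the same protocol using RAII (the flag variable and mask
bit are stand-ins, not GCC's real ones):

#include <cstdio>

static unsigned isa_flags;                    // stand-in for riscv_isa_flags
constexpr unsigned MASK_VECTOR_BIT = 1u << 0; // hypothetical mask bit

struct pragma_flags_guard
{
  unsigned saved;
  pragma_flags_guard () : saved (isa_flags)
  {
    isa_flags |= MASK_VECTOR_BIT;   // "pollute": force the needed bits on
  }
  ~pragma_flags_guard ()
  {
    isa_flags = saved;              // "restore": put the user's flags back
  }
};

int
main ()
{
  isa_flags = 0;
  {
    pragma_flags_guard guard;
    std::printf ("inside pragma: %#x\n", isa_flags);
  }
  std::printf ("after pragma: %#x\n", isa_flags);
  return 0;
}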
diff --git a/gcc/config/riscv/riscv-cores.def b/gcc/config/riscv/riscv-cores.def
index 2918496..8f0f630 100644
--- a/gcc/config/riscv/riscv-cores.def
+++ b/gcc/config/riscv/riscv-cores.def
@@ -33,6 +33,7 @@
#define RISCV_TUNE(TUNE_NAME, PIPELINE_MODEL, TUNE_INFO)
#endif
+RISCV_TUNE("generic", generic, generic_tune_info)
RISCV_TUNE("rocket", generic, rocket_tune_info)
RISCV_TUNE("sifive-3-series", generic, rocket_tune_info)
RISCV_TUNE("sifive-5-series", generic, rocket_tune_info)
@@ -41,9 +42,17 @@ RISCV_TUNE("sifive-p400-series", sifive_p400, sifive_p400_tune_info)
RISCV_TUNE("sifive-p600-series", sifive_p600, sifive_p600_tune_info)
RISCV_TUNE("tt-ascalon-d8", generic_ooo, tt_ascalon_d8_tune_info)
RISCV_TUNE("thead-c906", generic, thead_c906_tune_info)
+RISCV_TUNE("xt-c908", generic, generic_ooo_tune_info)
+RISCV_TUNE("xt-c908v", generic, generic_ooo_tune_info)
+RISCV_TUNE("xt-c910", generic, generic_ooo_tune_info)
+RISCV_TUNE("xt-c910v2", generic, generic_ooo_tune_info)
+RISCV_TUNE("xt-c920", generic, generic_ooo_tune_info)
+RISCV_TUNE("xt-c920v2", generic, generic_ooo_tune_info)
RISCV_TUNE("xiangshan-nanhu", xiangshan, xiangshan_nanhu_tune_info)
+RISCV_TUNE("xiangshan-kunminghu", xiangshan, generic_ooo_tune_info)
RISCV_TUNE("generic-ooo", generic_ooo, generic_ooo_tune_info)
RISCV_TUNE("size", generic, optimize_size_tune_info)
+RISCV_TUNE("mips-p8700", mips_p8700, mips_p8700_tune_info)
#undef RISCV_TUNE
@@ -93,6 +102,48 @@ RISCV_CORE("thead-c906", "rv64imafdc_xtheadba_xtheadbb_xtheadbs_xtheadcmo_"
"xtheadmemidx_xtheadmempair_xtheadsync",
"thead-c906")
+RISCV_CORE("xt-c908", "rv64imafdc_zicbom_zicbop_zicboz_zicntr_zicsr_"
+ "zifencei_zihintpause_zihpm_zfh_zba_zbb_zbc_zbs_"
+ "sstc_svinval_svnapot_svpbmt_xtheadba_xtheadbb_"
+ "xtheadbs_xtheadcmo_xtheadcondmov_xtheadfmemidx_"
+ "xtheadmac_xtheadmemidx_xtheadmempair_xtheadsync",
+ "xt-c908")
+RISCV_CORE("xt-c908v", "rv64imafdcv_zicbom_zicbop_zicboz_zicntr_zicsr_"
+ "zifencei_zihintpause_zihpm_zfh_zba_zbb_zbc_zbs_"
+ "zvfh_sstc_svinval_svnapot_svpbmt_xtheadba_"
+ "xtheadbb_xtheadbs_xtheadcmo_xtheadcondmov_"
+ "xtheadfmemidx_xtheadmac_xtheadmemidx_"
+ "xtheadmempair_xtheadsync",
+ "xt-c908")
+RISCV_CORE("xt-c910", "rv64imafdc_zicntr_zicsr_zifencei_zihpm_zfh_"
+ "xtheadba_xtheadbb_xtheadbs_xtheadcmo_"
+ "xtheadcondmov_xtheadfmemidx_xtheadmac_"
+ "xtheadmemidx_xtheadmempair_xtheadsync",
+ "xt-c910")
+RISCV_CORE("xt-c910v2", "rv64imafdc_zicbom_zicbop_zicboz_zicntr_zicond_"
+ "zicsr_zifencei_zihintntl_zihintpause_zihpm_"
+ "zawrs_zfa_zfbfmin_zfh_zca_zcb_zcd_zba_zbb_zbc_"
+ "zbs_sscofpmf_sstc_svinval_svnapot_svpbmt_"
+ "xtheadba_xtheadbb_xtheadbs_xtheadcmo_"
+ "xtheadcondmov_xtheadfmemidx_xtheadmac_"
+ "xtheadmemidx_xtheadmempair_xtheadsync",
+ "xt-c910v2")
+RISCV_CORE("xt-c920", "rv64imafdc_zicntr_zicsr_zifencei_zihpm_zfh_"
+ "xtheadba_xtheadbb_xtheadbs_xtheadcmo_"
+ "xtheadcondmov_xtheadfmemidx_xtheadmac_"
+ "xtheadmemidx_xtheadmempair_xtheadsync_"
+ "xtheadvector",
+ "xt-c910")
+RISCV_CORE("xt-c920v2", "rv64imafdcv_zicbom_zicbop_zicboz_zicntr_zicond_"
+ "zicsr_zifencei_zihintntl_zihintpause_zihpm_"
+ "zawrs_zfa_zfbfmin_zfh_zca_zcb_zcd_zba_zbb_zbc_"
+ "zbs_zvfbfmin_zvfbfwma_zvfh_sscofpmf_sstc_"
+ "svinval_svnapot_svpbmt_xtheadba_xtheadbb_"
+ "xtheadbs_xtheadcmo_xtheadcondmov_xtheadfmemidx_"
+ "xtheadmac_xtheadmemidx_xtheadmempair_"
+ "xtheadsync",
+ "xt-c920v2")
+
RISCV_CORE("tt-ascalon-d8", "rv64imafdcv_zic64b_zicbom_zicbop_zicboz_"
"ziccamoa_ziccif_zicclsm_ziccrse_zicond_zicsr_"
"zifencei_zihintntl_zihintpause_zimop_za64rs_"
@@ -104,4 +155,20 @@ RISCV_CORE("xiangshan-nanhu", "rv64imafdc_zba_zbb_zbc_zbs_"
"zbkb_zbkc_zbkx_zknd_zkne_zknh_zksed_zksh_"
"svinval_zicbom_zicboz",
"xiangshan-nanhu")
+
+RISCV_CORE("xiangshan-kunminghu", "rv64imafdcbvh_sdtrig_sha_shcounterenw_"
+ "shgatpa_shlcofideleg_shtvala_shvsatpa_shvstvala_shvstvecd_"
+ "smaia_smcsrind_smdbltrp_smmpm_smnpm_smrnmi_smstateen_"
+ "ssaia_ssccptr_sscofpmf_sscounterenw_sscsrind_ssdbltrp_"
+ "ssnpm_sspm_ssstateen_ssstrict_sstc_sstvala_sstvecd_"
+ "ssu64xl_supm_svade_svbare_svinval_svnapot_svpbmt_za64rs_"
+ "zacas_zawrs_zba_zbb_zbc_zbkb_zbkc_zbkx_zbs_zcb_zcmop_"
+ "zfa_zfh_zfhmin_zic64b_zicbom_zicbop_zicboz_ziccif_"
+ "zicclsm_ziccrse_zicntr_zicond_zicsr_zifencei_zihintpause_"
+ "zihpm_zimop_zkn_zknd_zkne_zknh_zksed_zksh_zkt_zvbb_zvfh_"
+ "zvfhmin_zvkt_zvl128b_zvl32b_zvl64b",
+ "xiangshan-kunminghu")
+
+RISCV_CORE("mips-p8700", "rv64imfd_zicsr_zifencei_zalrsc_zba_zbb",
+ "mips-p8700")
#undef RISCV_CORE
diff --git a/gcc/config/riscv/riscv-ext-corev.def b/gcc/config/riscv/riscv-ext-corev.def
new file mode 100644
index 0000000..eb97399
--- /dev/null
+++ b/gcc/config/riscv/riscv-ext-corev.def
@@ -0,0 +1,87 @@
+/* CORE-V extension definition file for RISC-V.
+ Copyright (C) 2025 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>.
+
+Please run `make riscv-regen` in the build folder after updating anything.
+
+For the format of DEFINE_RISCV_EXT, please refer to riscv-ext.def. */
+
+DEFINE_RISCV_EXT(
+ /* NAME */ xcvalu,
+ /* UPPERCASE_NAME */ XCVALU,
+ /* FULL_NAME */ "Core-V miscellaneous ALU extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ xcv,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ xcvbi,
+ /* UPPERCASE_NAME */ XCVBI,
+ /* FULL_NAME */ "xcvbi extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ xcv,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ xcvelw,
+ /* UPPERCASE_NAME */ XCVELW,
+ /* FULL_NAME */ "Core-V event load word extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ xcv,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ xcvmac,
+ /* UPPERCASE_NAME */ XCVMAC,
+ /* FULL_NAME */ "Core-V multiply-accumulate extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ xcv,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ xcvsimd,
+ /* UPPERCASE_NAME */ XCVSIMD,
+ /* FULL_NAME */ "xcvsimd extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ xcv,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
diff --git a/gcc/config/riscv/riscv-ext-mips.def b/gcc/config/riscv/riscv-ext-mips.def
new file mode 100644
index 0000000..132f6c1
--- /dev/null
+++ b/gcc/config/riscv/riscv-ext-mips.def
@@ -0,0 +1,48 @@
+/* MIPS extension definition file for RISC-V.
+ Copyright (C) 2025 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>.
+
+Please run `make riscv-regen` in the build folder after updating anything.
+
+For the format of DEFINE_RISCV_EXT, please refer to riscv-ext.def. */
+
+DEFINE_RISCV_EXT (
+ /* NAME. */ xmipscmov,
+ /* UPPERCASE_NAME. */ XMIPSCMOV,
+ /* FULL_NAME. */ "MIPS conditional move extension",
+ /* DESC. */ "",
+ /* URL. */ ,
+ /* DEP_EXTS. */ ({}),
+ /* SUPPORTED_VERSIONS. */ ({{1, 0}}),
+ /* FLAG_GROUP. */ xmips,
+ /* BITMASK_GROUP_ID. */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION. */ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS. */ 0)
+
+DEFINE_RISCV_EXT (
+ /* NAME. */ xmipscbop,
+ /* UPPERCASE_NAME. */ XMIPSCBOP,
+ /* FULL_NAME. */ "MIPS prefetch extension",
+ /* DESC. */ "",
+ /* URL. */ ,
+ /* DEP_EXTS. */ ({}),
+ /* SUPPORTED_VERSIONS. */ ({{1, 0}}),
+ /* FLAG_GROUP. */ xmips,
+ /* BITMASK_GROUP_ID. */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION. */ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS. */ 0)
diff --git a/gcc/config/riscv/riscv-ext-sifive.def b/gcc/config/riscv/riscv-ext-sifive.def
new file mode 100644
index 0000000..c8d79da
--- /dev/null
+++ b/gcc/config/riscv/riscv-ext-sifive.def
@@ -0,0 +1,87 @@
+/* SiFive extension definition file for RISC-V.
+ Copyright (C) 2025 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>.
+
+Please run `make riscv-regen` in the build folder after updating anything.
+
+For the format of DEFINE_RISCV_EXT, please refer to riscv-ext.def. */
+
+DEFINE_RISCV_EXT(
+ /* NAME */ xsfcease,
+ /* UPPERCASE_NAME */ XSFCEASE,
+ /* FULL_NAME */ "xsfcease extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ xsf,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ xsfvcp,
+ /* UPPERCASE_NAME */ XSFVCP,
+ /* FULL_NAME */ "xsfvcp extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"zve32x"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ xsf,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ xsfvfnrclipxfqf,
+ /* UPPERCASE_NAME */ XSFVFNRCLIPXFQF,
+ /* FULL_NAME */ "xsfvfnrclipxfqf extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ xsf,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ xsfvqmaccdod,
+ /* UPPERCASE_NAME */ XSFVQMACCDOD,
+ /* FULL_NAME */ "xsfvqmaccdod extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ xsf,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ xsfvqmaccqoq,
+ /* UPPERCASE_NAME */ XSFVQMACCQOQ,
+ /* FULL_NAME */ "xsfvqmaccqoq extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ xsf,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
diff --git a/gcc/config/riscv/riscv-ext-thead.def b/gcc/config/riscv/riscv-ext-thead.def
new file mode 100644
index 0000000..327d2ae
--- /dev/null
+++ b/gcc/config/riscv/riscv-ext-thead.def
@@ -0,0 +1,191 @@
+/* T-head extension definition file for RISC-V.
+ Copyright (C) 2025 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>.
+
+Please run `make riscv-regen` in the build folder after updating anything.
+
+For the format of DEFINE_RISCV_EXT, please refer to riscv-ext.def. */
+
+DEFINE_RISCV_EXT(
+ /* NAME */ xtheadba,
+ /* UPPERCAE_NAME */ XTHEADBA,
+ /* FULL_NAME */ "T-head address calculation extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ xthead,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ xtheadbb,
+ /* UPPERCAE_NAME */ XTHEADBB,
+ /* FULL_NAME */ "T-head basic bit-manipulation extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ xthead,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ xtheadbs,
+ /* UPPERCAE_NAME */ XTHEADBS,
+ /* FULL_NAME */ "T-head single-bit instructions extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ xthead,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ xtheadcmo,
+ /* UPPERCAE_NAME */ XTHEADCMO,
+ /* FULL_NAME */ "T-head cache management operations extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ xthead,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ xtheadcondmov,
+ /* UPPERCAE_NAME */ XTHEADCONDMOV,
+ /* FULL_NAME */ "T-head conditional move extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ xthead,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ xtheadfmemidx,
+ /* UPPERCAE_NAME */ XTHEADFMEMIDX,
+ /* FULL_NAME */ "T-head indexed memory operations for floating-point registers extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ xthead,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ xtheadfmv,
+ /* UPPERCAE_NAME */ XTHEADFMV,
+ /* FULL_NAME */ "T-head double floating-point high-bit data transmission extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ xthead,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ xtheadint,
+ /* UPPERCAE_NAME */ XTHEADINT,
+ /* FULL_NAME */ "T-head acceleration interruption extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ xthead,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ xtheadmac,
+ /* UPPERCAE_NAME */ XTHEADMAC,
+ /* FULL_NAME */ "T-head multiply-accumulate extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ xthead,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ xtheadmemidx,
+ /* UPPERCAE_NAME */ XTHEADMEMIDX,
+ /* FULL_NAME */ "T-head indexed memory operation extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ xthead,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ xtheadmempair,
+ /* UPPERCAE_NAME */ XTHEADMEMPAIR,
+ /* FULL_NAME */ "T-head two-GPR memory operation extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ xthead,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ xtheadsync,
+ /* UPPERCAE_NAME */ XTHEADSYNC,
+ /* FULL_NAME */ "T-head multi-core synchronization extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ xthead,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ xtheadvector,
+ /* UPPERCAE_NAME */ XTHEADVECTOR,
+ /* FULL_NAME */ "xtheadvector extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ xthead,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
diff --git a/gcc/config/riscv/riscv-ext-ventana.def b/gcc/config/riscv/riscv-ext-ventana.def
new file mode 100644
index 0000000..deed47f
--- /dev/null
+++ b/gcc/config/riscv/riscv-ext-ventana.def
@@ -0,0 +1,35 @@
+/* Ventana extension definition file for RISC-V.
+ Copyright (C) 2025 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>.
+
+Please run `make riscv-regen` in the build folder after updating anything.
+
+For the format of DEFINE_RISCV_EXT, please refer to riscv-ext.def. */
+
+DEFINE_RISCV_EXT(
+ /* NAME */ xventanacondops,
+ /* UPPERCASE_NAME */ XVENTANACONDOPS,
+ /* FULL_NAME */ "Ventana integer conditional operations extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ xventana,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
diff --git a/gcc/config/riscv/riscv-ext.def b/gcc/config/riscv/riscv-ext.def
new file mode 100644
index 0000000..09f18ad
--- /dev/null
+++ b/gcc/config/riscv/riscv-ext.def
@@ -0,0 +1,2085 @@
+/* RISC-V extension definition file for RISC-V.
+ Copyright (C) 2025 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>.
+
+Please run `make riscv-regen` in the build folder after updating anything.
+
+Format of DEFINE_RISCV_EXT:
+ NAME:
+ The name of the extension, e.g. "i".
+ UPPERCASE_NAME:
+ The name of the extension in uppercase, e.g. "ZBA"; this is used
+ to generate the TARGET_<ext-name> and MASK_<ext-name> macros.
+ For extensions named with a single letter, the name should also carry
+ an 'RV' prefix, e.g. 'v' should use 'RVV' here.
+ Some old extensions like 'i' and 'm' do not follow this rule.
+ FULL_NAME:
+ The full name of the extension, e.g. "Base integer extension".
+ DESC:
+ A short description of the extension; this is used when generating
+ documentation. GNU Texinfo format can be used in this field.
+ URL:
+ A URL for the extension.
+ DEP_EXTS:
+ A list of dependent extensions; each entry is either a string or
+ a tuple. The first element of a tuple is the name of the extension
+ and the second element is a function that takes a subset_list and
+ returns true if the extension should be added as a dependent
+ extension; `c` and `zca` are examples of this.
+ SUPPORTED_VERSIONS:
+ A list of tuples; each tuple contains the major version number, the minor
+ version number and the class of the specification. The version number is
+ a list of integers, e.g. {2, 0} for version 2.0. The class is
+ a string, e.g. "ISA_SPEC_CLASS_20191213"; the class of the
+ specification is not required for any new extension.
+ FLAG_GROUP:
+ The group of the extension; this is used to group related extensions
+ together. The group is a string, e.g. "base", "zi", "zm", "za", "zf",
+ "zc", "zb" and "zk".
+ In theory this should be generated automatically some day...
+ BITMASK_GROUP_ID:
+ The group id of the extension for __riscv_feature_bits.
+ This field should be kept in sync with riscv-c-api-doc; keep
+ BITMASK_NOT_YET_ALLOCATED if no group id has been allocated yet.
+ https://github.com/riscv-non-isa/riscv-c-api-doc/blob/main/src/c-api.adoc#extension-bitmask-definitions
+ BITMASK_BIT_POSITION:
+ The bit position of the extension for __riscv_feature_bits.
+ This field should be kept in sync with riscv-c-api-doc; keep
+ BITMASK_NOT_YET_ALLOCATED if no bit position has been allocated yet.
+ https://github.com/riscv-non-isa/riscv-c-api-doc/blob/main/src/c-api.adoc#extension-bitmask-definitions
+ EXTRA_EXTENSION_FLAGS:
+ Extra flags for the extension, expressed as a bitmask. The defined
+ flags are:
+ - EXT_FLAG_MACRO: Set this flag if the extension is just a macro for a
+ set of other extensions and does not define any new instructions,
+ CSRs or behaviors; for example, the `b` extension is just a macro
+ for `zba`, `zbb` and `zbs`.
+*/
+
+DEFINE_RISCV_EXT(
+ /* NAME */ e,
+ /* UPPERCASE_NAME */ RVE,
+ /* FULL_NAME */ "Reduced base integer extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({}),
+ /* SUPPORTED_VERSIONS */ ({{2, 0}}),
+ /* FLAG_GROUP */ base,
+ /* BITMASK_GROUP_ID */ 0,
+ /* BITMASK_BIT_POSITION*/ 4,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ i,
+ /* UPPERCASE_NAME */ RVI,
+ /* FULL_NAME */ "Base integer extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({}),
+ /* SUPPORTED_VERSIONS */ ({{2, 1, ISA_SPEC_CLASS_20191213},
+ {2, 1, ISA_SPEC_CLASS_20190608},
+ {2, 0, ISA_SPEC_CLASS_2P2}}),
+ /* FLAG_GROUP */ base,
+ /* BITMASK_GROUP_ID */ 0,
+ /* BITMASK_BIT_POSITION*/ 8,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ m,
+ /* UPPERCASE_NAME */ MUL,
+ /* FULL_NAME */ "Integer multiplication and division extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"zmmul"}),
+ /* SUPPORTED_VERSIONS */ ({{2, 0}}),
+ /* FLAG_GROUP */ base,
+ /* BITMASK_GROUP_ID */ 0,
+ /* BITMASK_BIT_POSITION*/ 12,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ a,
+ /* UPPERCASE_NAME */ ATOMIC,
+ /* FULL_NAME */ "Atomic extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"zaamo", "zalrsc"}),
+ /* SUPPORTED_VERSIONS */ ({{2, 1, ISA_SPEC_CLASS_20191213},
+ {2, 0, ISA_SPEC_CLASS_20190608},
+ {2, 0, ISA_SPEC_CLASS_2P2}}),
+ /* FLAG_GROUP */ base,
+ /* BITMASK_GROUP_ID */ 0,
+ /* BITMASK_BIT_POSITION*/ 0,
+ /* EXTRA_EXTENSION_FLAGS */ EXT_FLAG_MACRO)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ f,
+ /* UPPERCASE_NAME */ HARD_FLOAT,
+ /* FULL_NAME */ "Single-precision floating-point extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"zicsr"}),
+ /* SUPPORTED_VERSIONS */ ({{2, 2, ISA_SPEC_CLASS_20191213},
+ {2, 2, ISA_SPEC_CLASS_20190608},
+ {2, 0, ISA_SPEC_CLASS_2P2}}),
+ /* FLAG_GROUP */ base,
+ /* BITMASK_GROUP_ID */ 0,
+ /* BITMASK_BIT_POSITION*/ 5,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ d,
+ /* UPPERCASE_NAME */ DOUBLE_FLOAT,
+ /* FULL_NAME */ "Double-precision floating-point extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"f", "zicsr"}),
+ /* SUPPORTED_VERSIONS */ ({{2, 2, ISA_SPEC_CLASS_20191213},
+ {2, 2, ISA_SPEC_CLASS_20190608},
+ {2, 0, ISA_SPEC_CLASS_2P2}}),
+ /* FLAG_GROUP */ base,
+ /* BITMASK_GROUP_ID */ 0,
+ /* BITMASK_BIT_POSITION*/ 3,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ c,
+ /* UPPERCASE_NAME */ RVC,
+ /* FULL_NAME */ "Compressed extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"zca",
+ {"zcf",
+ [] (const riscv_subset_list *subset_list) -> bool
+ {
+ return subset_list->xlen () == 32
+ && subset_list->lookup ("f");
+ }},
+ {"zcd",
+ [] (const riscv_subset_list *subset_list) -> bool
+ {
+ return subset_list->lookup ("d");
+ }}}),
+ /* SUPPORTED_VERSIONS */ ({{2, 0}}),
+ /* FLAG_GROUP */ base,
+ /* BITMASK_GROUP_ID */ 0,
+ /* BITMASK_BIT_POSITION*/ 2,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ b,
+ /* UPPERCASE_NAME */ RVB,
+ /* FULL_NAME */ "b extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"zba", "zbb", "zbs"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ base,
+ /* BITMASK_GROUP_ID */ 0,
+ /* BITMASK_BIT_POSITION*/ 1,
+ /* EXTRA_EXTENSION_FLAGS */ EXT_FLAG_MACRO)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ v,
+ /* UPPERCASE_NAME */ RVV,
+ /* FULL_NAME */ "Vector extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"zvl128b", "zve64d"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ base,
+ /* BITMASK_GROUP_ID */ 0,
+ /* BITMASK_BIT_POSITION*/ 21,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ h,
+ /* UPPERCASE_NAME */ RVH,
+ /* FULL_NAME */ "Hypervisor extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ base,
+ /* BITMASK_GROUP_ID */ 0,
+ /* BITMASK_BIT_POSITION*/ 7,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zic64b,
+ /* UPPERCASE_NAME */ ZIC64B,
+ /* FULL_NAME */ "Cache block size isf 64 bytes",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ zi,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zicbom,
+ /* UPPERCASE_NAME */ ZICBOM,
+ /* FULL_NAME */ "Cache-block management extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ zi,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zicbop,
+ /* UPPERCASE_NAME */ ZICBOP,
+ /* FULL_NAME */ "Cache-block prefetch extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ zi,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zicboz,
+ /* UPPERCASE_NAME */ ZICBOZ,
+ /* FULL_NAME */ "Cache-block zero extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ zi,
+ /* BITMASK_GROUP_ID */ 0,
+ /* BITMASK_BIT_POSITION*/ 37,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ ziccamoa,
+ /* UPPERCASE_NAME */ ZICCAMOA,
+ /* FULL_NAME */ "Main memory supports all atomics in A",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ zi,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ ziccif,
+ /* UPPERCASE_NAME */ ZICCIF,
+ /* FULL_NAME */ "Main memory supports instruction fetch with atomicity requirement",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ zi,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zicclsm,
+ /* UPPERCASE_NAME */ ZICCLSM,
+ /* FULL_NAME */ "Main memory supports misaligned loads/stores",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ zi,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ ziccrse,
+ /* UPPERCASE_NAME */ ZICCRSE,
+ /* FULL_NAME */ "Main memory supports forward progress on LR/SC sequences",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ zi,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zicfilp,
+ /* UPPERCASE_NAME */ ZICFILP,
+ /* FULL_NAME */ "zicfilp extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"zicsr"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ zi,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zicfiss,
+ /* UPPERCASE_NAME */ ZICFISS,
+ /* FULL_NAME */ "zicfiss extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"zicsr", "zimop"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ zi,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zicntr,
+ /* UPPERCASE_NAME */ ZICNTR,
+ /* FULL_NAME */ "Standard extension for base counters and timers",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({}),
+ /* SUPPORTED_VERSIONS */ ({{2, 0}}),
+ /* FLAG_GROUP */ zi,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zicond,
+ /* UPPERCASE_NAME */ ZICOND,
+ /* FULL_NAME */ "Integer conditional operations extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ zi,
+ /* BITMASK_GROUP_ID */ 0,
+ /* BITMASK_BIT_POSITION*/ 38,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zicsr,
+ /* UPPERCASE_NAME */ ZICSR,
+ /* FULL_NAME */ "Control and status register access extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({}),
+ /* SUPPORTED_VERSIONS */ ({{2, 0}}),
+ /* FLAG_GROUP */ zi,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zifencei,
+ /* UPPERCASE_NAME */ ZIFENCEI,
+ /* FULL_NAME */ "Instruction-fetch fence extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({}),
+ /* SUPPORTED_VERSIONS */ ({{2, 0}}),
+ /* FLAG_GROUP */ zi,
+ /* BITMASK_GROUP_ID */ 1,
+ /* BITMASK_BIT_POSITION*/ 11,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zihintntl,
+ /* UPPERCASE_NAME */ ZIHINTNTL,
+ /* FULL_NAME */ "Non-temporal locality hints extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ zi,
+ /* BITMASK_GROUP_ID */ 0,
+ /* BITMASK_BIT_POSITION*/ 39,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zihintpause,
+ /* UPPERCASE_NAME */ ZIHINTPAUSE,
+ /* FULL_NAME */ "Pause hint extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({}),
+ /* SUPPORTED_VERSIONS */ ({{2, 0}}),
+ /* FLAG_GROUP */ zi,
+ /* BITMASK_GROUP_ID */ 0,
+ /* BITMASK_BIT_POSITION*/ 40,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zihpm,
+ /* UPPERCASE_NAME */ ZIHPM,
+ /* FULL_NAME */ "Standard extension for hardware performance counters",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({}),
+ /* SUPPORTED_VERSIONS */ ({{2, 0}}),
+ /* FLAG_GROUP */ zi,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zimop,
+ /* UPPERCASE_NAME */ ZIMOP,
+ /* FULL_NAME */ "zimop extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ zi,
+ /* BITMASK_GROUP_ID */ 1,
+ /* BITMASK_BIT_POSITION*/ 1,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zilsd,
+ /* UPPERCASE_NAME */ ZILSD,
+ /* FULL_NAME */ "Load/Store pair instructions extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ zi,
+ /* BITMASK_GROUP_ID */ 1,
+ /* BITMASK_BIT_POSITION*/ 8,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zmmul,
+ /* UPPERCASE_NAME */ ZMMUL,
+ /* FULL_NAME */ "Integer multiplication extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ zm,
+ /* BITMASK_GROUP_ID */ 1,
+ /* BITMASK_BIT_POSITION*/ 12,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ za128rs,
+ /* UPPERCASE_NAME */ ZA128RS,
+ /* FULL_NAME */ "Reservation set size of 128 bytes",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ za,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ za64rs,
+ /* UPPERCASE_NAME */ ZA64RS,
+ /* FULL_NAME */ "Reservation set size of 64 bytes",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ za,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zaamo,
+ /* UPPERCASE_NAME */ ZAAMO,
+ /* FULL_NAME */ "zaamo extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ za,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zabha,
+ /* UPPERCASE_NAME */ ZABHA,
+ /* FULL_NAME */ "zabha extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"zaamo"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ za,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zacas,
+ /* UPPERCASE_NAME */ ZACAS,
+ /* FULL_NAME */ "zacas extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"zaamo"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ za,
+ /* BITMASK_GROUP_ID */ 0,
+ /* BITMASK_BIT_POSITION*/ 26,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zalrsc,
+ /* UPPERCASE_NAME */ ZALRSC,
+ /* FULL_NAME */ "zalrsc extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ za,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zawrs,
+ /* UPPERCASE_NAME */ ZAWRS,
+ /* FULL_NAME */ "Wait-on-reservation-set extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"zalrsc"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ za,
+ /* BITMASK_GROUP_ID */ 1,
+ /* BITMASK_BIT_POSITION*/ 7,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zama16b,
+ /* UPPERCASE_NAME */ ZAMA16B,
+ /* FULL_NAME */ "Zama16b extension",
+ /* DESC */ "Misaligned loads, stores, and AMOs to main memory regions that do"
+ " not cross a naturally aligned 16-byte boundary are atomic.",
+ /* URL */ ,
+ /* DEP_EXTS */ ({}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ za,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zfa,
+ /* UPPERCASE_NAME */ ZFA,
+ /* FULL_NAME */ "Additional floating-point extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"f"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ zf,
+ /* BITMASK_GROUP_ID */ 0,
+ /* BITMASK_BIT_POSITION*/ 34,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zfbfmin,
+ /* UPPERCASE_NAME */ ZFBFMIN,
+ /* FULL_NAME */ "zfbfmin extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"zfhmin"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ zf,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zfh,
+ /* UPPERCASE_NAME */ ZFH,
+ /* FULL_NAME */ "Half-precision floating-point extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"zfhmin"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ zf,
+ /* BITMASK_GROUP_ID */ 0,
+ /* BITMASK_BIT_POSITION*/ 35,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zfhmin,
+ /* UPPERCASE_NAME */ ZFHMIN,
+ /* FULL_NAME */ "Minimal half-precision floating-point extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"f"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ zf,
+ /* BITMASK_GROUP_ID */ 0,
+ /* BITMASK_BIT_POSITION*/ 36,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zfinx,
+ /* UPPERCASE_NAME */ ZFINX,
+ /* FULL_NAME */ "Single-precision floating-point in integer registers extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"zicsr"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ zinx,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zdinx,
+ /* UPPERCASE_NAME */ ZDINX,
+ /* FULL_NAME */ "Double-precision floating-point in integer registers extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"zfinx", "zicsr"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ zinx,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zca,
+ /* UPPERCASE_NAME */ ZCA,
+ /* FULL_NAME */ "Integer compressed instruction extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({{"c",
+[] (const riscv_subset_list *subset_list) -> bool
+{
+ /* For RV32 Zca implies C for one of these combinations of
+ extensions: Zca, F_Zca_Zcf and FD_Zca_Zcf_Zcd. */
+ if (subset_list->xlen () == 32)
+ {
+ if (subset_list->lookup ("d"))
+ return subset_list->lookup ("zcf") && subset_list->lookup ("zcd");
+
+ if (subset_list->lookup ("f"))
+ return subset_list->lookup ("zcf");
+
+ return true;
+ }
+
+ /* For RV64 Zca implies C for one of these combinations of
+ extensions: Zca and FD_Zca_Zcd (Zcf is not available
+ for RV64). */
+ if (subset_list->xlen () == 64)
+ {
+ if (subset_list->lookup ("d"))
+ return subset_list->lookup ("zcd");
+
+ return true;
+ }
+
+ /* Do nothing for future RV128 specification. Behaviour
+ for this case is not yet well defined. */
+ return false;
+}}}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ zc,
+ /* BITMASK_GROUP_ID */ 1,
+ /* BITMASK_BIT_POSITION*/ 2,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zcb,
+ /* UPPERCASE_NAME */ ZCB,
+ /* FULL_NAME */ "Simple compressed instruction extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"zca"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ zc,
+ /* BITMASK_GROUP_ID */ 1,
+ /* BITMASK_BIT_POSITION*/ 3,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zcd,
+ /* UPPERCASE_NAME */ ZCD,
+ /* FULL_NAME */ "Compressed double-precision floating point loads and stores extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"zca"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ zc,
+ /* BITMASK_GROUP_ID */ 1,
+ /* BITMASK_BIT_POSITION*/ 4,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zce,
+ /* UPPERCASE_NAME */ ZCE,
+ /* FULL_NAME */ "Compressed instruction extensions for embedded processors",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"zca", "zcb", "zcmp", "zcmt",
+ {"zcf",
+ [] (const riscv_subset_list *subset_list) -> bool
+ {
+ return subset_list->xlen () == 32
+ && subset_list->lookup ("f");
+ }}}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ zc,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zcf,
+ /* UPPERCASE_NAME */ ZCF,
+ /* FULL_NAME */ "Compressed single-precision floating point loads and stores extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"zca"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ zc,
+ /* BITMASK_GROUP_ID */ 1,
+ /* BITMASK_BIT_POSITION*/ 5,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zcmop,
+ /* UPPERCASE_NAME */ ZCMOP,
+ /* FULL_NAME */ "zcmop extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"zca"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ zc,
+ /* BITMASK_GROUP_ID */ 1,
+ /* BITMASK_BIT_POSITION*/ 6,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zcmp,
+ /* UPPERCASE_NAME */ ZCMP,
+ /* FULL_NAME */ "Compressed push pop extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"zca"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ zc,
+ /* BITMASK_GROUP_ID */ 1,
+ /* BITMASK_BIT_POSITION*/ 10,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zcmt,
+ /* UPPERCASE_NAME */ ZCMT,
+ /* FULL_NAME */ "Table jump instruction extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"zca", "zicsr"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ zc,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zclsd,
+ /* UPPERCASE_NAME */ ZCLSD,
+ /* FULL_NAME */ "Compressed load/store pair instructions extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"zca", "zilsd"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ zc,
+ /* BITMASK_GROUP_ID */ 1,
+ /* BITMASK_BIT_POSITION*/ 9,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zba,
+ /* UPPERCASE_NAME */ ZBA,
+ /* FULL_NAME */ "Address calculation extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ zb,
+ /* BITMASK_GROUP_ID */ 0,
+ /* BITMASK_BIT_POSITION*/ 27,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zbb,
+ /* UPPERCASE_NAME */ ZBB,
+ /* FULL_NAME */ "Basic bit manipulation extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ zb,
+ /* BITMASK_GROUP_ID */ 0,
+ /* BITMASK_BIT_POSITION*/ 28,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zbc,
+ /* UPPERCASE_NAME */ ZBC,
+ /* FULL_NAME */ "Carry-less multiplication extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ zb,
+ /* BITMASK_GROUP_ID */ 0,
+ /* BITMASK_BIT_POSITION*/ 29,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zbkb,
+ /* UPPERCASE_NAME */ ZBKB,
+ /* FULL_NAME */ "Cryptography bit-manipulation extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ zb,
+ /* BITMASK_GROUP_ID */ 0,
+ /* BITMASK_BIT_POSITION*/ 30,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zbkc,
+ /* UPPERCASE_NAME */ ZBKC,
+ /* FULL_NAME */ "Cryptography carry-less multiply extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ zb,
+ /* BITMASK_GROUP_ID */ 0,
+ /* BITMASK_BIT_POSITION*/ 31,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zbkx,
+ /* UPPERCASE_NAME */ ZBKX,
+ /* FULL_NAME */ "Cryptography crossbar permutation extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ zb,
+ /* BITMASK_GROUP_ID */ 0,
+ /* BITMASK_BIT_POSITION*/ 32,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zbs,
+ /* UPPERCASE_NAME */ ZBS,
+ /* FULL_NAME */ "Single-bit operation extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ zb,
+ /* BITMASK_GROUP_ID */ 0,
+ /* BITMASK_BIT_POSITION*/ 33,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zk,
+ /* UPPERCASE_NAME */ ZK,
+ /* FULL_NAME */ "Standard scalar cryptography extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"zkn", "zkr", "zkt"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ zk,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ EXT_FLAG_MACRO)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zkn,
+ /* UPPERCASE_NAME */ ZKN,
+ /* FULL_NAME */ "NIST algorithm suite extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"zbkb", "zbkc", "zbkx", "zkne", "zknd", "zknh"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ zk,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ EXT_FLAG_MACRO)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zknd,
+ /* UPPERCASE_NAME */ ZKND,
+ /* FULL_NAME */ "AES Decryption extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ zk,
+ /* BITMASK_GROUP_ID */ 0,
+ /* BITMASK_BIT_POSITION*/ 41,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zkne,
+ /* UPPERCASE_NAME */ ZKNE,
+ /* FULL_NAME */ "AES Encryption extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ zk,
+ /* BITMASK_GROUP_ID */ 0,
+ /* BITMASK_BIT_POSITION*/ 42,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zknh,
+ /* UPPERCASE_NAME */ ZKNH,
+ /* FULL_NAME */ "Hash function extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ zk,
+ /* BITMASK_GROUP_ID */ 0,
+ /* BITMASK_BIT_POSITION*/ 43,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zkr,
+ /* UPPERCASE_NAME */ ZKR,
+ /* FULL_NAME */ "Entropy source extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ zk,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zks,
+ /* UPPERCASE_NAME */ ZKS,
+ /* FULL_NAME */ "ShangMi algorithm suite extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"zbkb", "zbkc", "zbkx", "zksed", "zksh"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ zk,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ EXT_FLAG_MACRO)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zksed,
+ /* UPPERCASE_NAME */ ZKSED,
+ /* FULL_NAME */ "SM4 block cipher extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ zk,
+ /* BITMASK_GROUP_ID */ 0,
+ /* BITMASK_BIT_POSITION*/ 44,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zksh,
+ /* UPPERCASE_NAME */ ZKSH,
+ /* FULL_NAME */ "SM3 hash function extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ zk,
+ /* BITMASK_GROUP_ID */ 0,
+ /* BITMASK_BIT_POSITION*/ 45,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zkt,
+ /* UPPERCASE_NAME */ ZKT,
+ /* FULL_NAME */ "Data independent execution latency extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ zk,
+ /* BITMASK_GROUP_ID */ 0,
+ /* BITMASK_BIT_POSITION*/ 46,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ ztso,
+ /* UPPERCASE_NAME */ ZTSO,
+ /* FULL_NAME */ "Total store ordering extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ zt,
+ /* BITMASK_GROUP_ID */ 0,
+ /* BITMASK_BIT_POSITION*/ 47,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zvbb,
+ /* UPPERCASE_NAME */ ZVBB,
+ /* FULL_NAME */ "Vector basic bit-manipulation extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"zvkb"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ zvb,
+ /* BITMASK_GROUP_ID */ 0,
+ /* BITMASK_BIT_POSITION*/ 48,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zvbc,
+ /* UPPERCASE_NAME */ ZVBC,
+ /* FULL_NAME */ "Vector carryless multiplication extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"zve64x"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ zvb,
+ /* BITMASK_GROUP_ID */ 0,
+ /* BITMASK_BIT_POSITION*/ 49,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zve32f,
+ /* UPPERCASE_NAME */ ZVE32F,
+ /* FULL_NAME */ "Vector extensions for embedded processors",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"f", "zve32x", "zvl32b"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ zve,
+ /* BITMASK_GROUP_ID */ 0,
+ /* BITMASK_BIT_POSITION*/ 61,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zve32x,
+ /* UPPERCASE_NAME */ ZVE32X,
+ /* FULL_NAME */ "Vector extensions for embedded processors",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"zicsr", "zvl32b"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ zve,
+ /* BITMASK_GROUP_ID */ 0,
+ /* BITMASK_BIT_POSITION*/ 60,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zve64d,
+ /* UPPERCASE_NAME */ ZVE64D,
+ /* FULL_NAME */ "Vector extensions for embedded processors",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"d", "zve64f", "zvl64b"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ zve,
+ /* BITMASK_GROUP_ID */ 1,
+ /* BITMASK_BIT_POSITION*/ 0,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zve64f,
+ /* UPPERCASE_NAME */ ZVE64F,
+ /* FULL_NAME */ "Vector extensions for embedded processors",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"f", "zve32f", "zve64x", "zvl64b"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ zve,
+ /* BITMASK_GROUP_ID */ 0,
+ /* BITMASK_BIT_POSITION*/ 63,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zve64x,
+ /* UPPERCASE_NAME */ ZVE64X,
+ /* FULL_NAME */ "Vector extensions for embedded processors",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"zve32x", "zvl64b"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ zve,
+ /* BITMASK_GROUP_ID */ 0,
+ /* BITMASK_BIT_POSITION*/ 62,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zvfbfmin,
+ /* UPPERCASE_NAME */ ZVFBFMIN,
+ /* FULL_NAME */ "Vector BF16 converts extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"zve32f"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ zvf,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zvfbfwma,
+ /* UPPERCASE_NAME */ ZVFBFWMA,
+ /* FULL_NAME */ "zvfbfwma extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"zvfbfmin", "zfbfmin"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ zvf,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zvfh,
+ /* UPPERCASE_NAME */ ZVFH,
+ /* FULL_NAME */ "Vector half-precision floating-point extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"zve32f", "zfhmin"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ zvf,
+ /* BITMASK_GROUP_ID */ 0,
+ /* BITMASK_BIT_POSITION*/ 50,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zvfhmin,
+ /* UPPERCASE_NAME */ ZVFHMIN,
+ /* FULL_NAME */ "Vector minimal half-precision floating-point extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"zve32f"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ zvf,
+ /* BITMASK_GROUP_ID */ 0,
+ /* BITMASK_BIT_POSITION*/ 51,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zvkb,
+ /* UPPERCASE_NAME */ ZVKB,
+ /* FULL_NAME */ "Vector cryptography bit-manipulation extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"zve32x"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ zvk,
+ /* BITMASK_GROUP_ID */ 0,
+ /* BITMASK_BIT_POSITION*/ 52,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zvkg,
+ /* UPPERCASE_NAME */ ZVKG,
+ /* FULL_NAME */ "Vector GCM/GMAC extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"zve32x"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ zvk,
+ /* BITMASK_GROUP_ID */ 0,
+ /* BITMASK_BIT_POSITION*/ 53,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zvkn,
+ /* UPPERCASE_NAME */ ZVKN,
+ /* FULL_NAME */ "Vector NIST Algorithm Suite extension",
+ /* DESC */ "@samp{zvkn} will expand to",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"zvkned", "zvknhb", "zvkb", "zvkt"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ zvk,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ EXT_FLAG_MACRO)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zvknc,
+ /* UPPERCASE_NAME */ ZVKNC,
+ /* FULL_NAME */ "Vector NIST Algorithm Suite with carryless multiply extension, @samp{zvknc}",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"zvkn", "zvbc"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ zvk,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ EXT_FLAG_MACRO)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zvkned,
+ /* UPPERCASE_NAME */ ZVKNED,
+ /* FULL_NAME */ "Vector AES block cipher extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"zve32x"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ zvk,
+ /* BITMASK_GROUP_ID */ 0,
+ /* BITMASK_BIT_POSITION*/ 54,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zvkng,
+ /* UPPERCASE_NAME */ ZVKNG,
+ /* FULL_NAME */ "Vector NIST Algorithm Suite with GCM extension, @samp{zvkng} will expand",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"zvkn", "zvkg"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ zvk,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ EXT_FLAG_MACRO)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zvknha,
+ /* UPPERCASE_NAME */ ZVKNHA,
+ /* FULL_NAME */ "Vector SHA-2 secure hash extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"zve32x"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ zvk,
+ /* BITMASK_GROUP_ID */ 0,
+ /* BITMASK_BIT_POSITION*/ 55,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zvknhb,
+ /* UPPERCASE_NAME */ ZVKNHB,
+ /* FULL_NAME */ "Vector SHA-2 secure hash extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"zve64x"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ zvk,
+ /* BITMASK_GROUP_ID */ 0,
+ /* BITMASK_BIT_POSITION*/ 56,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zvks,
+ /* UPPERCASE_NAME */ ZVKS,
+ /* FULL_NAME */ "Vector ShangMi algorithm suite extension, @samp{zvks} will expand",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"zvksed", "zvksh", "zvkb", "zvkt"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ zvk,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ EXT_FLAG_MACRO)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zvksc,
+ /* UPPERCASE_NAME */ ZVKSC,
+ /* FULL_NAME */ "Vector ShangMi algorithm suite with carryless multiplication extension,",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"zvks", "zvbc"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ zvk,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ EXT_FLAG_MACRO)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zvksed,
+ /* UPPERCASE_NAME */ ZVKSED,
+ /* FULL_NAME */ "Vector SM4 Block Cipher extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"zve32x"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ zvk,
+ /* BITMASK_GROUP_ID */ 0,
+ /* BITMASK_BIT_POSITION*/ 57,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zvksg,
+ /* UPPERCASE_NAME */ ZVKSG,
+ /* FULL_NAME */ "Vector ShangMi algorithm suite with GCM extension, @samp{zvksg} will expand",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"zvks", "zvkg"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ zvk,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ EXT_FLAG_MACRO)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zvksh,
+ /* UPPERCASE_NAME */ ZVKSH,
+ /* FULL_NAME */ "Vector SM3 Secure Hash extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"zve32x"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ zvk,
+ /* BITMASK_GROUP_ID */ 0,
+ /* BITMASK_BIT_POSITION*/ 58,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zvkt,
+ /* UPPERCASE_NAME */ ZVKT,
+ /* FULL_NAME */ "Vector data independent execution latency extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ zvk,
+ /* BITMASK_GROUP_ID */ 0,
+ /* BITMASK_BIT_POSITION*/ 59,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zvl1024b,
+ /* UPPERCASE_NAME */ ZVL1024B,
+ /* FULL_NAME */ "Minimum vector length standard extensions",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"zvl512b"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ zvl,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zvl128b,
+ /* UPPERCASE_NAME */ ZVL128B,
+ /* FULL_NAME */ "Minimum vector length standard extensions",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"zvl64b"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ zvl,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zvl16384b,
+ /* UPPERCASE_NAME */ ZVL16384B,
+ /* FULL_NAME */ "zvl16384b extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"zvl8192b"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ zvl,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zvl2048b,
+ /* UPPERCASE_NAME */ ZVL2048B,
+ /* FULL_NAME */ "Minimum vector length standard extensions",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"zvl1024b"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ zvl,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zvl256b,
+ /* UPPERCASE_NAME */ ZVL256B,
+ /* FULL_NAME */ "Minimum vector length standard extensions",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"zvl128b"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ zvl,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zvl32768b,
+ /* UPPERCASE_NAME */ ZVL32768B,
+ /* FULL_NAME */ "zvl32768b extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"zvl16384b"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ zvl,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zvl32b,
+ /* UPPERCASE_NAME */ ZVL32B,
+ /* FULL_NAME */ "Minimum vector length standard extensions",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ zvl,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zvl4096b,
+ /* UPPERCASE_NAME */ ZVL4096B,
+ /* FULL_NAME */ "Minimum vector length standard extensions",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"zvl2048b"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ zvl,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zvl512b,
+ /* UPPERCASE_NAME */ ZVL512B,
+ /* FULL_NAME */ "Minimum vector length standard extensions",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"zvl256b"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ zvl,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zvl64b,
+ /* UPPERCASE_NAME */ ZVL64B,
+ /* FULL_NAME */ "Minimum vector length standard extensions",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"zvl32b"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ zvl,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zvl65536b,
+ /* UPPERCASE_NAME */ ZVL65536B,
+ /* FULL_NAME */ "zvl65536b extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"zvl32768b"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ zvl,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zvl8192b,
+ /* UPPERCASE_NAME */ ZVL8192B,
+ /* FULL_NAME */ "zvl8192b extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"zvl4096b"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ zvl,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zhinx,
+ /* UPPERCASE_NAME */ ZHINX,
+ /* FULL_NAME */ "Half-precision floating-point in integer registers extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"zhinxmin"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ zinx,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ zhinxmin,
+ /* UPPERCASE_NAME */ ZHINXMIN,
+ /* FULL_NAME */ "Minimal half-precision floating-point in integer registers extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"zfinx"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ zinx,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ sdtrig,
+ /* UPPERCASE_NAME */ SDTRIG,
+ /* FULL_NAME */ "sdtrig extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ sd,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ sha,
+ /* UPPERCASE_NAME */ SHA,
+ /* FULL_NAME */ "The augmented hypervisor extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"h", "shcounterenw", "shgatpa", "shtvala", "shvstvala", "shvstvecd", "shvsatpa", "ssstateen"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ sh,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ shcounterenw,
+ /* UPPERCASE_NAME */ SHCOUNTERENW,
+ /* FULL_NAME */ "Support writeable enables for any supported counter",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"h", "zihpm"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ sh,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ shgatpa,
+ /* UPPERCASE_NAME */ SHGATPA,
+ /* FULL_NAME */ "SvNNx4 mode supported for all modes supported by satp",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"h", "ssstateen"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ sh,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ shlcofideleg,
+ /* UPPERCASE_NAME */ SHLCOFIDELEG,
+ /* FULL_NAME */ "Delegating LCOFI interrupts to VS-mode",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"h"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ sh,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ shtvala,
+ /* UPPERCASE_NAME */ SHTVALA,
+ /* FULL_NAME */ "The htval register provides all needed values",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"h"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ sh,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ shvstvala,
+ /* UPPERCASE_NAME */ SHVSTVALA,
+ /* FULL_NAME */ "The vstval register provides all needed values",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"h"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ sh,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ shvstvecd,
+ /* UPPERCASE_NAME */ SHVSTVECD,
+ /* FULL_NAME */ "The vstvec register supports Direct mode",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"h"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ sh,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ shvsatpa,
+ /* UPPERCASE_NAME */ SHVSATPA,
+ /* FULL_NAME */ "The vsatp register supports all modes supported by satp",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"h"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ sh,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ smaia,
+ /* UPPERCASE_NAME */ SMAIA,
+ /* FULL_NAME */ "Advanced interrupt architecture extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"ssaia"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ sm,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ smcntrpmf,
+ /* UPPERCASE_NAME */ SMCNTRPMF,
+ /* FULL_NAME */ "Cycle and instret privilege mode filtering",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"zicsr"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ sm,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ smcsrind,
+ /* UPPERCASE_NAME */ SMCSRIND,
+ /* FULL_NAME */ "Machine-Level Indirect CSR Access",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"zicsr", "sscsrind"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ sm,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ smepmp,
+ /* UPPERCASE_NAME */ SMEPMP,
+ /* FULL_NAME */ "PMP Enhancements for memory access and execution prevention on Machine mode",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"zicsr"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ sm,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ smmpm,
+ /* UPPERCASE_NAME */ SMMPM,
+ /* FULL_NAME */ "smmpm extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"zicsr"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ sm,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ smnpm,
+ /* UPPERCASE_NAME */ SMNPM,
+ /* FULL_NAME */ "smnpm extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"zicsr"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ sm,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ smrnmi,
+ /* UPPERCASE_NAME */ SMRNMI,
+ /* FULL_NAME */ "Resumable non-maskable interrupts",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"zicsr"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ sm,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ smstateen,
+ /* UPPERCASE_NAME */ SMSTATEEN,
+ /* FULL_NAME */ "State enable extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"ssstateen"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ sm,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ smdbltrp,
+ /* UPPERCASE_NAME */ SMDBLTRP,
+ /* FULL_NAME */ "Double Trap Extensions",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"zicsr"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ sm,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ ssaia,
+ /* UPPERCASE_NAME */ SSAIA,
+ /* FULL_NAME */ "Advanced interrupt architecture extension for supervisor-mode",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"zicsr"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ ss,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ ssccptr,
+ /* UPPERCASE_NAME */ SSCCPTR,
+ /* FULL_NAME */ "Main memory supports page table reads",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ ss,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ sscofpmf,
+ /* UPPERCASE_NAME */ SSCOFPMF,
+ /* FULL_NAME */ "Count overflow & filtering extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"zicsr"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ ss,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ sscounterenw,
+ /* UPPERCASE_NAME */ SSCOUNTERENW,
+ /* FULL_NAME */ "Support writeable enables for any supported counter",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"zicsr"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ ss,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ sscsrind,
+ /* UPPERCASE_NAME */ SSCSRIND,
+ /* FULL_NAME */ "Supervisor-Level Indirect CSR Access",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"zicsr"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ ss,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ ssnpm,
+ /* UPPERCASE_NAME */ SSNPM,
+ /* FULL_NAME */ "ssnpm extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"zicsr"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ ss,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ sspm,
+ /* UPPERCASE_NAME */ SSPM,
+ /* FULL_NAME */ "sspm extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ ss,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ ssstateen,
+ /* UPPERCASE_NAME */ SSSTATEEN,
+ /* FULL_NAME */ "State-enable extension for supervisor-mode",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"zicsr"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ ss,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ sstc,
+ /* UPPERCASE_NAME */ SSTC,
+ /* FULL_NAME */ "Supervisor-mode timer interrupts extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"zicsr"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ ss,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ sstvala,
+ /* UPPERCASE_NAME */ SSTVALA,
+ /* FULL_NAME */ "Stval provides all needed values",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"zicsr"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ ss,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ sstvecd,
+ /* UPPERCASE_NAME */ SSTVECD,
+ /* FULL_NAME */ "Stvec supports Direct mode",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"zicsr"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ ss,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ ssstrict,
+ /* UPPERCASE_NAME */ SSSTRICT,
+ /* FULL_NAME */ "ssstrict extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ ss,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ ssdbltrp,
+ /* UPPERCASE_NAME */ SSDBLTRP,
+ /* FULL_NAME */ "Double Trap Extensions",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"zicsr"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ ss,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ ssu64xl,
+ /* UPPERCASE_NAME */ SSU64XL,
+ /* FULL_NAME */ "UXLEN=64 must be supported",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"zicsr"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ ss,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ supm,
+ /* UPPERCASE_NAME */ SUPM,
+ /* FULL_NAME */ "supm extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ su,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ svinval,
+ /* UPPERCASE_NAME */ SVINVAL,
+ /* FULL_NAME */ "Fine-grained address-translation cache invalidation extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ sv,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ svnapot,
+ /* UPPERCASE_NAME */ SVNAPOT,
+ /* FULL_NAME */ "NAPOT translation contiguity extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ sv,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ svpbmt,
+ /* UPPERCASE_NAME */ SVPBMT,
+ /* FULL_NAME */ "Page-based memory types extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ sv,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ svvptc,
+ /* UPPERCASE_NAME */ SVVPTC,
+ /* FULL_NAME */ "svvptc extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ sv,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION*/ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ svadu,
+ /* UPPERCASE_NAME */ SVADU,
+ /* FULL_NAME */ "Hardware Updating of A/D Bits extension",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"zicsr"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ sv,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION */ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ svade,
+ /* UPPERCASE_NAME */ SVADE,
+ /* FULL_NAME */ "Cause exception when hardware updating of A/D bits is disabled",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"zicsr"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ sv,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION */ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+DEFINE_RISCV_EXT(
+ /* NAME */ svbare,
+ /* UPPERCASE_NAME */ SVBARE,
+ /* FULL_NAME */ "Satp mode bare is supported",
+ /* DESC */ "",
+ /* URL */ ,
+ /* DEP_EXTS */ ({"zicsr"}),
+ /* SUPPORTED_VERSIONS */ ({{1, 0}}),
+ /* FLAG_GROUP */ sv,
+ /* BITMASK_GROUP_ID */ BITMASK_NOT_YET_ALLOCATED,
+ /* BITMASK_BIT_POSITION */ BITMASK_NOT_YET_ALLOCATED,
+ /* EXTRA_EXTENSION_FLAGS */ 0)
+
+#include "riscv-ext-corev.def"
+#include "riscv-ext-sifive.def"
+#include "riscv-ext-thead.def"
+#include "riscv-ext-ventana.def"
+#include "riscv-ext-mips.def"
diff --git a/gcc/config/riscv/riscv-ext.opt b/gcc/config/riscv/riscv-ext.opt
new file mode 100644
index 0000000..ced05d2
--- /dev/null
+++ b/gcc/config/riscv/riscv-ext.opt
@@ -0,0 +1,453 @@
+; Target options for the RISC-V port of the compiler
+;
+; Copyright (C) 2025 Free Software Foundation, Inc.
+;
+; This file is part of GCC.
+;
+; GCC is free software; you can redistribute it and/or modify it under
+; the terms of the GNU General Public License as published by the Free
+; Software Foundation; either version 3, or (at your option) any later
+; version.
+;
+; GCC is distributed in the hope that it will be useful, but WITHOUT
+; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+; License for more details.
+;
+; You should have received a copy of the GNU General Public License
+; along with GCC; see the file COPYING3. If not see
+; <http://www.gnu.org/licenses/>.
+; This file is generated automatically using
+; gcc/config/riscv/gen-riscv-ext-opt.cc from:
+; gcc/config/riscv/riscv-ext.def
+
+; Please *DO NOT* edit manually.
+TargetVariable
+int riscv_base_subext
+
+TargetVariable
+int riscv_sd_subext
+
+TargetVariable
+int riscv_sh_subext
+
+TargetVariable
+int riscv_sm_subext
+
+TargetVariable
+int riscv_ss_subext
+
+TargetVariable
+int riscv_su_subext
+
+TargetVariable
+int riscv_sv_subext
+
+TargetVariable
+int riscv_xcv_subext
+
+TargetVariable
+int riscv_xmips_subext
+
+TargetVariable
+int riscv_xsf_subext
+
+TargetVariable
+int riscv_xthead_subext
+
+TargetVariable
+int riscv_xventana_subext
+
+TargetVariable
+int riscv_za_subext
+
+TargetVariable
+int riscv_zb_subext
+
+TargetVariable
+int riscv_zc_subext
+
+TargetVariable
+int riscv_zf_subext
+
+TargetVariable
+int riscv_zi_subext
+
+TargetVariable
+int riscv_zinx_subext
+
+TargetVariable
+int riscv_zk_subext
+
+TargetVariable
+int riscv_zm_subext
+
+TargetVariable
+int riscv_zt_subext
+
+TargetVariable
+int riscv_zvb_subext
+
+TargetVariable
+int riscv_zve_subext
+
+TargetVariable
+int riscv_zvf_subext
+
+TargetVariable
+int riscv_zvk_subext
+
+TargetVariable
+int riscv_zvl_subext
+
+Mask(RVE) Var(riscv_base_subext)
+
+Mask(RVI) Var(riscv_base_subext)
+
+Mask(MUL) Var(riscv_base_subext)
+
+Mask(ATOMIC) Var(riscv_base_subext)
+
+Mask(HARD_FLOAT) Var(riscv_base_subext)
+
+Mask(DOUBLE_FLOAT) Var(riscv_base_subext)
+
+Mask(RVC) Var(riscv_base_subext)
+
+Mask(RVB) Var(riscv_base_subext)
+
+Mask(RVV) Var(riscv_base_subext)
+
+Mask(RVH) Var(riscv_base_subext)
+
+Mask(ZIC64B) Var(riscv_zi_subext)
+
+Mask(ZICBOM) Var(riscv_zi_subext)
+
+Mask(ZICBOP) Var(riscv_zi_subext)
+
+Mask(ZICBOZ) Var(riscv_zi_subext)
+
+Mask(ZICCAMOA) Var(riscv_zi_subext)
+
+Mask(ZICCIF) Var(riscv_zi_subext)
+
+Mask(ZICCLSM) Var(riscv_zi_subext)
+
+Mask(ZICCRSE) Var(riscv_zi_subext)
+
+Mask(ZICFILP) Var(riscv_zi_subext)
+
+Mask(ZICFISS) Var(riscv_zi_subext)
+
+Mask(ZICNTR) Var(riscv_zi_subext)
+
+Mask(ZICOND) Var(riscv_zi_subext)
+
+Mask(ZICSR) Var(riscv_zi_subext)
+
+Mask(ZIFENCEI) Var(riscv_zi_subext)
+
+Mask(ZIHINTNTL) Var(riscv_zi_subext)
+
+Mask(ZIHINTPAUSE) Var(riscv_zi_subext)
+
+Mask(ZIHPM) Var(riscv_zi_subext)
+
+Mask(ZIMOP) Var(riscv_zi_subext)
+
+Mask(ZILSD) Var(riscv_zi_subext)
+
+Mask(ZMMUL) Var(riscv_zm_subext)
+
+Mask(ZA128RS) Var(riscv_za_subext)
+
+Mask(ZA64RS) Var(riscv_za_subext)
+
+Mask(ZAAMO) Var(riscv_za_subext)
+
+Mask(ZABHA) Var(riscv_za_subext)
+
+Mask(ZACAS) Var(riscv_za_subext)
+
+Mask(ZALRSC) Var(riscv_za_subext)
+
+Mask(ZAWRS) Var(riscv_za_subext)
+
+Mask(ZAMA16B) Var(riscv_za_subext)
+
+Mask(ZFA) Var(riscv_zf_subext)
+
+Mask(ZFBFMIN) Var(riscv_zf_subext)
+
+Mask(ZFH) Var(riscv_zf_subext)
+
+Mask(ZFHMIN) Var(riscv_zf_subext)
+
+Mask(ZFINX) Var(riscv_zinx_subext)
+
+Mask(ZDINX) Var(riscv_zinx_subext)
+
+Mask(ZCA) Var(riscv_zc_subext)
+
+Mask(ZCB) Var(riscv_zc_subext)
+
+Mask(ZCD) Var(riscv_zc_subext)
+
+Mask(ZCE) Var(riscv_zc_subext)
+
+Mask(ZCF) Var(riscv_zc_subext)
+
+Mask(ZCMOP) Var(riscv_zc_subext)
+
+Mask(ZCMP) Var(riscv_zc_subext)
+
+Mask(ZCMT) Var(riscv_zc_subext)
+
+Mask(ZCLSD) Var(riscv_zc_subext)
+
+Mask(ZBA) Var(riscv_zb_subext)
+
+Mask(ZBB) Var(riscv_zb_subext)
+
+Mask(ZBC) Var(riscv_zb_subext)
+
+Mask(ZBKB) Var(riscv_zb_subext)
+
+Mask(ZBKC) Var(riscv_zb_subext)
+
+Mask(ZBKX) Var(riscv_zb_subext)
+
+Mask(ZBS) Var(riscv_zb_subext)
+
+Mask(ZK) Var(riscv_zk_subext)
+
+Mask(ZKN) Var(riscv_zk_subext)
+
+Mask(ZKND) Var(riscv_zk_subext)
+
+Mask(ZKNE) Var(riscv_zk_subext)
+
+Mask(ZKNH) Var(riscv_zk_subext)
+
+Mask(ZKR) Var(riscv_zk_subext)
+
+Mask(ZKS) Var(riscv_zk_subext)
+
+Mask(ZKSED) Var(riscv_zk_subext)
+
+Mask(ZKSH) Var(riscv_zk_subext)
+
+Mask(ZKT) Var(riscv_zk_subext)
+
+Mask(ZTSO) Var(riscv_zt_subext)
+
+Mask(ZVBB) Var(riscv_zvb_subext)
+
+Mask(ZVBC) Var(riscv_zvb_subext)
+
+Mask(ZVE32F) Var(riscv_zve_subext)
+
+Mask(ZVE32X) Var(riscv_zve_subext)
+
+Mask(ZVE64D) Var(riscv_zve_subext)
+
+Mask(ZVE64F) Var(riscv_zve_subext)
+
+Mask(ZVE64X) Var(riscv_zve_subext)
+
+Mask(ZVFBFMIN) Var(riscv_zvf_subext)
+
+Mask(ZVFBFWMA) Var(riscv_zvf_subext)
+
+Mask(ZVFH) Var(riscv_zvf_subext)
+
+Mask(ZVFHMIN) Var(riscv_zvf_subext)
+
+Mask(ZVKB) Var(riscv_zvk_subext)
+
+Mask(ZVKG) Var(riscv_zvk_subext)
+
+Mask(ZVKN) Var(riscv_zvk_subext)
+
+Mask(ZVKNC) Var(riscv_zvk_subext)
+
+Mask(ZVKNED) Var(riscv_zvk_subext)
+
+Mask(ZVKNG) Var(riscv_zvk_subext)
+
+Mask(ZVKNHA) Var(riscv_zvk_subext)
+
+Mask(ZVKNHB) Var(riscv_zvk_subext)
+
+Mask(ZVKS) Var(riscv_zvk_subext)
+
+Mask(ZVKSC) Var(riscv_zvk_subext)
+
+Mask(ZVKSED) Var(riscv_zvk_subext)
+
+Mask(ZVKSG) Var(riscv_zvk_subext)
+
+Mask(ZVKSH) Var(riscv_zvk_subext)
+
+Mask(ZVKT) Var(riscv_zvk_subext)
+
+Mask(ZVL1024B) Var(riscv_zvl_subext)
+
+Mask(ZVL128B) Var(riscv_zvl_subext)
+
+Mask(ZVL16384B) Var(riscv_zvl_subext)
+
+Mask(ZVL2048B) Var(riscv_zvl_subext)
+
+Mask(ZVL256B) Var(riscv_zvl_subext)
+
+Mask(ZVL32768B) Var(riscv_zvl_subext)
+
+Mask(ZVL32B) Var(riscv_zvl_subext)
+
+Mask(ZVL4096B) Var(riscv_zvl_subext)
+
+Mask(ZVL512B) Var(riscv_zvl_subext)
+
+Mask(ZVL64B) Var(riscv_zvl_subext)
+
+Mask(ZVL65536B) Var(riscv_zvl_subext)
+
+Mask(ZVL8192B) Var(riscv_zvl_subext)
+
+Mask(ZHINX) Var(riscv_zinx_subext)
+
+Mask(ZHINXMIN) Var(riscv_zinx_subext)
+
+Mask(SDTRIG) Var(riscv_sd_subext)
+
+Mask(SHA) Var(riscv_sh_subext)
+
+Mask(SHCOUNTERENW) Var(riscv_sh_subext)
+
+Mask(SHGATPA) Var(riscv_sh_subext)
+
+Mask(SHLCOFIDELEG) Var(riscv_sh_subext)
+
+Mask(SHTVALA) Var(riscv_sh_subext)
+
+Mask(SHVSTVALA) Var(riscv_sh_subext)
+
+Mask(SHVSTVECD) Var(riscv_sh_subext)
+
+Mask(SHVSATPA) Var(riscv_sh_subext)
+
+Mask(SMAIA) Var(riscv_sm_subext)
+
+Mask(SMCNTRPMF) Var(riscv_sm_subext)
+
+Mask(SMCSRIND) Var(riscv_sm_subext)
+
+Mask(SMEPMP) Var(riscv_sm_subext)
+
+Mask(SMMPM) Var(riscv_sm_subext)
+
+Mask(SMNPM) Var(riscv_sm_subext)
+
+Mask(SMRNMI) Var(riscv_sm_subext)
+
+Mask(SMSTATEEN) Var(riscv_sm_subext)
+
+Mask(SMDBLTRP) Var(riscv_sm_subext)
+
+Mask(SSAIA) Var(riscv_ss_subext)
+
+Mask(SSCCPTR) Var(riscv_ss_subext)
+
+Mask(SSCOFPMF) Var(riscv_ss_subext)
+
+Mask(SSCOUNTERENW) Var(riscv_ss_subext)
+
+Mask(SSCSRIND) Var(riscv_ss_subext)
+
+Mask(SSNPM) Var(riscv_ss_subext)
+
+Mask(SSPM) Var(riscv_ss_subext)
+
+Mask(SSSTATEEN) Var(riscv_ss_subext)
+
+Mask(SSTC) Var(riscv_ss_subext)
+
+Mask(SSTVALA) Var(riscv_ss_subext)
+
+Mask(SSTVECD) Var(riscv_ss_subext)
+
+Mask(SSSTRICT) Var(riscv_ss_subext)
+
+Mask(SSDBLTRP) Var(riscv_ss_subext)
+
+Mask(SSU64XL) Var(riscv_ss_subext)
+
+Mask(SUPM) Var(riscv_su_subext)
+
+Mask(SVINVAL) Var(riscv_sv_subext)
+
+Mask(SVNAPOT) Var(riscv_sv_subext)
+
+Mask(SVPBMT) Var(riscv_sv_subext)
+
+Mask(SVVPTC) Var(riscv_sv_subext)
+
+Mask(SVADU) Var(riscv_sv_subext)
+
+Mask(SVADE) Var(riscv_sv_subext)
+
+Mask(SVBARE) Var(riscv_sv_subext)
+
+Mask(XCVALU) Var(riscv_xcv_subext)
+
+Mask(XCVBI) Var(riscv_xcv_subext)
+
+Mask(XCVELW) Var(riscv_xcv_subext)
+
+Mask(XCVMAC) Var(riscv_xcv_subext)
+
+Mask(XCVSIMD) Var(riscv_xcv_subext)
+
+Mask(XSFCEASE) Var(riscv_xsf_subext)
+
+Mask(XSFVCP) Var(riscv_xsf_subext)
+
+Mask(XSFVFNRCLIPXFQF) Var(riscv_xsf_subext)
+
+Mask(XSFVQMACCDOD) Var(riscv_xsf_subext)
+
+Mask(XSFVQMACCQOQ) Var(riscv_xsf_subext)
+
+Mask(XTHEADBA) Var(riscv_xthead_subext)
+
+Mask(XTHEADBB) Var(riscv_xthead_subext)
+
+Mask(XTHEADBS) Var(riscv_xthead_subext)
+
+Mask(XTHEADCMO) Var(riscv_xthead_subext)
+
+Mask(XTHEADCONDMOV) Var(riscv_xthead_subext)
+
+Mask(XTHEADFMEMIDX) Var(riscv_xthead_subext)
+
+Mask(XTHEADFMV) Var(riscv_xthead_subext)
+
+Mask(XTHEADINT) Var(riscv_xthead_subext)
+
+Mask(XTHEADMAC) Var(riscv_xthead_subext)
+
+Mask(XTHEADMEMIDX) Var(riscv_xthead_subext)
+
+Mask(XTHEADMEMPAIR) Var(riscv_xthead_subext)
+
+Mask(XTHEADSYNC) Var(riscv_xthead_subext)
+
+Mask(XTHEADVECTOR) Var(riscv_xthead_subext)
+
+Mask(XVENTANACONDOPS) Var(riscv_xventana_subext)
+
+Mask(XMIPSCMOV) Var(riscv_xmips_subext)
+
+Mask(XMIPSCBOP) Var(riscv_xmips_subext)
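Each Mask(NAME) Var(var) record above pairs a bit in one of the TargetVariable words declared at the top of the file with a MASK_NAME/TARGET_NAME macro pair emitted by GCC's option machinery (opth-gen.awk). The generated options.h entries have roughly this shape; the exact bit value depends on the record's position within its variable (illustrative, not the verbatim generated text):

    /* Derived from "Mask(ZBB) Var(riscv_zb_subext)"; second Mask() record
       attached to riscv_zb_subext, hence bit 1 here.  */
    #define MASK_ZBB (1U << 1)
    #define TARGET_ZBB ((riscv_zb_subext & MASK_ZBB) != 0)

Grouping related extensions into per-prefix words (riscv_zb_subext, riscv_zvk_subext, ...) is what keeps each word within 32 bits even though the total number of Mask() records here far exceeds 32.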
diff --git a/gcc/config/riscv/riscv-ext.opt.urls b/gcc/config/riscv/riscv-ext.opt.urls
new file mode 100644
index 0000000..c4f4710
--- /dev/null
+++ b/gcc/config/riscv/riscv-ext.opt.urls
@@ -0,0 +1,2 @@
+; Autogenerated by regenerate-opt-urls.py from gcc/config/riscv/riscv-ext.opt and generated HTML
+
diff --git a/gcc/config/riscv/riscv-opts.h b/gcc/config/riscv/riscv-opts.h
index 26fe228..e1a820b 100644
--- a/gcc/config/riscv/riscv-opts.h
+++ b/gcc/config/riscv/riscv-opts.h
@@ -58,7 +58,8 @@ enum riscv_microarchitecture_type {
sifive_p400,
sifive_p600,
xiangshan,
- generic_ooo
+ generic_ooo,
+ mips_p8700,
};
extern enum riscv_microarchitecture_type riscv_microarchitecture;
@@ -136,16 +137,16 @@ enum rvv_vector_bits_enum {
/* The bits of riscv_zvl_flags are set contiguously: bit N-1 is set whenever
   bit N is set, e.g. if MASK_ZVL64B is set then MASK_ZVL32B is set too, so
   we can use popcount to calculate the minimal VLEN.  */
-#define TARGET_MIN_VLEN \
- ((riscv_zvl_flags == 0) \
- ? 0 \
- : 32 << (__builtin_popcount (riscv_zvl_flags) - 1))
+#define TARGET_MIN_VLEN \
+ ((riscv_zvl_subext == 0) \
+ ? 0 \
+ : 32 << (__builtin_popcount (riscv_zvl_subext) - 1))
/* Same as TARGET_MIN_VLEN, but take an OPTS as gcc_options. */
#define TARGET_MIN_VLEN_OPTS(opts) \
- ((opts->x_riscv_zvl_flags == 0) \
+ ((opts->x_riscv_zvl_subext == 0) \
? 0 \
- : 32 << (__builtin_popcount (opts->x_riscv_zvl_flags) - 1))
+ : 32 << (__builtin_popcount (opts->x_riscv_zvl_subext) - 1))
/* The maximum LMUL according to user configuration. */
#define TARGET_MAX_LMUL \
@@ -162,4 +163,15 @@ enum riscv_tls_type {
#define TARGET_VECTOR_AUTOVEC_SEGMENT \
(TARGET_VECTOR && riscv_mautovec_segment)
+#define GPR2VR_COST_UNPROVIDED -1
+#define FPR2VR_COST_UNPROVIDED -1
+
+/* Extra extension flags, used to carry extra information for a RISC-V extension. */
+enum
+{
+ EXT_FLAG_MACRO = 1 << 0,
+};
+
+#define BITMASK_NOT_YET_ALLOCATED -1
+
#endif /* ! GCC_RISCV_OPTS_H */
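A worked example of the TARGET_MIN_VLEN popcount trick above, runnable standalone (bit positions are illustrative; in the real options word they follow the Mask() declaration order in riscv-ext.opt):

    #include <stdio.h>

    int main (void)
    {
      /* Zvl128b implies Zvl64b and Zvl32b, so three contiguous bits are
         set in the zvl subext word.  */
      unsigned zvl_subext = (1u << 0) | (1u << 1) | (1u << 2);
      int min_vlen = zvl_subext == 0
                     ? 0
                     : 32 << (__builtin_popcount (zvl_subext) - 1);
      printf ("%d\n", min_vlen);   /* 32 << 2 == 128.  */
      return 0;
    }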
diff --git a/gcc/config/riscv/riscv-passes.def b/gcc/config/riscv/riscv-passes.def
index 7e6a2a0..bc803c4 100644
--- a/gcc/config/riscv/riscv-passes.def
+++ b/gcc/config/riscv/riscv-passes.def
@@ -21,3 +21,5 @@ INSERT_PASS_AFTER (pass_rtl_store_motion, 1, pass_shorten_memrefs);
INSERT_PASS_AFTER (pass_split_all_insns, 1, pass_avlprop);
INSERT_PASS_BEFORE (pass_fast_rtl_dce, 1, pass_vsetvl);
INSERT_PASS_BEFORE (pass_shorten_branches, 1, pass_insert_landing_pad);
+INSERT_PASS_AFTER (pass_cse2, 1, pass_vector_permconst);
diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index 2bedd87..46b256d 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -137,9 +137,14 @@ extern void riscv_expand_usadd (rtx, rtx, rtx);
extern void riscv_expand_ssadd (rtx, rtx, rtx);
extern void riscv_expand_ussub (rtx, rtx, rtx);
extern void riscv_expand_sssub (rtx, rtx, rtx);
+extern void riscv_expand_usmul (rtx, rtx, rtx);
extern void riscv_expand_ustrunc (rtx, rtx);
extern void riscv_expand_sstrunc (rtx, rtx);
extern int riscv_register_move_cost (machine_mode, reg_class_t, reg_class_t);
+extern bool synthesize_ior_xor (rtx_code, rtx [3]);
+extern bool synthesize_and (rtx [3]);
+extern bool synthesize_add (rtx [3]);
+extern bool synthesize_add_extended (rtx [3]);
#ifdef RTX_CODE
extern void riscv_expand_int_scc (rtx, enum rtx_code, rtx, rtx, bool *invert_ptr = 0);
@@ -201,6 +206,8 @@ rtl_opt_pass * make_pass_shorten_memrefs (gcc::context *ctxt);
rtl_opt_pass * make_pass_avlprop (gcc::context *ctxt);
rtl_opt_pass * make_pass_vsetvl (gcc::context *ctxt);
rtl_opt_pass * make_pass_insert_landing_pad (gcc::context *ctxt);
+rtl_opt_pass * make_pass_vector_permconst (gcc::context *ctxt);
+
/* Routines implemented in riscv-string.c. */
extern bool riscv_expand_block_compare (rtx, rtx, rtx, rtx);
@@ -409,8 +416,14 @@ enum insn_flags : unsigned int
/* Means INSN has VXRM operand and the value is VXRM_RNU. */
VXRM_RNU_P = 1 << 20,
+ /* Means INSN has VXRM operand and the value is VXRM_RNE. */
+ VXRM_RNE_P = 1 << 21,
+
/* Means INSN has VXRM operand and the value is VXRM_RDN. */
- VXRM_RDN_P = 1 << 21,
+ VXRM_RDN_P = 1 << 22,
+
+ /* Means INSN has VXRM operand and the value is VXRM_ROD. */
+ VXRM_ROD_P = 1 << 23,
};
enum insn_type : unsigned int
@@ -472,7 +485,9 @@ enum insn_type : unsigned int
BINARY_OP_TUMA = __MASK_OP_TUMA | BINARY_OP_P,
BINARY_OP_FRM_DYN = BINARY_OP | FRM_DYN_P,
BINARY_OP_VXRM_RNU = BINARY_OP | VXRM_RNU_P,
+ BINARY_OP_VXRM_RNE = BINARY_OP | VXRM_RNE_P,
BINARY_OP_VXRM_RDN = BINARY_OP | VXRM_RDN_P,
+ BINARY_OP_VXRM_ROD = BINARY_OP | VXRM_ROD_P,
/* Ternary operator. Always have real merge operand. */
TERNARY_OP = HAS_DEST_P | HAS_MASK_P | USE_ALL_TRUES_MASK_P | HAS_MERGE_P
@@ -599,6 +614,7 @@ void emit_vlmax_vsetvl (machine_mode, rtx);
void emit_hard_vlmax_vsetvl (machine_mode, rtx);
void emit_vlmax_insn (unsigned, unsigned, rtx *);
void emit_nonvlmax_insn (unsigned, unsigned, rtx *, rtx);
+void emit_avltype_insn (unsigned, unsigned, rtx *, avl_type, rtx = nullptr);
void emit_vlmax_insn_lra (unsigned, unsigned, rtx *, rtx);
enum vlmul_type get_vlmul (machine_mode);
rtx get_vlmax_rtx (machine_mode);
@@ -664,6 +680,10 @@ void expand_vec_oct_ustrunc (rtx, rtx, machine_mode, machine_mode,
machine_mode);
void expand_vec_oct_sstrunc (rtx, rtx, machine_mode, machine_mode,
machine_mode);
+void expand_vx_binary_vec_dup_vec (rtx, rtx, rtx, rtx_code, machine_mode);
+void expand_vx_binary_vec_vec_dup (rtx, rtx, rtx, rtx_code, machine_mode);
+void expand_vx_binary_vxrm_vec_vec_dup (rtx, rtx, rtx, int, int, machine_mode);
+void expand_vx_binary_vxrm_vec_dup_vec (rtx, rtx, rtx, int, int, machine_mode);
#endif
bool sew64_scalar_helper (rtx *, rtx *, rtx, machine_mode,
bool, void (*)(rtx *, rtx), enum avl_type);
@@ -687,6 +707,9 @@ bool expand_block_move (rtx, rtx, rtx, bool);
machine_mode preferred_simd_mode (scalar_mode);
machine_mode get_mask_mode (machine_mode);
void expand_vec_series (rtx, rtx, rtx, rtx = 0);
+void expand_broadcast (machine_mode, rtx *, rtx = 0);
+void expand_set_first (machine_mode, rtx *, rtx = 0);
+void expand_set_first_tu (machine_mode, rtx *, rtx = 0);
void expand_vec_init (rtx, rtx);
void expand_vec_perm (rtx, rtx, rtx, rtx);
void expand_select_vl (rtx *);
@@ -753,7 +776,8 @@ uint8_t get_sew (rtx_insn *);
enum vlmul_type get_vlmul (rtx_insn *);
int count_regno_occurrences (rtx_insn *, unsigned int);
bool imm_avl_p (machine_mode);
-bool can_be_broadcasted_p (rtx);
+bool can_be_broadcast_p (rtx);
+bool strided_broadcast_p (rtx);
bool gather_scatter_valid_offset_p (machine_mode);
HOST_WIDE_INT estimated_poly_value (poly_int64, unsigned int);
bool whole_reg_to_reg_move_p (rtx *, machine_mode, int);
@@ -806,17 +830,20 @@ extern const char *th_output_move (rtx, rtx);
extern bool th_print_operand_address (FILE *, machine_mode, rtx);
#endif
+extern bool strided_load_broadcast_p (void);
extern bool riscv_use_divmod_expander (void);
-void riscv_init_cumulative_args (CUMULATIVE_ARGS *, tree, rtx, tree, int);
+void riscv_init_cumulative_args (CUMULATIVE_ARGS *, const_tree, rtx, tree, int);
extern bool
riscv_option_valid_attribute_p (tree, tree, tree, int);
extern bool
riscv_option_valid_version_attribute_p (tree, tree, tree, int);
extern bool
-riscv_process_target_version_attr (tree, location_t);
+riscv_process_target_version_attr (tree, location_t *);
extern void
riscv_override_options_internal (struct gcc_options *);
extern void riscv_option_override (void);
+extern rtx riscv_prefetch_cookie (rtx, rtx);
+extern bool riscv_prefetch_offset_address_p (rtx, machine_mode);
struct riscv_tune_param;
/* Information about one micro-arch we know about. */
@@ -834,6 +861,8 @@ struct riscv_tune_info {
const struct riscv_tune_info *
riscv_parse_tune (const char *, bool);
const cpu_vector_cost *get_vector_costs ();
+int get_gr2vr_cost ();
+int get_fr2vr_cost ();
enum
{
diff --git a/gcc/config/riscv/riscv-selftests.cc b/gcc/config/riscv/riscv-selftests.cc
index 34d01ac..9ca1ffe 100644
--- a/gcc/config/riscv/riscv-selftests.cc
+++ b/gcc/config/riscv/riscv-selftests.cc
@@ -342,9 +342,13 @@ run_broadcast_selftests (void)
expand_vector_broadcast (mode, mem); \
insn = get_last_insn (); \
src = SET_SRC (PATTERN (insn)); \
- ASSERT_TRUE (MEM_P (XEXP (src, 0))); \
- ASSERT_TRUE ( \
- rtx_equal_p (src, gen_rtx_VEC_DUPLICATE (mode, XEXP (src, 0)))); \
+ if (strided_load_broadcast_p ()) \
+ { \
+ ASSERT_TRUE (MEM_P (XEXP (src, 0))); \
+ ASSERT_TRUE ( \
+ rtx_equal_p (src, \
+ gen_rtx_VEC_DUPLICATE (mode, XEXP (src, 0)))); \
+ } \
end_sequence (); \
/* Test vmv.v.x or vfmv.v.f. */ \
start_sequence (); \
diff --git a/gcc/config/riscv/riscv-shorten-memrefs.cc b/gcc/config/riscv/riscv-shorten-memrefs.cc
index 60f330e..2e3d9f6 100644
--- a/gcc/config/riscv/riscv-shorten-memrefs.cc
+++ b/gcc/config/riscv/riscv-shorten-memrefs.cc
@@ -189,8 +189,7 @@ pass_shorten_memrefs::transform (regno_map *m, basic_block bb)
}
}
}
- rtx_insn *seq = get_insns ();
- end_sequence ();
+ rtx_insn *seq = end_sequence ();
emit_insn_before (seq, insn);
}
}
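The riscv-shorten-memrefs hunk above tracks an rtl API change in which end_sequence () now returns the collected insn sequence itself, so the old two-call idiom collapses into one (sketch of the two idioms, assuming the new signature as used above):

    /* Old idiom: fetch the insns, then pop the sequence.  */
    rtx_insn *seq = get_insns ();
    end_sequence ();

    /* New idiom: end_sequence () hands back the insns directly.  */
    rtx_insn *seq = end_sequence ();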
diff --git a/gcc/config/riscv/riscv-string.cc b/gcc/config/riscv/riscv-string.cc
index 9080189..61c4a09 100644
--- a/gcc/config/riscv/riscv-string.cc
+++ b/gcc/config/riscv/riscv-string.cc
@@ -1625,16 +1625,14 @@ expand_vec_setmem (rtx dst_in, rtx length_in, rtx fill_value_in)
Otherwise, use a predicated store. */
if (known_eq (GET_MODE_SIZE (info.vmode), INTVAL (info.avl)))
{
- emit_vlmax_insn (code_for_pred_broadcast (info.vmode), UNARY_OP,
- broadcast_ops);
+ riscv_vector::expand_broadcast (info.vmode, broadcast_ops);
emit_move_insn (dst, fill_value);
}
else
{
if (!satisfies_constraint_vl (info.avl))
info.avl = force_reg (Pmode, info.avl);
- emit_nonvlmax_insn (code_for_pred_broadcast (info.vmode),
- riscv_vector::UNARY_OP, broadcast_ops, info.avl);
+ riscv_vector::expand_broadcast (info.vmode, broadcast_ops, info.avl);
machine_mode mask_mode
= riscv_vector::get_vector_mode (BImode, GET_MODE_NUNITS (info.vmode))
.require ();
diff --git a/gcc/config/riscv/riscv-subset.h b/gcc/config/riscv/riscv-subset.h
index 559e708..4cd860f 100644
--- a/gcc/config/riscv/riscv-subset.h
+++ b/gcc/config/riscv/riscv-subset.h
@@ -52,8 +52,9 @@ private:
/* Original arch string. */
const char *m_arch;
- /* Location of arch string, used for report error. */
- location_t m_loc;
+ /* A pointer to the location that should be used for diagnostics,
+ or null if diagnostics should be suppressed. */
+ location_t *m_loc;
/* Head of subset info list. */
riscv_subset_t *m_head;
@@ -70,7 +71,7 @@ private:
/* Allow adding the same extension more than once. */
bool m_allow_adding_dup;
- riscv_subset_list (const char *, location_t);
+ riscv_subset_list (const char *, location_t *);
const char *parsing_subset_version (const char *, const char *, unsigned *,
unsigned *, bool, bool *);
@@ -82,6 +83,8 @@ private:
const char *parse_single_multiletter_ext (const char *, const char *,
const char *, bool);
+ std::string parse_profiles (const char*);
+
void handle_implied_ext (const char *);
bool check_implied_ext ();
void handle_combine_ext ();
@@ -104,19 +107,75 @@ public:
riscv_subset_list *clone () const;
- static riscv_subset_list *parse (const char *, location_t);
+ static riscv_subset_list *parse (const char *, location_t *);
const char *parse_single_ext (const char *, bool exact_single_p = true);
- const riscv_subset_t *begin () const {return m_head;};
- const riscv_subset_t *end () const {return NULL;};
-
int match_score (riscv_subset_list *) const;
- void set_loc (location_t);
+ void set_loc (location_t *);
void set_allow_adding_dup (bool v) { m_allow_adding_dup = v; }
void finalize ();
+
+ class iterator
+ {
+ public:
+ explicit iterator(riscv_subset_t *node) : m_node(node) {}
+
+ riscv_subset_t &operator*() const { return *m_node; }
+ riscv_subset_t *operator->() const { return m_node; }
+
+ iterator &operator++()
+ {
+ if (m_node)
+ m_node = m_node->next;
+ return *this;
+ }
+
+ bool operator!=(const iterator &other) const
+ {
+ return m_node != other.m_node;
+ }
+
+ bool operator==(const iterator &other) const
+ {
+ return m_node == other.m_node;
+ }
+
+ private:
+ riscv_subset_t *m_node;
+ };
+
+ iterator begin() { return iterator(m_head); }
+ iterator end() { return iterator(nullptr); }
+
+ class const_iterator
+ {
+ public:
+ explicit const_iterator(const riscv_subset_t *node) : m_node(node) {}
+
+ const riscv_subset_t &operator*() const { return *m_node; }
+ const riscv_subset_t *operator->() const { return m_node; }
+
+ const_iterator &operator++()
+ {
+ if (m_node)
+ m_node = m_node->next;
+ return *this;
+ }
+
+ bool operator!=(const const_iterator &other) const
+ {
+ return m_node != other.m_node;
+ }
+
+ private:
+ const riscv_subset_t *m_node;
+ };
+
+ const_iterator begin() const { return const_iterator(m_head); }
+ const_iterator end() const { return const_iterator(nullptr); }
};
extern const riscv_subset_list *riscv_cmdline_subset_list (void);
@@ -124,9 +183,8 @@ extern void
riscv_set_arch_by_subset_list (riscv_subset_list *, struct gcc_options *);
extern bool riscv_minimal_hwprobe_feature_bits (const char *,
struct riscv_feature_bits *,
- location_t);
+ location_t *);
extern bool
riscv_ext_is_subset (struct cl_target_option *, struct cl_target_option *);
-extern int riscv_x_target_flags_isa_mask (void);
#endif /* ! GCC_RISCV_SUBSET_H */
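With begin ()/end () now returning proper iterator objects rather than raw list heads, the subset list works with range-based for loops. A minimal sketch of a traversal inside the backend (this assumes riscv_subset_t exposes a std::string name member, as its uses elsewhere in the port suggest):

    /* Walk every parsed extension in a riscv_subset_list.  */
    static void
    dump_subsets (const riscv_subset_list *subset_list)
    {
      for (const riscv_subset_t &subset : *subset_list)
        fprintf (stderr, "%s\n", subset.name.c_str ());
    }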
diff --git a/gcc/config/riscv/riscv-target-attr.cc b/gcc/config/riscv/riscv-target-attr.cc
index 1d96865..5e01c92 100644
--- a/gcc/config/riscv/riscv-target-attr.cc
+++ b/gcc/config/riscv/riscv-target-attr.cc
@@ -34,7 +34,7 @@ namespace {
class riscv_target_attr_parser
{
public:
- riscv_target_attr_parser (location_t loc)
+ riscv_target_attr_parser (location_t *loc)
: m_found_arch_p (false)
, m_found_tune_p (false)
, m_found_cpu_p (false)
@@ -62,7 +62,7 @@ private:
bool m_found_cpu_p;
bool m_found_priority_p;
riscv_subset_list *m_subset_list;
- location_t m_loc;
+ location_t *m_loc;
const riscv_cpu_info *m_cpu_info;
const char *m_tune;
int m_priority;
@@ -102,15 +102,17 @@ riscv_target_attr_parser::parse_arch (const char *str)
{
if (TARGET_64BIT && strncmp ("32", str + 2, strlen ("32")) == 0)
{
- error_at (m_loc, "unexpected arch for %<target()%> attribute: "
- "must start with rv64 but found %qs", str);
+ if (m_loc)
+ error_at (*m_loc, "unexpected arch for %<target()%> attribute: "
+ "must start with rv64 but found %qs", str);
goto fail;
}
if (!TARGET_64BIT && strncmp ("64", str + 2, strlen ("64")) == 0)
{
- error_at (m_loc, "unexpected arch for %<target()%> attribute: "
- "must start with rv32 but found %qs", str);
+ if (m_loc)
+ error_at (*m_loc, "unexpected arch for %<target()%> attribute: "
+ "must start with rv32 but found %qs", str);
goto fail;
}
@@ -140,10 +142,9 @@ riscv_target_attr_parser::parse_arch (const char *str)
{
if (token[0] != '+')
{
- error_at (
- m_loc,
- "unexpected arch for %<target()%> attribute: must start "
- "with + or rv");
+ if (m_loc)
+ error_at (*m_loc, "unexpected arch for %<target()%> "
+ "attribute: must start with + or rv");
goto fail;
}
@@ -151,10 +152,9 @@ riscv_target_attr_parser::parse_arch (const char *str)
/* Check that parse_single_ext has consumed the whole string. */
if (*result != '\0')
{
- error_at (
- m_loc,
- "unexpected arch for %<target()%> attribute: bad "
- "string found %qs", token);
+ if (m_loc)
+ error_at (*m_loc, "unexpected arch for %<target()%> "
+ "attribute: bad string found %qs", token);
goto fail;
}
@@ -179,8 +179,8 @@ fail:
bool
riscv_target_attr_parser::handle_arch (const char *str)
{
- if (m_found_arch_p)
- error_at (m_loc, "%<target()%> attribute: arch appears more than once");
+ if (m_found_arch_p && m_loc)
+ error_at (*m_loc, "%<target()%> attribute: arch appears more than once");
m_found_arch_p = true;
return parse_arch (str);
}
@@ -190,15 +190,16 @@ riscv_target_attr_parser::handle_arch (const char *str)
bool
riscv_target_attr_parser::handle_cpu (const char *str)
{
- if (m_found_cpu_p)
- error_at (m_loc, "%<target()%> attribute: cpu appears more than once");
+ if (m_found_cpu_p && m_loc)
+ error_at (*m_loc, "%<target()%> attribute: cpu appears more than once");
m_found_cpu_p = true;
const riscv_cpu_info *cpu_info = riscv_find_cpu (str);
if (!cpu_info)
{
- error_at (m_loc, "%<target()%> attribute: unknown CPU %qs", str);
+ if (m_loc)
+ error_at (*m_loc, "%<target()%> attribute: unknown CPU %qs", str);
return false;
}
@@ -218,14 +219,15 @@ riscv_target_attr_parser::handle_cpu (const char *str)
bool
riscv_target_attr_parser::handle_tune (const char *str)
{
- if (m_found_tune_p)
- error_at (m_loc, "%<target()%> attribute: tune appears more than once");
+ if (m_found_tune_p && m_loc)
+ error_at (*m_loc, "%<target()%> attribute: tune appears more than once");
m_found_tune_p = true;
const struct riscv_tune_info *tune = riscv_parse_tune (str, true);
if (tune == nullptr)
{
- error_at (m_loc, "%<target()%> attribute: unknown TUNE %qs", str);
+ if (m_loc)
+ error_at (*m_loc, "%<target()%> attribute: unknown TUNE %qs", str);
return false;
}
@@ -237,13 +239,15 @@ riscv_target_attr_parser::handle_tune (const char *str)
bool
riscv_target_attr_parser::handle_priority (const char *str)
{
- if (m_found_priority_p)
- error_at (m_loc, "%<target()%> attribute: priority appears more than once");
+ if (m_found_priority_p && m_loc)
+ error_at (*m_loc, "%<target()%> attribute: priority appears "
+ "more than once");
m_found_priority_p = true;
if (sscanf (str, "%d", &m_priority) != 1)
{
- error_at (m_loc, "%<target()%> attribute: invalid priority %qs", str);
+ if (m_loc)
+ error_at (*m_loc, "%<target()%> attribute: invalid priority %qs", str);
return false;
}
@@ -257,11 +261,7 @@ riscv_target_attr_parser::update_settings (struct gcc_options *opts) const
{
std::string local_arch = m_subset_list->to_string (true);
const char* local_arch_str = local_arch.c_str ();
- struct cl_target_option *default_opts
- = TREE_TARGET_OPTION (target_option_default_node);
- if (opts->x_riscv_arch_string != default_opts->x_riscv_arch_string)
- free (CONST_CAST (void *, (const void *) opts->x_riscv_arch_string));
- opts->x_riscv_arch_string = xstrdup (local_arch_str);
+ opts->x_riscv_arch_string = ggc_strdup (local_arch_str);
riscv_set_arch_by_subset_list (m_subset_list, opts);
}
@@ -286,7 +286,7 @@ riscv_target_attr_parser::update_settings (struct gcc_options *opts) const
static bool
riscv_process_one_target_attr (char *arg_str,
- location_t loc,
+ location_t *loc,
riscv_target_attr_parser &attr_parser,
const struct riscv_attribute_info *attrs)
{
@@ -294,7 +294,8 @@ riscv_process_one_target_attr (char *arg_str,
if (len == 0)
{
- error_at (loc, "malformed %<target()%> attribute");
+ if (loc)
+ error_at (*loc, "malformed %<target()%> attribute");
return false;
}
@@ -306,10 +307,9 @@ riscv_process_one_target_attr (char *arg_str,
if (!arg)
{
- error_at (
- loc,
- "attribute %<target(\"%s\")%> does not accept an argument",
- str_to_check);
+ if (loc)
+ error_at (*loc, "attribute %<target(\"%s\")%> does not "
+ "accept an argument", str_to_check);
return false;
}
@@ -328,7 +328,8 @@ riscv_process_one_target_attr (char *arg_str,
return (&attr_parser->*attr->handler) (arg);
}
- error_at (loc, "Got unknown attribute %<target(\"%s\")%>", str_to_check);
+ if (loc)
+ error_at (*loc, "Got unknown attribute %<target(\"%s\")%>", str_to_check);
return false;
}
@@ -351,11 +352,12 @@ num_occurrences_in_str (char c, char *str)
}
/* Parse the string in ARGS that contains the target attribute information
- and update the global target options space. */
+ and update the global target options space. If LOC is nonnull, report
+ diagnostics against location *LOC, otherwise remain silent. */
bool
riscv_process_target_attr (const char *args,
- location_t loc,
+ location_t *loc,
const struct riscv_attribute_info *attrs)
{
size_t len = strlen (args);
@@ -391,8 +393,8 @@ riscv_process_target_attr (const char *args,
if (num_attrs != num_semicolons + 1)
{
- error_at (loc, "malformed %<target(\"%s\")%> attribute",
- args);
+ if (loc)
+ error_at (*loc, "malformed %<target(\"%s\")%> attribute", args);
return false;
}
@@ -403,11 +405,12 @@ riscv_process_target_attr (const char *args,
}
/* Parse the tree in ARGS that contains the target attribute information
- and update the global target options space. */
+ and update the global target options space. If LOC is nonnull, report
+ diagnostics against *LOC, otherwise remain silent. */
static bool
riscv_process_target_attr (tree args,
- location_t loc,
+ location_t *loc,
const struct riscv_attribute_info *attrs)
{
if (TREE_CODE (args) == TREE_LIST)
@@ -428,7 +431,8 @@ riscv_process_target_attr (tree args,
if (TREE_CODE (args) != STRING_CST)
{
- error_at (loc, "attribute %<target%> argument not a string");
+ if (loc)
+ error_at (*loc, "attribute %<target%> argument not a string");
return false;
}
@@ -470,7 +474,7 @@ riscv_option_valid_attribute_p (tree fndecl, tree, tree args, int)
TREE_TARGET_OPTION (target_option_default_node));
/* Now we can parse the attributes and set &global_options accordingly. */
- ret = riscv_process_target_attr (args, loc, riscv_target_attrs);
+ ret = riscv_process_target_attr (args, &loc, riscv_target_attrs);
if (ret)
{
riscv_override_options_internal (&global_options);
@@ -485,16 +489,19 @@ riscv_option_valid_attribute_p (tree fndecl, tree, tree args, int)
}
/* Parse the tree in ARGS that contains the target_version attribute
- information and update the global target options space. */
+ information and update the global target options space. If LOC is nonnull,
+ report diagnostics against *LOC, otherwise remain silent. */
bool
-riscv_process_target_version_attr (tree args, location_t loc)
+riscv_process_target_version_attr (tree args, location_t *loc)
{
if (TREE_CODE (args) == TREE_LIST)
{
if (TREE_CHAIN (args))
{
- error ("attribute %<target_version%> has multiple values");
+ if (loc)
+ error_at (*loc, "attribute %<target_version%> "
+ "has multiple values");
return false;
}
args = TREE_VALUE (args);
@@ -502,7 +509,8 @@ riscv_process_target_version_attr (tree args, location_t loc)
if (!args || TREE_CODE (args) != STRING_CST)
{
- error ("attribute %<target_version%> argument not a string");
+ if (loc)
+ error_at (*loc, "attribute %<target_version%> argument not a string");
return false;
}
@@ -545,7 +553,7 @@ riscv_option_valid_version_attribute_p (tree fndecl, tree, tree args, int)
cl_target_option_restore (&global_options, &global_options_set,
TREE_TARGET_OPTION (target_option_current_node));
- ret = riscv_process_target_version_attr (args, loc);
+ ret = riscv_process_target_version_attr (args, &loc);
/* Set up any additional state. */
if (ret)
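The location_t to location_t * change threads an optional diagnostic location through every parser above: a null pointer means "parse, but stay silent", which lets callers probe an attribute string without emitting errors. The two calling styles, per the updated comments (hypothetical caller):

    /* Diagnose failures at LOC, as riscv_option_valid_version_attribute_p
       does above.  */
    location_t loc = DECL_SOURCE_LOCATION (fndecl);
    bool ok = riscv_process_target_version_attr (args, &loc);

    /* Probe silently; a parse failure just yields false.  */
    bool parses = riscv_process_target_version_attr (args, nullptr);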
diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index aae2d27..edfb4ff 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -351,9 +351,12 @@ public:
add_rounding_mode_operand (FRM_RNE);
else if (m_insn_flags & VXRM_RNU_P)
add_rounding_mode_operand (VXRM_RNU);
+ else if (m_insn_flags & VXRM_RNE_P)
+ add_rounding_mode_operand (VXRM_RNE);
else if (m_insn_flags & VXRM_RDN_P)
add_rounding_mode_operand (VXRM_RDN);
-
+ else if (m_insn_flags & VXRM_ROD_P)
+ add_rounding_mode_operand (VXRM_ROD);
if (insn_data[(int) icode].n_operands != m_opno)
internal_error ("invalid number of operands for insn %s, "
@@ -408,7 +411,7 @@ emit_vlmax_insn_lra (unsigned icode, unsigned insn_flags, rtx *ops, rtx vl)
gcc_assert (!can_create_pseudo_p ());
machine_mode mode = GET_MODE (ops[0]);
- if (imm_avl_p (mode))
+ if (imm_avl_p (mode) && !TARGET_XTHEADVECTOR)
{
/* Even though VL is a real hardreg already allocated since
it is post-RA now, we still gain benefits that we emit
@@ -437,6 +440,26 @@ emit_nonvlmax_insn (unsigned icode, unsigned insn_flags, rtx *ops, rtx vl)
e.emit_insn ((enum insn_code) icode, ops);
}
+/* Emit either a VLMAX insn or a non-VLMAX insn depending on TYPE. For a
+ non-VLMAX insn, the length must be specified in VL. */
+
+void
+emit_avltype_insn (unsigned icode, unsigned insn_flags, rtx *ops,
+ avl_type type, rtx vl)
+{
+ if (type != avl_type::VLMAX && vl != NULL_RTX)
+ {
+ insn_expander<RVV_INSN_OPERANDS_MAX> e (insn_flags, false);
+ e.set_vl (vl);
+ e.emit_insn ((enum insn_code) icode, ops);
+ }
+ else
+ {
+ insn_expander<RVV_INSN_OPERANDS_MAX> e (insn_flags, true);
+ e.emit_insn ((enum insn_code) icode, ops);
+ }
+}
+
/* Return true if the vector duplicated by a super element which is the fusion
of consecutive elements.
@@ -931,6 +954,26 @@ emit_vlmax_masked_gather_mu_insn (rtx target, rtx op, rtx sel, rtx mask)
emit_vlmax_insn (icode, BINARY_OP_TAMU, ops);
}
+/* Function to emit a vslide1up instruction of mode MODE with destination
+ DEST and slideup element ELT. */
+
+rtx
+expand_slide1up (machine_mode mode, rtx dest, rtx elt)
+{
+ unsigned int unspec
+ = FLOAT_MODE_P (mode) ? UNSPEC_VFSLIDE1UP : UNSPEC_VSLIDE1UP;
+ insn_code icode = code_for_pred_slide (unspec, mode);
+ /* RVV Spec 16.3.1
+ The destination vector register group for vslideup cannot overlap the
+ source vector register group, otherwise the instruction encoding
+ is reserved. Thus, we need a new register. */
+ rtx tmp = gen_reg_rtx (mode);
+ rtx ops[] = {tmp, dest, elt};
+ emit_vlmax_insn (icode, BINARY_OP, ops);
+ return tmp;
+}
+
/* According to RVV ISA spec (16.5.1. Synthesizing vdecompress):
https://github.com/riscv/riscv-v-spec/blob/master/v-spec.adoc
@@ -1152,16 +1195,7 @@ expand_vector_init_trailing_same_elem (rtx target,
{
rtx dup = expand_vector_broadcast (mode, builder.elt (nelts_reqd - 1));
for (int i = nelts_reqd - trailing_ndups - 1; i >= 0; i--)
- {
- unsigned int unspec
- = FLOAT_MODE_P (mode) ? UNSPEC_VFSLIDE1UP : UNSPEC_VSLIDE1UP;
- insn_code icode = code_for_pred_slide (unspec, mode);
- rtx tmp = gen_reg_rtx (mode);
- rtx ops[] = {tmp, dup, builder.elt (i)};
- emit_vlmax_insn (icode, BINARY_OP, ops);
- /* slide1up need source and dest to be different REG. */
- dup = tmp;
- }
+ dup = expand_slide1up (mode, dup, builder.elt (i));
emit_move_insn (target, dup);
return true;
@@ -1170,462 +1204,636 @@ expand_vector_init_trailing_same_elem (rtx target,
return false;
}
+/* Helper function to emit a vmv.v.x/vmv.v.i or one of the float variants.
+   If VL is not given, a VLMAX insn will be emitted; otherwise
+   a non-VLMAX insn with length VL.
+   If the value to be broadcast is not suitable for vmv.v.x,
+   fall back to a vlse with zero stride.  This itself has a
+   fallback if the uarch prefers not to use a strided load
+   for broadcasts.  */
+
+void
+expand_broadcast (machine_mode mode, rtx *ops, rtx vl)
+{
+ rtx elt = ops[1];
+ avl_type type = vl ? NONVLMAX : VLMAX;
+ if (can_be_broadcast_p (elt))
+ emit_avltype_insn (code_for_pred_broadcast (mode), UNARY_OP, ops,
+ type, vl);
+ else
+ emit_avltype_insn (code_for_pred_strided_broadcast (mode),
+ UNARY_OP, ops, type, vl);
+}
+
+/* Similar to expand_broadcast but emits a vmv.s.x/vfmv.s.f instead. */
+
+void
+expand_set_first (machine_mode mode, rtx *ops, rtx vl)
+{
+ rtx elt = ops[1];
+ avl_type type = vl ? NONVLMAX : VLMAX;
+ if (can_be_broadcast_p (elt))
+ emit_avltype_insn (code_for_pred_broadcast (mode),
+ SCALAR_MOVE_OP, ops, type, vl);
+ else
+ emit_avltype_insn (code_for_pred_strided_broadcast (mode),
+ SCALAR_MOVE_OP, ops, type, vl);
+}
+
+/* Similar to expand_set_first but keeping the tail elements
+ unchanged (TU).  */
+
+void
+expand_set_first_tu (machine_mode mode, rtx *ops, rtx vl)
+{
+ rtx elt = ops[2];
+ if (!vl)
+ vl = const1_rtx;
+ if (can_be_broadcast_p (elt))
+ emit_nonvlmax_insn (code_for_pred_broadcast (mode),
+ SCALAR_MOVE_MERGED_OP_TU, ops, vl);
+ else
+ emit_nonvlmax_insn (code_for_pred_strided_broadcast (mode),
+ SCALAR_MOVE_MERGED_OP_TU, ops, vl);
+}
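These three helpers centralize the vmv.v.x-versus-strided-broadcast decision that callers previously open-coded around code_for_pred_broadcast (compare the riscv-string.cc hunk earlier in this patch). Usage sketch, mirroring that caller:

    /* Broadcast the scalar in OPS[1] into OPS[0]; without VL a VLMAX
       insn is emitted.  */
    rtx ops[] = {dest, elt};
    riscv_vector::expand_broadcast (mode, ops);

    /* Same, but only the first AVL elements.  */
    riscv_vector::expand_broadcast (mode, ops, avl);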
+
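+/* Expand the constant vector SRC, a vec_duplicate of element ELT,
+   into TARGET.  */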
static void
-expand_const_vector (rtx target, rtx src)
+expand_const_vec_duplicate (rtx target, rtx src, rtx elt)
{
machine_mode mode = GET_MODE (target);
rtx result = register_operand (target, mode) ? target : gen_reg_rtx (mode);
- rtx elt;
- if (const_vec_duplicate_p (src, &elt))
+
+ if (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL)
+ {
+ gcc_assert (rtx_equal_p (elt, const0_rtx)
+ || rtx_equal_p (elt, const1_rtx));
+
+ rtx ops[] = {result, src};
+ emit_vlmax_insn (code_for_pred_mov (mode), UNARY_MASK_OP, ops);
+ }
+ else if (valid_vec_immediate_p (src))
{
- if (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL)
- {
- gcc_assert (rtx_equal_p (elt, const0_rtx)
- || rtx_equal_p (elt, const1_rtx));
- rtx ops[] = {result, src};
- emit_vlmax_insn (code_for_pred_mov (mode), UNARY_MASK_OP, ops);
- }
/* Element in range -16 ~ 15 integer or 0.0 floating-point,
we use vmv.v.i instruction. */
- else if (valid_vec_immediate_p (src))
+ rtx ops[] = {result, src};
+ emit_vlmax_insn (code_for_pred_mov (mode), UNARY_OP, ops);
+ }
+ else
+ {
+ /* Emit the vec_duplicate<mode> split pattern before RA so that
+ we get a better optimization opportunity in LICM, which will
+ hoist vmv.v.x outside the loop, and in fwprop && combine,
+ which will transform 'vv' into 'vx' instructions.
+
+ We don't emit the vec_duplicate<mode> split pattern during
+ RA because the split stage after RA is too late to generate
+ an RVV instruction that needs an additional register (we can't
+ allocate a new register after RA) for the VL operand of the
+ vsetvl instruction (vsetvl a5, zero). */
+ if (lra_in_progress)
{
- rtx ops[] = {result, src};
- emit_vlmax_insn (code_for_pred_mov (mode), UNARY_OP, ops);
+ rtx ops[] = {result, elt};
+ expand_broadcast (mode, ops);
}
else
{
- /* Emit vec_duplicate<mode> split pattern before RA so that
- we could have a better optimization opportunity in LICM
- which will hoist vmv.v.x outside the loop and in fwprop && combine
- which will transform 'vv' into 'vx' instruction.
-
- The reason we don't emit vec_duplicate<mode> split pattern during
- RA since the split stage after RA is a too late stage to generate
- RVV instruction which need an additional register (We can't
- allocate a new register after RA) for VL operand of vsetvl
- instruction (vsetvl a5, zero). */
- if (lra_in_progress)
- {
- rtx ops[] = {result, elt};
- emit_vlmax_insn (code_for_pred_broadcast (mode), UNARY_OP, ops);
- }
- else
- {
- struct expand_operand ops[2];
- enum insn_code icode = optab_handler (vec_duplicate_optab, mode);
- gcc_assert (icode != CODE_FOR_nothing);
- create_output_operand (&ops[0], result, mode);
- create_input_operand (&ops[1], elt, GET_MODE_INNER (mode));
- expand_insn (icode, 2, ops);
- result = ops[0].value;
- }
+ struct expand_operand ops[2];
+ enum insn_code icode = optab_handler (vec_duplicate_optab, mode);
+ gcc_assert (icode != CODE_FOR_nothing);
+ create_output_operand (&ops[0], result, mode);
+ create_input_operand (&ops[1], elt, GET_MODE_INNER (mode));
+ expand_insn (icode, 2, ops);
+ result = ops[0].value;
}
-
- if (result != target)
- emit_move_insn (target, result);
- return;
}
- /* Support scalable const series vector. */
- rtx base, step;
- if (const_vec_series_p (src, &base, &step))
- {
- expand_vec_series (result, base, step);
+ if (result != target)
+ emit_move_insn (target, result);
+}
- if (result != target)
- emit_move_insn (target, result);
- return;
+static void
+expand_const_vec_series (rtx target, rtx base, rtx step)
+{
+ machine_mode mode = GET_MODE (target);
+ rtx result = register_operand (target, mode) ? target : gen_reg_rtx (mode);
+
+ expand_vec_series (result, base, step);
+
+ if (result != target)
+ emit_move_insn (target, result);
+}
+
+/* We handle the case that we can find a vector container to hold
+ element bitsize = NPATTERNS * ele_bitsize.
+
+ NPATTERNS = 8, element width = 8
+ v = { 0, 1, 2, 3, 4, 5, 6, 7, ... }
+ In this case, we can combine NPATTERNS element into a larger
+ element. Use element width = 64 and broadcast a vector with
+ all element equal to 0x0706050403020100. */
+
+static void
+expand_const_vector_duplicate_repeating (rtx target, rvv_builder *builder)
+{
+ machine_mode mode = GET_MODE (target);
+ rtx result = register_operand (target, mode) ? target : gen_reg_rtx (mode);
+ rtx ele = builder->get_merged_repeating_sequence ();
+ rtx dup;
+
+ if (lra_in_progress)
+ {
+ dup = gen_reg_rtx (builder->new_mode ());
+ rtx ops[] = {dup, ele};
+ expand_broadcast (builder->new_mode (), ops);
}
+ else
+ dup = expand_vector_broadcast (builder->new_mode (), ele);
- /* Handle variable-length vector. */
- unsigned int nelts_per_pattern = CONST_VECTOR_NELTS_PER_PATTERN (src);
- unsigned int npatterns = CONST_VECTOR_NPATTERNS (src);
- rvv_builder builder (mode, npatterns, nelts_per_pattern);
- for (unsigned int i = 0; i < nelts_per_pattern; i++)
+ emit_move_insn (result, gen_lowpart (mode, dup));
+
+ if (result != target)
+ emit_move_insn (target, result);
+}
+
+/* We handle the case that we can't find a vector container to hold
+ element bitsize = NPATTERNS * ele_bitsize.
+
+ NPATTERNS = 8, element width = 16
+ v = { 0, 1, 2, 3, 4, 5, 6, 7, ... }
+ Since NPATTERNS * element width = 128, we can't find a container
+ to hold it.
+
+ In this case, we use NPATTERNS merge operations to generate such
+ vector. */
+
+static void
+expand_const_vector_duplicate_default (rtx target, rvv_builder *builder)
+{
+ machine_mode mode = GET_MODE (target);
+ rtx result = register_operand (target, mode) ? target : gen_reg_rtx (mode);
+ unsigned int nbits = builder->npatterns () - 1;
+
+ /* Generate vid = { 0, 1, 2, 3, 4, 5, 6, 7, ... }. */
+ rtx vid = gen_reg_rtx (builder->int_mode ());
+ rtx op[] = {vid};
+ emit_vlmax_insn (code_for_pred_series (builder->int_mode ()), NULLARY_OP, op);
+
+ /* Generate vid_repeat = { 0, 1, ... nbits, ... } */
+ rtx vid_repeat = gen_reg_rtx (builder->int_mode ());
+ rtx and_ops[] = {vid_repeat, vid,
+ gen_int_mode (nbits, builder->inner_int_mode ())};
+ emit_vlmax_insn (code_for_pred_scalar (AND, builder->int_mode ()), BINARY_OP,
+ and_ops);
+
+ rtx tmp1 = gen_reg_rtx (builder->mode ());
+ rtx dup_ops[] = {tmp1, builder->elt (0)};
+ expand_broadcast (builder->mode (), dup_ops);
+
+ for (unsigned int i = 1; i < builder->npatterns (); i++)
{
- for (unsigned int j = 0; j < npatterns; j++)
- builder.quick_push (CONST_VECTOR_ELT (src, i * npatterns + j));
+ /* Generate mask according to i. */
+ rtx mask = gen_reg_rtx (builder->mask_mode ());
+ rtx const_vec = gen_const_vector_dup (builder->int_mode (), i);
+ expand_vec_cmp (mask, EQ, vid_repeat, const_vec);
+
+ /* Merge scalar to each i. */
+ rtx tmp2 = gen_reg_rtx (builder->mode ());
+ rtx merge_ops[] = {tmp2, tmp1, builder->elt (i), mask};
+ insn_code icode = code_for_pred_merge_scalar (builder->mode ());
+ emit_vlmax_insn (icode, MERGE_OP, merge_ops);
+ tmp1 = tmp2;
}
- builder.finalize ();
- if (CONST_VECTOR_DUPLICATE_P (src))
+ emit_move_insn (result, tmp1);
+
+ if (result != target)
+ emit_move_insn (target, result);
+}
+
+/* Handle the case of a repeating sequence where NELTS_PER_PATTERN = 1,
+   e.g. NPATTERNS = 4, v = { 0, 2, 6, 7, ... }
+   NPATTERNS = 8, v = { 0, 2, 6, 7, 19, 20, 8, 7 ... }
+   The elements within NPATTERNS are not necessarily regular.  */
+static void
+expand_const_vector_duplicate (rtx target, rvv_builder *builder)
+{
+ if (builder->can_duplicate_repeating_sequence_p ())
+ return expand_const_vector_duplicate_repeating (target, builder);
+ else
+ return expand_const_vector_duplicate_default (target, builder);
+}
+
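+/* Handle a stepped const vector whose pattern advances by one uniform
+   step between blocks of NPATTERNS elements, e.g.
+   { a, a, a + step, a + step, ... } or { a, b, a + step, b + step, ... }.  */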
+static void
+expand_const_vector_single_step_npatterns (rtx target, rvv_builder *builder)
+{
+ machine_mode mode = GET_MODE (target);
+ rtx result = register_operand (target, mode) ? target : gen_reg_rtx (mode);
+
+ /* Describe the case by choosing NPATTERNS = 4 as an example. */
+ insn_code icode;
+
+ /* Step 1: Generate vid = { 0, 1, 2, 3, 4, 5, 6, 7, ... }. */
+ rtx vid = gen_reg_rtx (builder->mode ());
+ rtx vid_ops[] = {vid};
+ icode = code_for_pred_series (builder->mode ());
+ emit_vlmax_insn (icode, NULLARY_OP, vid_ops);
+
+ if (builder->npatterns_all_equal_p ())
{
- /* Handle the case with repeating sequence that NELTS_PER_PATTERN = 1
- E.g. NPATTERNS = 4, v = { 0, 2, 6, 7, ... }
- NPATTERNS = 8, v = { 0, 2, 6, 7, 19, 20, 8, 7 ... }
- The elements within NPATTERNS are not necessary regular. */
- if (builder.can_duplicate_repeating_sequence_p ())
+ /* Generate the variable-length vector following this rule:
+ { a, a, a + step, a + step, a + step * 2, a + step * 2, ...}
+ E.g. { 0, 0, 8, 8, 16, 16, ... } */
+
+ /* We want to create a pattern where value[idx] = floor (idx /
+ NPATTERNS). As NPATTERNS is always a power of two we can
+ rewrite this as = idx & -NPATTERNS. */
+ /* Step 2: VID AND -NPATTERNS:
+ { 0&-4, 1&-4, 2&-4, 3 &-4, 4 &-4, 5 &-4, 6 &-4, 7 &-4, ... } */
+ rtx imm = gen_int_mode (-builder->npatterns (), builder->inner_mode ());
+ rtx tmp1 = gen_reg_rtx (builder->mode ());
+ rtx and_ops[] = {tmp1, vid, imm};
+ icode = code_for_pred_scalar (AND, builder->mode ());
+ emit_vlmax_insn (icode, BINARY_OP, and_ops);
+
+ /* Step 3: Convert to step size 1. */
+ rtx tmp2 = gen_reg_rtx (builder->mode ());
+ /* log2 (npatterns) to get the shift amount to convert
+ Eg. { 0, 0, 0, 0, 4, 4, ... }
+ into { 0, 0, 0, 0, 1, 1, ... }. */
+ HOST_WIDE_INT shift_amt = exact_log2 (builder->npatterns ());
+ rtx shift = gen_int_mode (shift_amt, builder->inner_mode ());
+ rtx shift_ops[] = {tmp2, tmp1, shift};
+ icode = code_for_pred_scalar (ASHIFTRT, builder->mode ());
+ emit_vlmax_insn (icode, BINARY_OP, shift_ops);
+
+ /* Step 4: Multiply to step size n. */
+ HOST_WIDE_INT step_size
+ = INTVAL (builder->elt (builder->npatterns ()))
+ - INTVAL (builder->elt (0));
+ rtx tmp3 = gen_reg_rtx (builder->mode ());
+ if (pow2p_hwi (step_size))
{
- /* We handle the case that we can find a vector container to hold
- element bitsize = NPATTERNS * ele_bitsize.
-
- NPATTERNS = 8, element width = 8
- v = { 0, 1, 2, 3, 4, 5, 6, 7, ... }
- In this case, we can combine NPATTERNS element into a larger
- element. Use element width = 64 and broadcast a vector with
- all element equal to 0x0706050403020100. */
- rtx ele = builder.get_merged_repeating_sequence ();
- rtx dup;
- if (lra_in_progress)
- {
- dup = gen_reg_rtx (builder.new_mode ());
- rtx ops[] = {dup, ele};
- emit_vlmax_insn (code_for_pred_broadcast
- (builder.new_mode ()), UNARY_OP, ops);
- }
- else
- dup = expand_vector_broadcast (builder.new_mode (), ele);
- emit_move_insn (result, gen_lowpart (mode, dup));
+ /* Power of 2 can be handled with a left shift. */
+ HOST_WIDE_INT shift = exact_log2 (step_size);
+ rtx shift_amount = gen_int_mode (shift, Pmode);
+ insn_code icode = code_for_pred_scalar (ASHIFT, mode);
+ rtx ops[] = {tmp3, tmp2, shift_amount};
+ emit_vlmax_insn (icode, BINARY_OP, ops);
}
else
{
- /* We handle the case that we can't find a vector container to hold
- element bitsize = NPATTERNS * ele_bitsize.
-
- NPATTERNS = 8, element width = 16
- v = { 0, 1, 2, 3, 4, 5, 6, 7, ... }
- Since NPATTERNS * element width = 128, we can't find a container
- to hold it.
-
- In this case, we use NPATTERNS merge operations to generate such
- vector. */
- unsigned int nbits = npatterns - 1;
-
- /* Generate vid = { 0, 1, 2, 3, 4, 5, 6, 7, ... }. */
- rtx vid = gen_reg_rtx (builder.int_mode ());
- rtx op[] = {vid};
- emit_vlmax_insn (code_for_pred_series (builder.int_mode ()),
- NULLARY_OP, op);
-
- /* Generate vid_repeat = { 0, 1, ... nbits, ... } */
- rtx vid_repeat = gen_reg_rtx (builder.int_mode ());
- rtx and_ops[] = {vid_repeat, vid,
- gen_int_mode (nbits, builder.inner_int_mode ())};
- emit_vlmax_insn (code_for_pred_scalar (AND, builder.int_mode ()),
- BINARY_OP, and_ops);
-
- rtx tmp1 = gen_reg_rtx (builder.mode ());
- rtx dup_ops[] = {tmp1, builder.elt (0)};
- emit_vlmax_insn (code_for_pred_broadcast (builder.mode ()), UNARY_OP,
- dup_ops);
- for (unsigned int i = 1; i < builder.npatterns (); i++)
- {
- /* Generate mask according to i. */
- rtx mask = gen_reg_rtx (builder.mask_mode ());
- rtx const_vec = gen_const_vector_dup (builder.int_mode (), i);
- expand_vec_cmp (mask, EQ, vid_repeat, const_vec);
-
- /* Merge scalar to each i. */
- rtx tmp2 = gen_reg_rtx (builder.mode ());
- rtx merge_ops[] = {tmp2, tmp1, builder.elt (i), mask};
- insn_code icode = code_for_pred_merge_scalar (builder.mode ());
- emit_vlmax_insn (icode, MERGE_OP, merge_ops);
- tmp1 = tmp2;
- }
- emit_move_insn (result, tmp1);
+ rtx mult_amt = gen_int_mode (step_size, builder->inner_mode ());
+ insn_code icode = code_for_pred_scalar (MULT, builder->mode ());
+ rtx ops[] = {tmp3, tmp2, mult_amt};
+ emit_vlmax_insn (icode, BINARY_OP, ops);
+ }
+
+ /* Step 5: Add starting value to all elements. */
+ HOST_WIDE_INT init_val = INTVAL (builder->elt (0));
+ if (init_val == 0)
+ emit_move_insn (result, tmp3);
+ else
+ {
+ rtx dup = gen_const_vector_dup (builder->mode (), init_val);
+ rtx add_ops[] = {result, tmp3, dup};
+ icode = code_for_pred (PLUS, builder->mode ());
+ emit_vlmax_insn (icode, BINARY_OP, add_ops);
}
}
- else if (CONST_VECTOR_STEPPED_P (src))
+ else
{
- gcc_assert (GET_MODE_CLASS (mode) == MODE_VECTOR_INT);
- if (builder.single_step_npatterns_p ())
+ /* Generate the variable-length vector following this rule:
+ { a, b, a + step, b + step, a + step*2, b + step*2, ... } */
+ if (builder->npatterns_vid_diff_repeated_p ())
{
- /* Describe the case by choosing NPATTERNS = 4 as an example. */
- insn_code icode;
-
- /* Step 1: Generate vid = { 0, 1, 2, 3, 4, 5, 6, 7, ... }. */
- rtx vid = gen_reg_rtx (builder.mode ());
- rtx vid_ops[] = {vid};
- icode = code_for_pred_series (builder.mode ());
- emit_vlmax_insn (icode, NULLARY_OP, vid_ops);
-
- if (builder.npatterns_all_equal_p ())
+ /* Case 1: For example as below:
+ {3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8... }
+ We have 3 - 0 = 3 equals 7 - 4 = 3, the sequence is
+ repeated as below after minus vid.
+ {3, 1, -1, -3, 3, 1, -1, -3...}
+ Then we can simplify the diff code gen to at most
+ npatterns(). */
+ rvv_builder v (builder->mode (), builder->npatterns (), 1);
+
+ /* Step 1: Generate diff = TARGET - VID. */
+ for (unsigned int i = 0; i < v.npatterns (); ++i)
{
- /* Generate the variable-length vector following this rule:
- { a, a, a + step, a + step, a + step * 2, a + step * 2, ...}
- E.g. { 0, 0, 8, 8, 16, 16, ... } */
-
- /* We want to create a pattern where value[idx] = floor (idx /
- NPATTERNS). As NPATTERNS is always a power of two we can
- rewrite this as = idx & -NPATTERNS. */
- /* Step 2: VID AND -NPATTERNS:
- { 0&-4, 1&-4, 2&-4, 3 &-4, 4 &-4, 5 &-4, 6 &-4, 7 &-4, ... }
- */
- rtx imm
- = gen_int_mode (-builder.npatterns (), builder.inner_mode ());
- rtx tmp1 = gen_reg_rtx (builder.mode ());
- rtx and_ops[] = {tmp1, vid, imm};
- icode = code_for_pred_scalar (AND, builder.mode ());
- emit_vlmax_insn (icode, BINARY_OP, and_ops);
-
- /* Step 3: Convert to step size 1. */
- rtx tmp2 = gen_reg_rtx (builder.mode ());
- /* log2 (npatterns) to get the shift amount to convert
- Eg. { 0, 0, 0, 0, 4, 4, ... }
- into { 0, 0, 0, 0, 1, 1, ... }. */
- HOST_WIDE_INT shift_amt = exact_log2 (builder.npatterns ()) ;
- rtx shift = gen_int_mode (shift_amt, builder.inner_mode ());
- rtx shift_ops[] = {tmp2, tmp1, shift};
- icode = code_for_pred_scalar (ASHIFTRT, builder.mode ());
- emit_vlmax_insn (icode, BINARY_OP, shift_ops);
-
- /* Step 4: Multiply to step size n. */
- HOST_WIDE_INT step_size =
- INTVAL (builder.elt (builder.npatterns ()))
- - INTVAL (builder.elt (0));
- rtx tmp3 = gen_reg_rtx (builder.mode ());
- if (pow2p_hwi (step_size))
- {
- /* Power of 2 can be handled with a left shift. */
- HOST_WIDE_INT shift = exact_log2 (step_size);
- rtx shift_amount = gen_int_mode (shift, Pmode);
- insn_code icode = code_for_pred_scalar (ASHIFT, mode);
- rtx ops[] = {tmp3, tmp2, shift_amount};
- emit_vlmax_insn (icode, BINARY_OP, ops);
- }
- else
- {
- rtx mult_amt = gen_int_mode (step_size, builder.inner_mode ());
- insn_code icode = code_for_pred_scalar (MULT, builder.mode ());
- rtx ops[] = {tmp3, tmp2, mult_amt};
- emit_vlmax_insn (icode, BINARY_OP, ops);
- }
-
- /* Step 5: Add starting value to all elements. */
- HOST_WIDE_INT init_val = INTVAL (builder.elt (0));
- if (init_val == 0)
- emit_move_insn (result, tmp3);
- else
- {
- rtx dup = gen_const_vector_dup (builder.mode (), init_val);
- rtx add_ops[] = {result, tmp3, dup};
- icode = code_for_pred (PLUS, builder.mode ());
- emit_vlmax_insn (icode, BINARY_OP, add_ops);
- }
+ poly_int64 diff = rtx_to_poly_int64 (builder->elt (i)) - i;
+ v.quick_push (gen_int_mode (diff, v.inner_mode ()));
}
- else
- {
- /* Generate the variable-length vector following this rule:
- { a, b, a + step, b + step, a + step*2, b + step*2, ... } */
- if (builder.npatterns_vid_diff_repeated_p ())
- {
- /* Case 1: For example as below:
- {3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8... }
- We have 3 - 0 = 3 equals 7 - 4 = 3, the sequence is
- repeated as below after minus vid.
- {3, 1, -1, -3, 3, 1, -1, -3...}
- Then we can simplify the diff code gen to at most
- npatterns(). */
- rvv_builder v (builder.mode (), builder.npatterns (), 1);
-
- /* Step 1: Generate diff = TARGET - VID. */
- for (unsigned int i = 0; i < v.npatterns (); ++i)
- {
- poly_int64 diff = rtx_to_poly_int64 (builder.elt (i)) - i;
- v.quick_push (gen_int_mode (diff, v.inner_mode ()));
- }
-
- /* Step 2: Generate result = VID + diff. */
- rtx vec = v.build ();
- rtx add_ops[] = {result, vid, vec};
- emit_vlmax_insn (code_for_pred (PLUS, builder.mode ()),
- BINARY_OP, add_ops);
- }
- else
- {
- /* Case 2: For example as below:
- { -4, 4, -4 + 1, 4 + 1, -4 + 2, 4 + 2, -4 + 3, 4 + 3, ... }
- */
- rvv_builder v (builder.mode (), builder.npatterns (), 1);
-
- /* Step 1: Generate { a, b, a, b, ... } */
- for (unsigned int i = 0; i < v.npatterns (); ++i)
- v.quick_push (builder.elt (i));
- rtx new_base = v.build ();
-
- /* Step 2: Generate tmp1 = VID >> LOG2 (NPATTERNS).  */
- rtx shift_count
- = gen_int_mode (exact_log2 (builder.npatterns ()),
- builder.inner_mode ());
- rtx tmp1 = gen_reg_rtx (builder.mode ());
- rtx shift_ops[] = {tmp1, vid, shift_count};
- emit_vlmax_insn (code_for_pred_scalar
- (LSHIFTRT, builder.mode ()), BINARY_OP,
- shift_ops);
-
- /* Step 3: Generate tmp2 = tmp1 * step.  */
- rtx tmp2 = gen_reg_rtx (builder.mode ());
- rtx step
- = simplify_binary_operation (MINUS, builder.inner_mode (),
- builder.elt (v.npatterns()),
- builder.elt (0));
- expand_vec_series (tmp2, const0_rtx, step, tmp1);
-
- /* Step 4: Generate result = tmp2 + new_base.  */
- rtx add_ops[] = {result, tmp2, new_base};
- emit_vlmax_insn (code_for_pred (PLUS, builder.mode ()),
- BINARY_OP, add_ops);
- }
- }
+ /* Step 2: Generate result = VID + diff. */
+ rtx vec = v.build ();
+ rtx add_ops[] = {result, vid, vec};
+ emit_vlmax_insn (code_for_pred (PLUS, builder->mode ()), BINARY_OP,
+ add_ops);
}
- else if (builder.interleaved_stepped_npatterns_p ())
+ else
{
- rtx base1 = builder.elt (0);
- rtx base2 = builder.elt (1);
- poly_int64 step1
- = rtx_to_poly_int64 (builder.elt (builder.npatterns ()))
- - rtx_to_poly_int64 (base1);
- poly_int64 step2
- = rtx_to_poly_int64 (builder.elt (builder.npatterns () + 1))
- - rtx_to_poly_int64 (base2);
+ /* Case 2: For example as below:
+ { -4, 4, -4 + 1, 4 + 1, -4 + 2, 4 + 2, -4 + 3, 4 + 3, ... }
+ */
+ rvv_builder v (builder->mode (), builder->npatterns (), 1);
+
+ /* Step 1: Generate { a, b, a, b, ... } */
+ for (unsigned int i = 0; i < v.npatterns (); ++i)
+ v.quick_push (builder->elt (i));
+ rtx new_base = v.build ();
+
+ /* Step 2: Generate tmp1 = VID >> LOG2 (NPATTERNS).  */
+ rtx shift_count = gen_int_mode (exact_log2 (builder->npatterns ()),
+ builder->inner_mode ());
+ rtx tmp1 = gen_reg_rtx (builder->mode ());
+ rtx shift_ops[] = {tmp1, vid, shift_count};
+ emit_vlmax_insn (code_for_pred_scalar (LSHIFTRT, builder->mode ()),
+ BINARY_OP, shift_ops);
+
+ /* Step 3: Generate tmp2 = tmp1 * step.  */
+ rtx tmp2 = gen_reg_rtx (builder->mode ());
+ rtx step
+ = simplify_binary_operation (MINUS, builder->inner_mode (),
+ builder->elt (v.npatterns ()),
+ builder->elt (0));
+ expand_vec_series (tmp2, const0_rtx, step, tmp1);
+
+ /* Step 4: Generate result = tmp2 + new_base.  */
+ rtx add_ops[] = {result, tmp2, new_base};
+ emit_vlmax_insn (code_for_pred (PLUS, builder->mode ()), BINARY_OP,
+ add_ops);
+ }
+ }
+
+ if (result != target)
+ emit_move_insn (target, result);
+}
- /* For { 1, 0, 2, 0, ... , n - 1, 0 }, we can use larger EEW
- integer vector mode to generate such vector efficiently.
+static void
+expand_const_vector_interleaved_stepped_npatterns (rtx target, rtx src,
+ rvv_builder *builder)
+{
+ machine_mode mode = GET_MODE (target);
+ rtx result = register_operand (target, mode) ? target : gen_reg_rtx (mode);
+ rtx base1 = builder->elt (0);
+ rtx base2 = builder->elt (1);
- E.g. EEW = 16, { 2, 0, 4, 0, ... }
+ poly_int64 step1 = rtx_to_poly_int64 (builder->elt (builder->npatterns ()))
+ - rtx_to_poly_int64 (base1);
+ poly_int64 step2
+ = rtx_to_poly_int64 (builder->elt (builder->npatterns () + 1))
+ - rtx_to_poly_int64 (base2);
- can be interpreted into:
+ /* For { 1, 0, 2, 0, ... , n - 1, 0 }, we can use larger EEW
+ integer vector mode to generate such vector efficiently.
- EEW = 32, { 2, 4, ... }.
+ E.g. EEW = 16, { 2, 0, 4, 0, ... }
- Both the series1 and series2 may overflow before taking the IOR
- to generate the final result. However, only series1 matters
- because the series2 will shift before IOR, thus the overflow
- bits will never pollute the final result.
+ can be interpreted into:
- For now we forbid the negative steps and overflow, and they
- will fall back to the default merge way to generate the
- const_vector. */
+ EEW = 32, { 2, 4, ... }.
- unsigned int new_smode_bitsize = builder.inner_bits_size () * 2;
- scalar_int_mode new_smode;
- machine_mode new_mode;
- poly_uint64 new_nunits
- = exact_div (GET_MODE_NUNITS (builder.mode ()), 2);
+ Both the series1 and series2 may overflow before taking the IOR
+ to generate the final result. However, only series1 matters
+ because the series2 will shift before IOR, thus the overflow
+ bits will never pollute the final result.
- poly_int64 base1_poly = rtx_to_poly_int64 (base1);
- bool overflow_smode_p = false;
+ For now we forbid the negative steps and overflow, and they
+ will fall back to the default merge way to generate the
+ const_vector. */
- if (!step1.is_constant ())
- overflow_smode_p = true;
- else
- {
- int elem_count = XVECLEN (src, 0);
- uint64_t step1_val = step1.to_constant ();
- uint64_t base1_val = base1_poly.to_constant ();
- uint64_t elem_val = base1_val + (elem_count - 1) * step1_val;
+ unsigned int new_smode_bitsize = builder->inner_bits_size () * 2;
+ scalar_int_mode new_smode;
+ machine_mode new_mode;
+ poly_uint64 new_nunits = exact_div (GET_MODE_NUNITS (builder->mode ()), 2);
- if ((elem_val >> builder.inner_bits_size ()) != 0)
- overflow_smode_p = true;
- }
+ poly_int64 base1_poly = rtx_to_poly_int64 (base1);
+ bool overflow_smode_p = false;
- if (known_ge (step1, 0) && known_ge (step2, 0)
- && int_mode_for_size (new_smode_bitsize, 0).exists (&new_smode)
- && get_vector_mode (new_smode, new_nunits).exists (&new_mode)
- && !overflow_smode_p)
+ if (!step1.is_constant ())
+ overflow_smode_p = true;
+ else
+ {
+ int elem_count = XVECLEN (src, 0);
+ uint64_t step1_val = step1.to_constant ();
+ uint64_t base1_val = base1_poly.to_constant ();
+ uint64_t elem_val = base1_val + (elem_count - 1) * step1_val;
+
+ if ((elem_val >> builder->inner_bits_size ()) != 0)
+ overflow_smode_p = true;
+ }
+
+ if (known_ge (step1, 0) && known_ge (step2, 0)
+ && int_mode_for_size (new_smode_bitsize, 0).exists (&new_smode)
+ && get_vector_mode (new_smode, new_nunits).exists (&new_mode)
+ && !overflow_smode_p)
+ {
+ rtx tmp1 = gen_reg_rtx (new_mode);
+ base1 = gen_int_mode (base1_poly, new_smode);
+ expand_vec_series (tmp1, base1, gen_int_mode (step1, new_smode));
+
+ if (rtx_equal_p (base2, const0_rtx) && known_eq (step2, 0))
+ /* { 1, 0, 2, 0, ... }. */
+ emit_move_insn (result, gen_lowpart (mode, tmp1));
+ else if (known_eq (step2, 0))
+ {
+ /* { 1, 1, 2, 1, ... }. */
+ rtx scalar = expand_simple_binop (
+ Xmode, ASHIFT, gen_int_mode (rtx_to_poly_int64 (base2), Xmode),
+ gen_int_mode (builder->inner_bits_size (), Xmode), NULL_RTX, false,
+ OPTAB_DIRECT);
+ scalar = simplify_gen_subreg (new_smode, scalar, Xmode, 0);
+ rtx tmp2 = gen_reg_rtx (new_mode);
+ rtx ior_ops[] = {tmp2, tmp1, scalar};
+ emit_vlmax_insn (code_for_pred_scalar (IOR, new_mode), BINARY_OP,
+ ior_ops);
+ emit_move_insn (result, gen_lowpart (mode, tmp2));
+ }
+ else
+ {
+ /* { 1, 3, 2, 6, ... }. */
+ rtx tmp2 = gen_reg_rtx (new_mode);
+ base2 = gen_int_mode (rtx_to_poly_int64 (base2), new_smode);
+ expand_vec_series (tmp2, base2, gen_int_mode (step2, new_smode));
+ rtx shifted_tmp2;
+ rtx shift = gen_int_mode (builder->inner_bits_size (), Xmode);
+ if (lra_in_progress)
{
- rtx tmp1 = gen_reg_rtx (new_mode);
- base1 = gen_int_mode (base1_poly, new_smode);
- expand_vec_series (tmp1, base1, gen_int_mode (step1, new_smode));
-
- if (rtx_equal_p (base2, const0_rtx) && known_eq (step2, 0))
- /* { 1, 0, 2, 0, ... }. */
- emit_move_insn (result, gen_lowpart (mode, tmp1));
- else if (known_eq (step2, 0))
- {
- /* { 1, 1, 2, 1, ... }. */
- rtx scalar = expand_simple_binop (
- Xmode, ASHIFT,
- gen_int_mode (rtx_to_poly_int64 (base2), Xmode),
- gen_int_mode (builder.inner_bits_size (), Xmode),
- NULL_RTX, false, OPTAB_DIRECT);
- scalar = simplify_gen_subreg (new_smode, scalar, Xmode, 0);
- rtx tmp2 = gen_reg_rtx (new_mode);
- rtx ior_ops[] = {tmp2, tmp1, scalar};
- emit_vlmax_insn (code_for_pred_scalar (IOR, new_mode),
- BINARY_OP, ior_ops);
- emit_move_insn (result, gen_lowpart (mode, tmp2));
- }
- else
- {
- /* { 1, 3, 2, 6, ... }. */
- rtx tmp2 = gen_reg_rtx (new_mode);
- base2 = gen_int_mode (rtx_to_poly_int64 (base2), new_smode);
- expand_vec_series (tmp2, base2,
- gen_int_mode (step2, new_smode));
- rtx shifted_tmp2;
- rtx shift = gen_int_mode (builder.inner_bits_size (), Xmode);
- if (lra_in_progress)
- {
- shifted_tmp2 = gen_reg_rtx (new_mode);
- rtx shift_ops[] = {shifted_tmp2, tmp2, shift};
- emit_vlmax_insn (code_for_pred_scalar
- (ASHIFT, new_mode), BINARY_OP,
- shift_ops);
- }
- else
- shifted_tmp2 = expand_simple_binop (new_mode, ASHIFT, tmp2,
- shift, NULL_RTX, false,
- OPTAB_DIRECT);
- rtx tmp3 = gen_reg_rtx (new_mode);
- rtx ior_ops[] = {tmp3, tmp1, shifted_tmp2};
- emit_vlmax_insn (code_for_pred (IOR, new_mode), BINARY_OP,
- ior_ops);
- emit_move_insn (result, gen_lowpart (mode, tmp3));
- }
+ shifted_tmp2 = gen_reg_rtx (new_mode);
+ rtx shift_ops[] = {shifted_tmp2, tmp2, shift};
+ emit_vlmax_insn (code_for_pred_scalar (ASHIFT, new_mode),
+ BINARY_OP, shift_ops);
}
else
- {
- rtx vid = gen_reg_rtx (mode);
- expand_vec_series (vid, const0_rtx, const1_rtx);
- /* Transform into { 0, 0, 1, 1, 2, 2, ... }. */
- rtx shifted_vid;
- if (lra_in_progress)
- {
- shifted_vid = gen_reg_rtx (mode);
- rtx shift = gen_int_mode (1, Xmode);
- rtx shift_ops[] = {shifted_vid, vid, shift};
- emit_vlmax_insn (code_for_pred_scalar
- (ASHIFT, mode), BINARY_OP,
- shift_ops);
- }
- else
- shifted_vid = expand_simple_binop (mode, LSHIFTRT, vid,
- const1_rtx, NULL_RTX,
- false, OPTAB_DIRECT);
- rtx tmp1 = gen_reg_rtx (mode);
- rtx tmp2 = gen_reg_rtx (mode);
- expand_vec_series (tmp1, base1,
- gen_int_mode (step1, builder.inner_mode ()),
- shifted_vid);
- expand_vec_series (tmp2, base2,
- gen_int_mode (step2, builder.inner_mode ()),
- shifted_vid);
-
- /* Transform into { 0, 1, 0, 1, 0, 1, ... }. */
- rtx and_vid = gen_reg_rtx (mode);
- rtx and_ops[] = {and_vid, vid, const1_rtx};
- emit_vlmax_insn (code_for_pred_scalar (AND, mode), BINARY_OP,
- and_ops);
- rtx mask = gen_reg_rtx (builder.mask_mode ());
- expand_vec_cmp (mask, EQ, and_vid, CONST1_RTX (mode));
-
- rtx ops[] = {result, tmp1, tmp2, mask};
- emit_vlmax_insn (code_for_pred_merge (mode), MERGE_OP, ops);
- }
+ shifted_tmp2 = expand_simple_binop (new_mode, ASHIFT, tmp2, shift,
+ NULL_RTX, false, OPTAB_DIRECT);
+ rtx tmp3 = gen_reg_rtx (new_mode);
+ rtx ior_ops[] = {tmp3, tmp1, shifted_tmp2};
+ emit_vlmax_insn (code_for_pred (IOR, new_mode), BINARY_OP, ior_ops);
+ emit_move_insn (result, gen_lowpart (mode, tmp3));
}
- else
- /* TODO: We will enable more variable-length vector in the future. */
- gcc_unreachable ();
}
else
- gcc_unreachable ();
+ {
+ rtx vid = gen_reg_rtx (mode);
+ expand_vec_series (vid, const0_rtx, const1_rtx);
+ /* Transform into { 0, 0, 1, 1, 2, 2, ... }. */
+ rtx shifted_vid;
+ if (lra_in_progress)
+ {
+ shifted_vid = gen_reg_rtx (mode);
+ rtx shift = gen_int_mode (1, Xmode);
+ rtx shift_ops[] = {shifted_vid, vid, shift};
+ emit_vlmax_insn (code_for_pred_scalar (LSHIFTRT, mode), BINARY_OP,
+ shift_ops);
+ }
+ else
+ shifted_vid = expand_simple_binop (mode, LSHIFTRT, vid, const1_rtx,
+ NULL_RTX, false, OPTAB_DIRECT);
+ rtx tmp1 = gen_reg_rtx (mode);
+ rtx tmp2 = gen_reg_rtx (mode);
+ expand_vec_series (tmp1, base1,
+ gen_int_mode (step1, builder->inner_mode ()),
+ shifted_vid);
+ expand_vec_series (tmp2, base2,
+ gen_int_mode (step2, builder->inner_mode ()),
+ shifted_vid);
+
+ /* Transform into { 0, 1, 0, 1, 0, 1, ... }. */
+ rtx and_vid = gen_reg_rtx (mode);
+ rtx and_ops[] = {and_vid, vid, const1_rtx};
+ emit_vlmax_insn (code_for_pred_scalar (AND, mode), BINARY_OP, and_ops);
+ rtx mask = gen_reg_rtx (builder->mask_mode ());
+ expand_vec_cmp (mask, EQ, and_vid, CONST1_RTX (mode));
+
+ rtx ops[] = {result, tmp1, tmp2, mask};
+ emit_vlmax_insn (code_for_pred_merge (mode), MERGE_OP, ops);
+ }
if (result != target)
emit_move_insn (target, result);
}
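The shift-and-IOR trick above relies on RVV's little-endian register layout: two interleaved EEW=SEW series are exactly one EEW=2*SEW series once series2 is moved into the high half. A standalone host-side sketch, with assumed values base1 = 1, base2 = 5, step1 = step2 = 1 (not GCC code):

/* Standalone illustration of the doubled-EEW trick; values assumed.
   Target, EEW = 16: { 1, 5, 2, 6, 3, 7, 4, 8 }.  */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

int
main (void)
{
  uint32_t wide[4];
  for (int i = 0; i < 4; i++)
    {
      uint32_t series1 = 1 + i;		   /* base1 + vid * step1 */
      uint32_t series2 = 5 + i;		   /* base2 + vid * step2 */
      wide[i] = series1 | (series2 << 16); /* the shift + IOR above */
    }

  uint16_t narrow[8];
  memcpy (narrow, wide, sizeof narrow);	   /* the gen_lowpart view */
  for (int i = 0; i < 8; i++)
    printf ("%d ", (int) narrow[i]);	   /* 1 5 2 6 3 7 4 8 */
  printf ("\n");
  return 0;
}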
+static void
+expand_const_vector_stepped (rtx target, rtx src, rvv_builder *builder)
+{
+ gcc_assert (GET_MODE_CLASS (GET_MODE (target)) == MODE_VECTOR_INT);
+
+ if (builder->single_step_npatterns_p ())
+ return expand_const_vector_single_step_npatterns (target, builder);
+ else if (builder->interleaved_stepped_npatterns_p ())
+ return expand_const_vector_interleaved_stepped_npatterns (target, src,
+ builder);
+
+ /* TODO: We will enable more variable-length vector in the future. */
+ gcc_unreachable ();
+}
+
+/* We don't actually allow this case in legitimate_constant_p but
+ the middle-end still expects us to handle it in an expander
+ (see PR121334). This is assumed to happen very rarely so the
+ implementation is not very efficient, particularly for short
+ vectors. */
+
+static void
+expand_const_vector_onestep (rtx target, rvv_builder &builder)
+{
+ machine_mode mode = GET_MODE (target);
+ gcc_assert (GET_MODE_CLASS (mode) == MODE_VECTOR_INT);
+ gcc_assert (builder.nelts_per_pattern () == 2);
+
+ /* We have n encoded patterns
+ {csta_0, cstb_0},
+ {csta_1, cstb_1},
+ ...
+ {csta_{n-1}, cstb_{n-1}}
+ which should become one vector:
+ {csta_0, csta_1, ..., csta_{n-1},
+ cstb_0, cstb_1, ..., cstb_{n-1},
+ ...
+ cstb_0, cstb_1, ..., cstb_{n-1}}.
+
+ In order to achieve this we create a permute/gather constant
+ sel = {0, 1, ..., n - 1, 0, 1, ..., n - 1, ...}
+ and two vectors
+ va = {csta_0, csta_1, ..., csta_{n-1}},
+ vb = {cstb_0, cstb_1, ..., cstb_{n-1}}.
+
+ Then we use a VLMAX gather to "broadcast" vb and afterwards
+ overwrite the first n elements with va. */
+
+ int n = builder.npatterns ();
+ /* { 0, 1, 2, ..., n - 1 }. */
+ rtx vid = gen_reg_rtx (mode);
+ expand_vec_series (vid, const0_rtx, const1_rtx);
+
+ /* { 0, 1, ..., n - 1, 0, 1, ..., n - 1, ... }. */
+ rtx sel = gen_reg_rtx (mode);
+ rtx and_ops[] = {sel, vid, GEN_INT (n - 1)};
+ emit_vlmax_insn (code_for_pred_scalar (AND, mode), BINARY_OP, and_ops);
+
+ /* va = { ELT (0), ELT (1), ... ELT (n - 1) }. */
+ rtx tmp1 = gen_reg_rtx (mode);
+ rtx ops1[] = {tmp1, builder.elt (0)};
+ expand_broadcast (mode, ops1);
+ for (int i = 1; i < n; i++)
+ tmp1 = expand_slide1up (mode, tmp1, builder.elt (i));
+
+ /* vb = { ELT (n), ELT (n + 1), ... ELT (2 * n - 1) }. */
+ rtx tmp2 = gen_reg_rtx (mode);
+ rtx ops2[] = {tmp2, builder.elt (n)};
+ expand_broadcast (mode, ops2);
+ for (int i = 1; i < n; i++)
+ tmp2 = expand_slide1up (mode, tmp2, builder.elt (n + i));
+
+ /* Duplicate vb. */
+ rtx tmp3 = gen_reg_rtx (mode);
+ emit_vlmax_gather_insn (tmp3, tmp2, sel);
+
+ /* Overwrite the first n elements with va. */
+ rtx dest = gen_reg_rtx (mode);
+ insn_code icode = code_for_pred_mov (mode);
+ rtx ops3[] = {dest, tmp3, tmp1};
+ emit_nonvlmax_insn (icode, __MASK_OP_TUMA | UNARY_OP_P, ops3, GEN_INT (n));
+
+ emit_move_insn (target, dest);
+}
+
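To make the gather-then-overwrite scheme concrete, here is a host-side simulation with assumed values (npatterns n = 2, VLMAX = 8); the names are illustrative only, not GCC code:

/* Host-side sketch of the gather + prefix-overwrite scheme.  */
#include <stdio.h>

int
main (void)
{
  enum { N = 2, VLEN = 8 };	     /* n = npatterns, VLMAX = 8 */
  int va[N] = {10, 20};		     /* csta_0, csta_1 */
  int vb[N] = {3, 4};		     /* cstb_0, cstb_1 */
  int sel[VLEN], out[VLEN];

  for (int i = 0; i < VLEN; i++)
    sel[i] = i & (N - 1);	     /* vid & (n - 1) */
  for (int i = 0; i < VLEN; i++)
    out[i] = vb[sel[i]];	     /* vrgather "broadcast" of vb */
  for (int i = 0; i < N; i++)
    out[i] = va[i];		     /* overwrite the first n elements */

  for (int i = 0; i < VLEN; i++)
    printf ("%d ", out[i]);	     /* 10 20 3 4 3 4 3 4 */
  printf ("\n");
  return 0;
}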
+static void
+expand_const_vector (rtx target, rtx src)
+{
+ rtx elt;
+ if (const_vec_duplicate_p (src, &elt))
+ return expand_const_vec_duplicate (target, src, elt);
+
+ /* Support scalable const series vector. */
+ rtx base, step;
+ if (const_vec_series_p (src, &base, &step))
+ return expand_const_vec_series (target, base, step);
+
+ /* Handle variable-length vector. */
+ unsigned int nelts_per_pattern = CONST_VECTOR_NELTS_PER_PATTERN (src);
+ unsigned int npatterns = CONST_VECTOR_NPATTERNS (src);
+ rvv_builder builder (GET_MODE (target), npatterns, nelts_per_pattern);
+
+ for (unsigned int i = 0; i < nelts_per_pattern; i++)
+ {
+ for (unsigned int j = 0; j < npatterns; j++)
+ builder.quick_push (CONST_VECTOR_ELT (src, i * npatterns + j));
+ }
+
+ builder.finalize ();
+
+ if (CONST_VECTOR_DUPLICATE_P (src))
+ return expand_const_vector_duplicate (target, &builder);
+ else if (CONST_VECTOR_NELTS_PER_PATTERN (src) == 2)
+ return expand_const_vector_onestep (target, builder);
+ else if (CONST_VECTOR_STEPPED_P (src))
+ return expand_const_vector_stepped (target, src, &builder);
+
+ gcc_unreachable ();
+}
+
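For reference, the npatterns/nelts_per_pattern encoding decoded above extends a short encoded prefix to any vector length: NPATTERNS interleaved sequences, each described by up to NELTS_PER_PATTERN leading values, with later values continuing the last step. A standalone sketch with an assumed example (npatterns = 2, nelts_per_pattern = 3 encoding { 0, 0, 1, 1, 2, 2, ... }):

/* Host-side decoder for the variable-length constant encoding.  */
#include <stdio.h>

int
main (void)
{
  enum { NP = 2, NEPP = 3, VLEN = 10 };
  /* Encoded elements, pushed row by row as in expand_const_vector.  */
  int enc[NP * NEPP] = {0, 0, 1, 1, 2, 2};

  for (int i = 0; i < VLEN; i++)
    {
      int pat = i % NP;		     /* which interleaved sequence */
      int it = i / NP;		     /* iteration within it */
      int val;
      if (it < NEPP)
	val = enc[it * NP + pat];
      else
	{
	  /* Continue with the step of the last two encoded values.  */
	  int step = enc[(NEPP - 1) * NP + pat] - enc[(NEPP - 2) * NP + pat];
	  val = enc[(NEPP - 1) * NP + pat] + (it - (NEPP - 1)) * step;
	}
      printf ("%d ", val);	     /* 0 0 1 1 2 2 3 3 4 4 */
    }
  printf ("\n");
  return 0;
}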
/* Get the frm mode with given CONST_INT rtx, the default mode is
FRM_DYN. */
enum floating_point_rounding_mode
@@ -2066,18 +2274,32 @@ has_vi_variant_p (rtx_code code, rtx x)
}
}
+/* This is a helper for binary ops with DImode scalar operands that are
+ broadcast (like vadd.vx v1, a1).
+ Instead of having similar code for all the expanders this function
+ unifies the handling. For 64-bit targets all we do is choose
+ between the vi variant (if available) and the register variant.
+ For 32-bit targets we either create the sign-extending variant
+ of vop.vx (when the immediate fits 32 bits) or emit a vector
+ broadcast of the 64-bit register/immediate and switch to a
+ vop.vv (replacing the scalar op with the broadcast vector). */
+
bool
sew64_scalar_helper (rtx *operands, rtx *scalar_op, rtx vl,
machine_mode vector_mode, bool has_vi_variant_p,
void (*emit_vector_func) (rtx *, rtx), enum avl_type type)
{
machine_mode scalar_mode = GET_MODE_INNER (vector_mode);
+
+ /* If the scalar broadcast op fits an immediate, use the
+ vop.vi variant if there is one. */
if (has_vi_variant_p)
{
*scalar_op = force_reg (scalar_mode, *scalar_op);
return false;
}
+ /* On a 64-bit target we can always use the vop.vx variant. */
if (TARGET_64BIT)
{
if (!rtx_equal_p (*scalar_op, const0_rtx))
@@ -2085,6 +2307,8 @@ sew64_scalar_helper (rtx *operands, rtx *scalar_op, rtx vl,
return false;
}
+ /* On 32-bit targets, when there is no vop.vi variant and the immediate
+ fits 32 bits, we need the sign-extending (SI -> DI) vop.vx variant. */
if (immediate_operand (*scalar_op, Pmode))
{
if (!rtx_equal_p (*scalar_op, const0_rtx))
@@ -2094,6 +2318,17 @@ sew64_scalar_helper (rtx *operands, rtx *scalar_op, rtx vl,
return false;
}
+ /* Now we're left with a 64-bit immediate or a register.
+ We cannot use a vop.vx variant but must broadcast the value first
+ and switch to a vop.vv variant.
+ Broadcast can either be done via vlse64.v v1, reg, zero
+ or by loading one 64-bit element (vle64.v) and using a
+ broadcast vrgather.vi. This is decided when splitting
+ the strided broadcast insn. */
+ gcc_assert (!TARGET_64BIT
+ && (CONST_INT_P (*scalar_op)
+ || register_operand (*scalar_op, scalar_mode)));
+
if (CONST_INT_P (*scalar_op))
{
if (maybe_gt (GET_MODE_SIZE (scalar_mode), GET_MODE_SIZE (Pmode)))
@@ -2104,11 +2339,8 @@ sew64_scalar_helper (rtx *operands, rtx *scalar_op, rtx vl,
rtx tmp = gen_reg_rtx (vector_mode);
rtx ops[] = {tmp, *scalar_op};
- if (type == VLMAX)
- emit_vlmax_insn (code_for_pred_broadcast (vector_mode), UNARY_OP, ops);
- else
- emit_nonvlmax_insn (code_for_pred_broadcast (vector_mode), UNARY_OP, ops,
- vl);
+ emit_avltype_insn (code_for_pred_strided_broadcast (vector_mode),
+ UNARY_OP, ops, type, vl);
emit_vector_func (operands, tmp);
return true;
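The variant selection can be condensed into the following host-side sketch; the predicate names simplify the checks above and are not GCC APIs:

/* Decision sketch for sew64_scalar_helper; not GCC code.  */
#include <stdbool.h>
#include <stdio.h>

static const char *
choose_variant (bool has_vi, bool target_64bit, bool fits_pmode_imm)
{
  if (has_vi)
    return "vop.vi";				/* immediate variant */
  if (target_64bit)
    return "vop.vx";				/* full-width register */
  if (fits_pmode_imm)
    return "vop.vx (SI -> DI sign-extending form)";
  return "strided broadcast, then vop.vv";	/* 64-bit value on RV32 */
}

int
main (void)
{
  /* RV32, no vi variant, value does not fit a 32-bit immediate.  */
  puts (choose_variant (false, false, false));
  return 0;
}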
@@ -2500,10 +2732,15 @@ expand_vector_init_merge_repeating_sequence (rtx target,
= get_repeating_sequence_dup_machine_mode (builder, mask_bit_mode);
uint64_t full_nelts = builder.full_nelts ().to_constant ();
+ gcc_assert (builder.nelts_per_pattern () == 1
+ || builder.nelts_per_pattern () == 2);
+
+ rtx first
+ = builder.nelts_per_pattern () == 1 ? builder.elt (0) : builder.elt (1);
+
/* Step 1: Broadcast the first pattern. */
- rtx ops[] = {target, force_reg (builder.inner_mode (), builder.elt (0))};
- emit_vlmax_insn (code_for_pred_broadcast (builder.mode ()),
- UNARY_OP, ops);
+ rtx ops[] = {target, force_reg (builder.inner_mode (), first)};
+ expand_broadcast (builder.mode (), ops);
/* Step 2: Merge the rest iteration of pattern. */
for (unsigned int i = 1; i < builder.npatterns (); i++)
{
@@ -2516,8 +2753,7 @@ expand_vector_init_merge_repeating_sequence (rtx target,
if (full_nelts <= builder.inner_bits_size ()) /* vmv.s.x. */
{
rtx ops[] = {dup, merge_mask};
- emit_nonvlmax_insn (code_for_pred_broadcast (GET_MODE (dup)),
- SCALAR_MOVE_OP, ops, CONST1_RTX (Pmode));
+ expand_set_first (GET_MODE (dup), ops);
}
else /* vmv.v.x. */
{
@@ -2525,14 +2761,16 @@ expand_vector_init_merge_repeating_sequence (rtx target,
force_reg (GET_MODE_INNER (mask_int_mode), merge_mask)};
rtx vl = gen_int_mode (CEIL (full_nelts, builder.inner_bits_size ()),
Pmode);
- emit_nonvlmax_insn (code_for_pred_broadcast (mask_int_mode), UNARY_OP,
- ops, vl);
+ expand_broadcast (mask_int_mode, ops, vl);
}
emit_move_insn (mask, gen_lowpart (mask_bit_mode, dup));
/* Step 2-2: Merge pattern according to the mask. */
- rtx ops[] = {target, target, builder.elt (i), mask};
+ unsigned int which = i;
+ if (builder.nelts_per_pattern () == 2)
+ which = 2 * which + 1;
+ rtx ops[] = {target, target, builder.elt (which), mask};
emit_vlmax_insn (code_for_pred_merge_scalar (GET_MODE (target)),
MERGE_OP, ops);
}
@@ -2771,6 +3009,28 @@ autovectorize_vector_modes (vector_modes *modes, bool)
i++;
size = base_size / (1U << i);
}
+
+ /* If the user specified the exact mode to use, check whether it is
+ available and remove all other modes before returning. */
+ if (riscv_autovec_mode)
+ {
+ auto_vector_modes ms;
+ ms.safe_splice (*modes);
+ modes->truncate (0);
+
+ for (machine_mode mode : ms)
+ {
+ if (!strcmp (GET_MODE_NAME (mode), riscv_autovec_mode))
+ {
+ modes->safe_push (mode);
+ return 0;
+ }
+ }
+
+ /* Nothing found, fall back to regular handling. */
+ modes->safe_splice (ms);
+ }
+
/* Enable LOOP_VINFO comparison in COST model. */
return VECT_COMPARE_COSTS;
}
@@ -3911,11 +4171,7 @@ shuffle_off_by_one_patterns (struct expand_vec_perm_d *d)
emit_vec_extract (tmp, d->op0, gen_int_mode (nunits - 1, Pmode));
/* Insert the scalar into element 0. */
- unsigned int unspec
- = FLOAT_MODE_P (d->vmode) ? UNSPEC_VFSLIDE1UP : UNSPEC_VSLIDE1UP;
- insn_code icode = code_for_pred_slide (unspec, d->vmode);
- rtx ops[] = {d->target, d->op1, tmp};
- emit_vlmax_insn (icode, BINARY_OP, ops);
+ expand_slide1up (d->vmode, d->op1, tmp);
}
return true;
@@ -4595,20 +4851,20 @@ expand_reduction (unsigned unspec, unsigned unspec_for_vl0_safe,
rtx m1_tmp = gen_reg_rtx (m1_mode);
rtx scalar_move_ops[] = {m1_tmp, init};
- insn_code icode = code_for_pred_broadcast (m1_mode);
if (need_mask_operand_p (insn_flags))
{
if (need_vl0_safe)
- emit_nonvlmax_insn (icode, SCALAR_MOVE_OP, scalar_move_ops, const1_rtx);
+ expand_set_first (m1_mode, scalar_move_ops, const1_rtx);
else
- emit_nonvlmax_insn (icode, SCALAR_MOVE_OP, scalar_move_ops, vl_op);
+ expand_set_first (m1_mode, scalar_move_ops, vl_op);
}
else
- emit_vlmax_insn (icode, SCALAR_MOVE_OP, scalar_move_ops);
+ expand_set_first (m1_mode, scalar_move_ops);
rtx m1_tmp2 = gen_reg_rtx (m1_mode);
rtx reduc_ops[] = {m1_tmp2, vector_src, m1_tmp};
+ insn_code icode;
if (need_vl0_safe)
icode = code_for_pred (unspec_for_vl0_safe, vmode);
else
@@ -4651,7 +4907,7 @@ prepare_ternary_operands (rtx *ops)
ops[4], ops[1], ops[6], ops[7], ops[9]));
ops[5] = ops[4] = ops[0];
}
- else
+ else if (VECTOR_MODE_P (GET_MODE (ops[2])))
{
/* Swap the multiplication ops if the fallback value is the
second of the two. */
@@ -4661,8 +4917,10 @@ prepare_ternary_operands (rtx *ops)
/* TODO: ??? Maybe we could support splitting FMA (a, 4, b)
into PLUS (ASHIFT (a, 2), b) according to uarchs. */
}
- gcc_assert (rtx_equal_p (ops[5], RVV_VUNDEF (mode))
- || rtx_equal_p (ops[5], ops[2]) || rtx_equal_p (ops[5], ops[4]));
+ gcc_assert (
+ rtx_equal_p (ops[5], RVV_VUNDEF (mode)) || rtx_equal_p (ops[5], ops[2])
+ || (!VECTOR_MODE_P (GET_MODE (ops[2])) && rtx_equal_p (ops[5], ops[3]))
+ || rtx_equal_p (ops[5], ops[4]));
}
/* Expand VEC_MASK_LEN_{LOAD_LANES,STORE_LANES}. */
@@ -5448,6 +5706,157 @@ expand_vec_oct_sstrunc (rtx op_0, rtx op_1, machine_mode vec_mode,
expand_vec_double_sstrunc (op_0, quad_rtx, quad_mode);
}
+/* Expand the binary vx combine with the format like v2 = vop(vec_dup(x), v1).
+ That is, the first op comes from the vec_duplicate, and the second op is
+ the vector reg. */
+
+void
+expand_vx_binary_vec_dup_vec (rtx op_0, rtx op_1, rtx op_2,
+ rtx_code code, machine_mode mode)
+{
+ enum insn_code icode;
+
+ switch (code)
+ {
+ case PLUS:
+ case AND:
+ case IOR:
+ case XOR:
+ case MULT:
+ case SMAX:
+ case UMAX:
+ case SMIN:
+ case UMIN:
+ case US_PLUS:
+ case SS_PLUS:
+ icode = code_for_pred_scalar (code, mode);
+ break;
+ case MINUS:
+ icode = code_for_pred_sub_reverse_scalar (mode);
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+ rtx ops[] = {op_0, op_1, op_2};
+ emit_vlmax_insn (icode, riscv_vector::BINARY_OP, ops);
+}
+
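MINUS is the only non-commutative code in the switch above, hence the reversed-scalar pattern: dup(x) - v is vrsub.vx, not vsub.vx. A trivial host-side check with assumed values:

/* dup(10) - {1,2,3,4} must give the reversed subtraction.  */
#include <stdio.h>

int
main (void)
{
  int x = 10, v[4] = {1, 2, 3, 4};
  for (int i = 0; i < 4; i++)
    printf ("%d ", x - v[i]);	/* vrsub.vx semantics: 9 8 7 6 */
  printf ("\n");
  return 0;
}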
+static enum insn_type
+get_insn_type_by_vxrm_val (int vxrm_val)
+{
+ enum insn_type itype;
+
+ switch (vxrm_val)
+ {
+ case VXRM_RNU:
+ itype = BINARY_OP_VXRM_RNU;
+ break;
+ case VXRM_RNE:
+ itype = BINARY_OP_VXRM_RNE;
+ break;
+ case VXRM_RDN:
+ itype = BINARY_OP_VXRM_RDN;
+ break;
+ case VXRM_ROD:
+ itype = BINARY_OP_VXRM_ROD;
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+ return itype;
+}
+
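The four VXRM values correspond to the fixed-point rounding rules of the RVV spec. As a sketch, the 1-bit roundoff used by an averaging add such as vaadd looks as follows in host code (the values in main are assumed):

/* Host-side model of the RVV roundoff for a 1-bit shift.  */
#include <stdio.h>

enum vxrm_mode { RNU, RNE, RDN, ROD };

static int
avg_round (int a, int b, enum vxrm_mode mode)
{
  int v = a + b;
  int lsb = v & 1;		/* bit shifted out */
  int next = (v >> 1) & 1;	/* LSB of the shifted result */
  int r = 0;
  switch (mode)
    {
    case RNU: r = lsb; break;		/* round to nearest, up */
    case RNE: r = lsb & next; break;	/* round to nearest, even */
    case RDN: r = 0; break;		/* round down */
    case ROD: r = (!next) & lsb; break;	/* round to odd */
    }
  return (v >> 1) + r;
}

int
main (void)
{
  /* (3 + 4) / 2 = 3.5: RNU -> 4, RNE -> 4, RDN -> 3, ROD -> 3.  */
  printf ("%d %d %d %d\n", avg_round (3, 4, RNU), avg_round (3, 4, RNE),
	  avg_round (3, 4, RDN), avg_round (3, 4, ROD));
  return 0;
}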
+/* Expand the binary vx combine with the format like v2 = vop(v1, vec_dup(x))
+ and its vxrm value. That is, the second op comes from the vec_duplicate,
+ and the first op is the vector reg. */
+
+void
+expand_vx_binary_vxrm_vec_vec_dup (rtx op_0, rtx op_1, rtx op_2, int unspec,
+ int vxrm_val, machine_mode mode)
+{
+ enum insn_code icode;
+ enum insn_type itype = get_insn_type_by_vxrm_val (vxrm_val);
+ rtx ops[] = {op_0, op_1, op_2};
+
+ switch (unspec)
+ {
+ case UNSPEC_VAADD:
+ case UNSPEC_VAADDU:
+ icode = code_for_pred_scalar (unspec, mode);
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+ emit_vlmax_insn (icode, itype, ops);
+}
+
+/* Expand the binary vx combine with the format like v2 = vop(vec_dup(x), v1)
+ and its vxrm value. That is, the first op comes from the vec_duplicate,
+ and the second op is the vector reg. */
+
+void
+expand_vx_binary_vxrm_vec_dup_vec (rtx op_0, rtx op_1, rtx op_2, int unspec,
+ int vxrm_val, machine_mode mode)
+{
+ enum insn_code icode;
+ enum insn_type itype = get_insn_type_by_vxrm_val (vxrm_val);
+ rtx ops[] = {op_0, op_1, op_2};
+
+ switch (unspec)
+ {
+ case UNSPEC_VAADD:
+ case UNSPEC_VAADDU:
+ icode = code_for_pred_scalar (unspec, mode);
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+ emit_vlmax_insn (icode, itype, ops);
+}
+
+/* Expand the binary vx combine with the format like v2 = vop(v1, vec_dup(x)).
+ That is, the second op comes from the vec_duplicate, and the first op is
+ the vector reg. */
+
+void
+expand_vx_binary_vec_vec_dup (rtx op_0, rtx op_1, rtx op_2,
+ rtx_code code, machine_mode mode)
+{
+ enum insn_code icode;
+
+ switch (code)
+ {
+ case MINUS:
+ case AND:
+ case IOR:
+ case XOR:
+ case MULT:
+ case DIV:
+ case UDIV:
+ case MOD:
+ case UMOD:
+ case SMAX:
+ case UMAX:
+ case SMIN:
+ case UMIN:
+ case US_PLUS:
+ case US_MINUS:
+ case SS_PLUS:
+ case SS_MINUS:
+ icode = code_for_pred_scalar (code, mode);
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+ rtx ops[] = {op_0, op_1, op_2};
+ emit_vlmax_insn (icode, riscv_vector::BINARY_OP, ops);
+}
+
/* Vectorize popcount by the Wilkes-Wheeler-Gill algorithm that libgcc uses as
well. */
void
@@ -5620,24 +6029,84 @@ count_regno_occurrences (rtx_insn *rinsn, unsigned int regno)
return count;
}
-/* Return true if the OP can be directly broadcasted. */
+/* Return true if the OP can be broadcast with a
+ v[f]mv.v.[xif] instruction. */
+
bool
-can_be_broadcasted_p (rtx op)
+can_be_broadcast_p (rtx op)
{
machine_mode mode = GET_MODE (op);
- /* We don't allow RA (register allocation) reload generate
- (vec_duplicate:DI reg) in RV32 system wheras we allow
- (vec_duplicate:DI mem) in RV32 system. */
- if (!can_create_pseudo_p () && !FLOAT_MODE_P (mode)
- && maybe_gt (GET_MODE_SIZE (mode), GET_MODE_SIZE (Pmode))
- && !satisfies_constraint_Wdm (op))
+
+ /* Zero always works and we can always put an immediate into a
+ register.
+ What's tricky is that for an immediate we don't know the
+ mode of the register it will end up in, i.e. what element size
+ we want to broadcast. So even if the immediate is small it might
+ still end up in a DImode register that we cannot broadcast.
+ vmv.s.x, i.e. a single-element set can handle this, though,
+ because it implicitly sign-extends to SEW. */
+ if (rtx_equal_p (op, CONST0_RTX (mode))
+ || const_int_operand (op, Xmode))
+ return true;
+
+ /* Do not accept DImode broadcasts on !TARGET_64BIT. Those
+ are handled by strided broadcast. */
+ if (INTEGRAL_MODE_P (mode)
+ && maybe_gt (GET_MODE_SIZE (mode), UNITS_PER_WORD))
+ return false;
+
+ /* Non-register operands that can be forced into a register we can
+ handle. These don't need to use strided broadcast. */
+ if (INTEGRAL_MODE_P (mode)
+ && (memory_operand (op, mode) || CONST_POLY_INT_P (op))
+ && can_create_pseudo_p ())
+ return true;
+
+ /* Likewise, do not accept HFmode broadcast if we don't have
+ vfmv.v.f for 16-bit registers available. */
+ if (mode == HFmode && !TARGET_ZVFH)
return false;
- if (satisfies_constraint_K (op) || register_operand (op, mode)
- || satisfies_constraint_Wdm (op) || rtx_equal_p (op, CONST0_RTX (mode)))
+ /* Same for float, just that we can always handle 64-bit doubles
+ even on !TARGET_64BIT. We have ruled out 16-bit HF already
+ above. */
+ if (FLOAT_MODE_P (mode)
+ && (memory_operand (op, mode) || CONSTANT_P (op))
+ && can_create_pseudo_p ())
return true;
- return can_create_pseudo_p () && nonmemory_operand (op, mode);
+ /* After excluding all the cases we cannot handle the register types
+ that remain can always be broadcast. */
+ if (register_operand (op, mode))
+ return true;
+
+ return false;
+}
+
+/* Returns true for all operands that cannot use vmv.vx, vfmv.vf,
+ vmv.s.x, or vfmv.s.f but rather need to go via memory. */
+
+bool
+strided_broadcast_p (rtx op)
+{
+ machine_mode mode = GET_MODE (op);
+ if (!memory_operand (op, mode)
+ && !register_operand (op, mode)
+ && !rtx_equal_p (op, CONST0_RTX (mode))
+ && !const_int_operand (op, mode))
+ return false;
+
+ /* !TARGET_64BIT does not have a vmv.v.x/vmv.s.x for 64-bit
+ DImode elements. */
+ if (INTEGRAL_MODE_P (mode)
+ && maybe_gt (GET_MODE_SIZE (mode), UNITS_PER_WORD))
+ return true;
+
+ /* Zvfhmin does not have a vfmv.v.f/vfmv.s.f. for 16-bit elements. */
+ if (!TARGET_ZVFH && mode == HFmode)
+ return true;
+
+ return false;
}
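A compact illustration of how the two predicates divide the work; the helper below is a sketch under the assumptions noted in its comment, not a GCC function:

/* Illustrative only: assumes integer elements compared against
   UNITS_PER_WORD, and the Zvfh/Zvfhmin distinction for HFmode.  */
#include <stdbool.h>
#include <stdio.h>

static const char *
broadcast_kind (unsigned elem_bytes, unsigned units_per_word,
		bool is_hf, bool have_zvfh)
{
  if (is_hf && !have_zvfh)
    return "strided broadcast (no vfmv.v.f for HF under Zvfhmin)";
  if (!is_hf && elem_bytes > units_per_word)
    return "strided broadcast (no vmv.v.x for DImode on RV32)";
  return "vmv.v.x / vfmv.v.f";
}

int
main (void)
{
  printf ("%s\n", broadcast_kind (8, 4, false, false)); /* DI, RV32 */
  printf ("%s\n", broadcast_kind (8, 8, false, false)); /* DI, RV64 */
  printf ("%s\n", broadcast_kind (2, 4, true, false));  /* HF, Zvfhmin */
  return 0;
}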
void
@@ -5752,7 +6221,10 @@ whole_reg_to_reg_move_p (rtx *ops, machine_mode mode, int avl_type_index)
return false;
}
-/* Return true if we can transform vmv.v.x/vfmv.v.f to vmv.s.x/vfmv.s.f. */
+/* Return true if we can transform vmv.v.x/vfmv.v.f to vmv.s.x/vfmv.s.f.
+ That's the case if we're dealing with a scalar broadcast that
+ has VL = 1. */
+
bool
splat_to_scalar_move_p (rtx *ops)
{
diff --git a/gcc/config/riscv/riscv-vect-permconst.cc b/gcc/config/riscv/riscv-vect-permconst.cc
new file mode 100644
index 0000000..087f26a
--- /dev/null
+++ b/gcc/config/riscv/riscv-vect-permconst.cc
@@ -0,0 +1,318 @@
+/* Copyright (C) 2024 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#define IN_TARGET_CODE 1
+#define INCLUDE_ALGORITHM
+#define INCLUDE_FUNCTIONAL
+#define INCLUDE_MEMORY
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tm.h"
+#include "backend.h"
+#include "rtl.h"
+#include "target.h"
+#include "tree-pass.h"
+#include "df.h"
+#include "rtl-ssa.h"
+#include "cfgcleanup.h"
+#include "insn-attr.h"
+#include "tm-constrs.h"
+#include "insn-opinit.h"
+#include "cfgrtl.h"
+
+/* So the basic idea of this pass is to identify loads of permutation
+ constants from the constant pool which could instead be trivially
+ derived from some earlier vector permutation constant. This will
+ replace a memory load from the constant pool with a vadd.vi
+ instruction.
+
+ Conceptually this is much like the related_values optimization in
+ CSE, reload_cse_move2add or using SLSR to optimize constant synthesis.
+ If we wanted to make this generic I would suggest putting it into CSE
+ and providing target hooks to determine if particular permutation
+ constants could be derived from earlier permutation constants. */
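As a concrete instance of the idea, assume the constants { 0, 1, 2, 3 } and { 4, 5, 6, 7 }: both normalize to { 0, 1, 2, 3 }, so the second is derivable from the first by vadd.vi with adjustment 4. A host-side sketch (assumed values, not pass code):

/* Normalization demo: equal normalized forms => derivable by vadd.vi.  */
#include <stdio.h>

int
main (void)
{
  int c1[4] = {0, 1, 2, 3};
  int c2[4] = {4, 5, 6, 7};
  int bias1 = c1[0], bias2 = c2[0];

  /* Normalize: subtract the first element from every element.  */
  int ok = 1;
  for (int i = 0; i < 4; i++)
    ok &= (c1[i] - bias1) == (c2[i] - bias2);

  if (ok)
    printf ("vadd.vi vC2, vC1, %d\n", bias2 - bias1); /* adjustment = 4 */
  return 0;
}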
+
+const pass_data pass_data_vect_permconst = {
+ RTL_PASS, /* type */
+ "vect_permconst", /* name */
+ OPTGROUP_NONE, /* optinfo_flags */
+ TV_NONE, /* tv_id */
+ 0, /* properties_required */
+ 0, /* properties_provided */
+ 0, /* properties_destroyed */
+ 0, /* todo_flags_start */
+ 0, /* todo_flags_finish */
+};
+
+/* Entry in the hash table. We "normalize" the permutation constant
+ by adjusting all entries by the value in the first element. This
+ allows simple hashing to discover permutation constants that differ
+ by a single constant across all their elements and may be derived
+ from each other with a vadd.vi. */
+
+struct vector_permconst_entry
+{
+ /* The CONST_VECTOR in normalized form (first entry is zero). */
+ /* We could avoid copying the vector with a more customized hash
+ routine which took care of normalization. */
+ rtx normalized_vec;
+
+ /* The destination register holding the CONST_VECTOR. When the optimization
+ applies this will be used as a source operand in the vadd.vi. */
+ rtx dest;
+
+ /* The insn generating DEST, the only reason we need this is because we
+ do not invalidate entries which implies we have to verify that DEST
+ is unchanged between INSN and the point where we want to use DEST
+ to derive a new permutation constant. */
+ rtx_insn *insn;
+
+ /* The bias of this entry used for normalization. If this value is added
+ to each element in NORMALIZED_VEC we would have the original permutation
+ constant. */
+ HOST_WIDE_INT bias;
+};
+
+struct const_vector_hasher : nofree_ptr_hash <vector_permconst_entry>
+{
+ static inline hashval_t hash (const vector_permconst_entry *);
+ static inline bool equal (const vector_permconst_entry *,
+ const vector_permconst_entry *);
+};
+
+inline bool
+const_vector_hasher::equal (const vector_permconst_entry *vpe1,
+ const vector_permconst_entry *vpe2)
+{
+ /* Do the cheap tests first, namely that the mode and number of entries
+ match between the two entries. */
+ if (GET_MODE (vpe1->normalized_vec) != GET_MODE (vpe2->normalized_vec))
+ return false;
+
+ if (CONST_VECTOR_NUNITS (vpe1->normalized_vec).to_constant ()
+ != CONST_VECTOR_NUNITS (vpe2->normalized_vec).to_constant ())
+ return false;
+
+ /* Check the value of each entry in the vector. We violate structure
+ sharing rules inside this pass, so while pointer equality would normally
+ be OK, it isn't here. */
+ for (int i = 0;
+ i < CONST_VECTOR_NUNITS (vpe1->normalized_vec).to_constant ();
+ i++)
+ if (!rtx_equal_p (CONST_VECTOR_ELT (vpe1->normalized_vec, i),
+ CONST_VECTOR_ELT (vpe2->normalized_vec, i)))
+ return false;
+
+ return true;
+}
+
+inline hashval_t
+const_vector_hasher::hash (const vector_permconst_entry *vpe)
+{
+ int do_not_record;
+ return hash_rtx (vpe->normalized_vec, Pmode, &do_not_record, NULL, false);
+}
+
+
+class vector_permconst : public rtl_opt_pass
+{
+public:
+ vector_permconst (gcc::context *ctxt)
+ : rtl_opt_pass (pass_data_vect_permconst, ctxt) {}
+
+ /* opt_pass methods: */
+ virtual bool gate (function *) final override
+ {
+ return TARGET_VECTOR && optimize > 0;
+ }
+ virtual unsigned int execute (function *) final override;
+
+private:
+ void process_bb (basic_block);
+ hash_table<const_vector_hasher> *vector_permconst_table;
+}; // class pass_vector_permconst
+
+/* Try to optimize vector permutation constants in BB. */
+void
+vector_permconst::process_bb (basic_block bb)
+{
+ vector_permconst_table = new hash_table<const_vector_hasher> (11);
+
+ /* Walk the insns in BB searching for vector loads from the constant pool
+ which can be satisfied by adjusting an earlier load with trivial
+ arithmetic. */
+ rtx_insn *insn;
+ FOR_BB_INSNS (bb, insn)
+ {
+ if (!INSN_P (insn))
+ continue;
+
+ rtx set = single_set (insn);
+ if (!set)
+ continue;
+
+ rtx dest = SET_DEST (set);
+ if (GET_MODE_CLASS (GET_MODE (dest)) != MODE_VECTOR_INT)
+ continue;
+
+ rtx src = SET_SRC (set);
+ if (!MEM_P (src))
+ continue;
+
+ /* A load from the constant pool should have a REG_EQUAL
+ note with the vector constant in the note. */
+ rtx note = find_reg_equal_equiv_note (insn);
+ if (!note
+ || REG_NOTE_KIND (note) != REG_EQUAL
+ || GET_CODE (XEXP (note, 0)) != CONST_VECTOR)
+ continue;
+
+ if (!CONST_VECTOR_NUNITS (XEXP (note, 0)).is_constant ())
+ continue;
+
+ /* XXX Do we need to consider other forms of constants? */
+
+ /* We want to be selective about what gets past this point since
+ we make a copy of the vector and possibly enter it into the
+ hash table. So reject cases that are not likely a permutation
+ constant, i.e., negative bias and large biases. We arbitrarily
+ use 16k as the largest vector size in bits we try to optimize.
+
+ It may seem like a bias outside the range of vadd.vi should
+ be rejected, but what really matters is the difference of
+ biases across the two permutation constants. */
+ rtx cvec = XEXP (note, 0);
+ HOST_WIDE_INT bias = INTVAL (CONST_VECTOR_ELT (cvec, 0));
+ if (bias < 0 || bias > 16384 / 8)
+ continue;
+
+ /* We need to verify that each element would be a valid value
+ in the inner mode after applying the bias. */
+ machine_mode inner = GET_MODE_INNER (GET_MODE (cvec));
+ HOST_WIDE_INT precision = GET_MODE_PRECISION (inner).to_constant ();
+ int i;
+ for (i = 0; i < CONST_VECTOR_NUNITS (cvec).to_constant (); i++)
+ {
+ HOST_WIDE_INT val = INTVAL (CONST_VECTOR_ELT (cvec, i)) - bias;
+ if (val != sext_hwi (val, precision))
+ break;
+ }
+
+ /* If the loop terminated early, then we found a case where the
+ adjusted constant would not fit, so we can't record the constant
+ for this case (it's unlikely to be useful anyway). */
+ if (i != CONST_VECTOR_NUNITS (cvec).to_constant ())
+ continue;
+
+ /* At this point we have a load of a constant integer vector from the
+ constant pool. That constant integer vector is hopefully a
+ permutation constant. We need to make a copy of the vector and
+ normalize it to zero.
+
+ XXX This violates structure sharing conventions. */
+ rtvec_def *nvec = rtvec_alloc (CONST_VECTOR_NUNITS (cvec).to_constant ());
+
+ for (i = 0; i < CONST_VECTOR_NUNITS (cvec).to_constant (); i++)
+ nvec->elem[i] = GEN_INT (INTVAL (CONST_VECTOR_ELT (cvec, i)) - bias);
+
+ rtx copy = gen_rtx_CONST_VECTOR (GET_MODE (cvec), nvec);
+
+ /* Now that we have a normalized vector, look it up in the hash table,
+ inserting it if it wasn't already in the table. */
+ struct vector_permconst_entry tmp;
+ tmp.normalized_vec = copy;
+ struct vector_permconst_entry **slot
+ = vector_permconst_table->find_slot (&tmp, INSERT);
+ if (*slot == NULL)
+ {
+ /* This constant was not in the table, so initialize the hash table
+ entry. */
+ *slot = XNEW (vector_permconst_entry);
+ (*slot)->normalized_vec = copy;
+ (*slot)->dest = dest;
+ (*slot)->bias = bias;
+ (*slot)->insn = insn;
+ }
+ else
+ {
+ /* A hit in the hash table. We may be able to optimize this case.
+
+ If the difference in biases fits in the immediate range for
+ vadd.vi, then we may optimize. */
+ HOST_WIDE_INT adjustment = bias - (*slot)->bias;
+ if (IN_RANGE (adjustment, -16, 15))
+ {
+ /* We also need to make sure the destination register was not
+ modified. I've chosen to test for that at optimization time
+ rather than invalidate entries in the table. This could be
+ changed to use REG_TICK like schemes or true invalidation if
+ this proves too compile-time costly. */
+ if (!reg_set_between_p ((*slot)->dest, (*slot)->insn, insn))
+ {
+ /* Instead of loading from the constant pool, adjust the
+ output of the earlier insn into our destination. */
+ rtx x = gen_const_vec_duplicate (GET_MODE (copy),
+ GEN_INT (adjustment));
+ rtx plus = gen_rtx_PLUS (GET_MODE (copy), (*slot)->dest, x);
+ rtx set = gen_rtx_SET (dest, plus);
+ rtx_insn *new_insn = emit_insn_after (set, insn);
+ /* XXX Should we copy over the REG_EQUAL note first? */
+ delete_insn (insn);
+ insn = new_insn;
+ }
+ }
+
+ /* We always keep the hash table entry pointing to the most recent
+ INSN that could generate the normalized entry. We can adjust
+ in the future if data says it's useful to do so. This just
+ keeps things simple for now.
+
+ For example, we might want to keep multiple entries if they
+ have different biases. */
+ (*slot)->dest = dest;
+ (*slot)->bias = bias;
+ (*slot)->insn = insn;
+ }
+ }
+
+ /* We construct and tear down the table for each block. This may
+ be overly expensive. */
+ vector_permconst_table->empty ();
+}
+
+/* Main entry point for this pass. */
+unsigned int
+vector_permconst::execute (function *fn)
+{
+ /* Handle each block independently. While this should work nicely on EBBs,
+ let's wait for real world cases where it matters before adding that
+ complexity. */
+ basic_block bb;
+ FOR_EACH_BB_FN (bb, fn)
+ process_bb (bb);
+
+ return 0;
+}
+
+rtl_opt_pass *
+make_pass_vector_permconst (gcc::context *ctxt)
+{
+ return new vector_permconst (ctxt);
+}
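Not visible in this hunk: the pass still needs to be instantiated and declared elsewhere. A sketch of the expected wiring; the anchor pass in the .def entry is an assumption for illustration, not taken from this patch:

/* gcc/config/riscv/riscv-passes.def (assumed placement).  */
INSERT_PASS_AFTER (pass_rtl_store_motion, 1, vector_permconst);

/* gcc/config/riscv/riscv-protos.h.  */
extern rtl_opt_pass *make_pass_vector_permconst (gcc::context *);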
diff --git a/gcc/config/riscv/riscv-vector-builtins-bases.cc b/gcc/config/riscv/riscv-vector-builtins-bases.cc
index bf5172c..7e4d396 100644
--- a/gcc/config/riscv/riscv-vector-builtins-bases.cc
+++ b/gcc/config/riscv/riscv-vector-builtins-bases.cc
@@ -643,7 +643,8 @@ public:
return e.use_exact_insn (code_for_pred_mov (e.vector_mode ()));
case OP_TYPE_x:
case OP_TYPE_f:
- return e.use_exact_insn (code_for_pred_broadcast (e.vector_mode ()));
+ return e.use_scalar_broadcast_insn
+ (code_for_pred_broadcast (e.vector_mode ()));
default:
gcc_unreachable ();
}
diff --git a/gcc/config/riscv/riscv-vector-builtins-shapes.cc b/gcc/config/riscv/riscv-vector-builtins-shapes.cc
index fc21b20..9832eb9 100644
--- a/gcc/config/riscv/riscv-vector-builtins-shapes.cc
+++ b/gcc/config/riscv/riscv-vector-builtins-shapes.cc
@@ -908,6 +908,8 @@ struct vset_def : public build_base
{
poly_int64 outer_size = GET_MODE_SIZE (c.arg_mode (0));
poly_int64 inner_size = GET_MODE_SIZE (c.arg_mode (2));
+ if (maybe_eq (inner_size, 0))
+ return false;
unsigned int nvecs = exact_div (outer_size, inner_size).to_constant ();
return c.require_immediate (1, 0, nvecs - 1);
}
@@ -920,6 +922,8 @@ struct vget_def : public misc_def
{
poly_int64 outer_size = GET_MODE_SIZE (c.arg_mode (0));
poly_int64 inner_size = GET_MODE_SIZE (c.ret_mode ());
+ if (maybe_eq (inner_size, 0))
+ return false;
unsigned int nvecs = exact_div (outer_size, inner_size).to_constant ();
return c.require_immediate (1, 0, nvecs - 1);
}
@@ -1343,6 +1347,52 @@ struct sf_vfnrclip_def : public build_base
}
};
+/* sf_vcix_se_def class. */
+struct sf_vcix_se_def : public build_base
+{
+ char *get_name (function_builder &b, const function_instance &instance,
+ bool overloaded_p) const override
+ {
+ /* Return nullptr if it is overloaded. */
+ if (overloaded_p)
+ return nullptr;
+
+ b.append_base_name (instance.base_name);
+
+ /* vop --> vop<op>_se_<type>. */
+ if (!overloaded_p)
+ {
+ b.append_name (operand_suffixes[instance.op_info->op]);
+ b.append_name ("_se");
+ b.append_name (type_suffixes[instance.type.index].vector);
+ }
+ return b.finish_name ();
+ }
+};
+
+/* sf_vcix_def class. */
+struct sf_vcix_def : public build_base
+{
+ char *get_name (function_builder &b, const function_instance &instance,
+ bool overloaded_p) const override
+ {
+ /* Return nullptr if it is overloaded. */
+ if (overloaded_p)
+ return nullptr;
+
+ b.append_base_name (instance.base_name);
+
+ /* vop --> vop_<type>. */
+ if (!overloaded_p)
+ {
+ b.append_name (operand_suffixes[instance.op_info->op]);
+ b.append_name (type_suffixes[instance.type.index].vector);
+ }
+ return b.finish_name ();
+ }
+};
+
+
SHAPE(vsetvl, vsetvl)
SHAPE(vsetvl, vsetvlmax)
SHAPE(loadstore, loadstore)
@@ -1379,4 +1429,6 @@ SHAPE(crypto_vi, crypto_vi)
SHAPE(crypto_vv_no_op_type, crypto_vv_no_op_type)
SHAPE (sf_vqmacc, sf_vqmacc)
SHAPE (sf_vfnrclip, sf_vfnrclip)
+SHAPE(sf_vcix_se, sf_vcix_se)
+SHAPE(sf_vcix, sf_vcix)
} // end namespace riscv_vector
diff --git a/gcc/config/riscv/riscv-vector-builtins-shapes.h b/gcc/config/riscv/riscv-vector-builtins-shapes.h
index 858799b..2f2636e 100644
--- a/gcc/config/riscv/riscv-vector-builtins-shapes.h
+++ b/gcc/config/riscv/riscv-vector-builtins-shapes.h
@@ -62,6 +62,8 @@ extern const function_shape *const crypto_vv_no_op_type;
/* Sifive vendor extension. */
extern const function_shape *const sf_vqmacc;
extern const function_shape *const sf_vfnrclip;
+extern const function_shape *const sf_vcix_se;
+extern const function_shape *const sf_vcix;
}
} // end namespace riscv_vector
diff --git a/gcc/config/riscv/riscv-vector-builtins-types.def b/gcc/config/riscv/riscv-vector-builtins-types.def
index 857b637..ade6644 100644
--- a/gcc/config/riscv/riscv-vector-builtins-types.def
+++ b/gcc/config/riscv/riscv-vector-builtins-types.def
@@ -369,6 +369,18 @@ along with GCC; see the file COPYING3. If not see
#define DEF_RVV_XFQF_OPS(TYPE, REQUIRE)
#endif
+/* Use the "DEF_RVV_X2_U_OPS" macro to include unsigned integer types
+ which will be iterated over and registered as intrinsic functions. */
+#ifndef DEF_RVV_X2_U_OPS
+#define DEF_RVV_X2_U_OPS(TYPE, REQUIRE)
+#endif
+
+/* Use the "DEF_RVV_X2_WU_OPS" macro to include widened unsigned integer
+ types which will be iterated over and registered as intrinsic
+ functions. */
+#ifndef DEF_RVV_X2_WU_OPS
+#define DEF_RVV_X2_WU_OPS(TYPE, REQUIRE)
+#endif
+
DEF_RVV_I_OPS (vint8mf8_t, RVV_REQUIRE_ELEN_64)
DEF_RVV_I_OPS (vint8mf4_t, 0)
DEF_RVV_I_OPS (vint8mf2_t, 0)
@@ -1463,6 +1475,32 @@ DEF_RVV_XFQF_OPS (vint8mf2_t, 0)
DEF_RVV_XFQF_OPS (vint8m1_t, 0)
DEF_RVV_XFQF_OPS (vint8m2_t, 0)
+DEF_RVV_X2_U_OPS (vuint8mf8_t, RVV_REQUIRE_ELEN_64)
+DEF_RVV_X2_U_OPS (vuint8mf4_t, 0)
+DEF_RVV_X2_U_OPS (vuint8mf2_t, 0)
+DEF_RVV_X2_U_OPS (vuint8m1_t, 0)
+DEF_RVV_X2_U_OPS (vuint8m2_t, 0)
+DEF_RVV_X2_U_OPS (vuint8m4_t, 0)
+DEF_RVV_X2_U_OPS (vuint16mf4_t, RVV_REQUIRE_ELEN_64)
+DEF_RVV_X2_U_OPS (vuint16mf2_t, 0)
+DEF_RVV_X2_U_OPS (vuint16m1_t, 0)
+DEF_RVV_X2_U_OPS (vuint16m2_t, 0)
+DEF_RVV_X2_U_OPS (vuint16m4_t, 0)
+DEF_RVV_X2_U_OPS (vuint32mf2_t, RVV_REQUIRE_ELEN_64)
+DEF_RVV_X2_U_OPS (vuint32m1_t, 0)
+DEF_RVV_X2_U_OPS (vuint32m2_t, 0)
+DEF_RVV_X2_U_OPS (vuint32m4_t, 0)
+
+DEF_RVV_X2_WU_OPS (vuint16mf4_t, RVV_REQUIRE_ELEN_64)
+DEF_RVV_X2_WU_OPS (vuint16mf2_t, 0)
+DEF_RVV_X2_WU_OPS (vuint16m1_t, 0)
+DEF_RVV_X2_WU_OPS (vuint16m2_t, 0)
+DEF_RVV_X2_WU_OPS (vuint16m4_t, 0)
+DEF_RVV_X2_WU_OPS (vuint32mf2_t, RVV_REQUIRE_ELEN_64)
+DEF_RVV_X2_WU_OPS (vuint32m1_t, 0)
+DEF_RVV_X2_WU_OPS (vuint32m2_t, 0)
+DEF_RVV_X2_WU_OPS (vuint32m4_t, 0)
+
#undef DEF_RVV_I_OPS
#undef DEF_RVV_U_OPS
#undef DEF_RVV_F_OPS
@@ -1519,3 +1557,5 @@ DEF_RVV_XFQF_OPS (vint8m2_t, 0)
#undef DEF_RVV_F32_OPS
#undef DEF_RVV_QMACC_OPS
#undef DEF_RVV_XFQF_OPS
+#undef DEF_RVV_X2_U_OPS
+#undef DEF_RVV_X2_WU_OPS
diff --git a/gcc/config/riscv/riscv-vector-builtins.cc b/gcc/config/riscv/riscv-vector-builtins.cc
index 61dcdab..0db7549 100644
--- a/gcc/config/riscv/riscv-vector-builtins.cc
+++ b/gcc/config/riscv/riscv-vector-builtins.cc
@@ -544,6 +544,20 @@ static const rvv_type_info crypto_sew64_ops[] = {
#include "riscv-vector-builtins-types.def"
{NUM_VECTOR_TYPES, 0}};
+/* A list of unsigned integer types that will be registered for SiFive
+ Xsfvcp intrinsic functions. */
+static const rvv_type_info x2_u_ops[] = {
+#define DEF_RVV_X2_U_OPS(TYPE, REQUIRE) {VECTOR_TYPE_##TYPE, REQUIRE},
+#include "riscv-vector-builtins-types.def"
+ {NUM_VECTOR_TYPES, 0}};
+
+/* A list of widened unsigned integer types that will be registered for
+ SiFive Xsfvcp intrinsic functions. */
+static const rvv_type_info x2_wu_ops[] = {
+#define DEF_RVV_X2_WU_OPS(TYPE, REQUIRE) {VECTOR_TYPE_##TYPE, REQUIRE},
+#include "riscv-vector-builtins-types.def"
+ {NUM_VECTOR_TYPES, 0}};
+
/* A list of signed integer will be registered for intrinsic
* functions. */
static const rvv_type_info qmacc_ops[] = {
@@ -805,7 +819,7 @@ static CONSTEXPR const rvv_arg_type_info bf_wwxv_args[]
static CONSTEXPR const rvv_arg_type_info m_args[]
= {rvv_arg_type_info (RVV_BASE_mask), rvv_arg_type_info_end};
-/* A list of args for vector_type func (scalar_type) function. */
+/* A list of args for vector_type func (scalar_type/sf.vc) function. */
static CONSTEXPR const rvv_arg_type_info x_args[]
= {rvv_arg_type_info (RVV_BASE_scalar), rvv_arg_type_info_end};
@@ -1055,6 +1069,161 @@ static CONSTEXPR const rvv_arg_type_info scalar_ptr_size_args[]
rvv_arg_type_info (RVV_BASE_size), rvv_arg_type_info (RVV_BASE_vector),
rvv_arg_type_info_end};
+/* A list of args for vector_type func (sf.vc.x) function. */
+static CONSTEXPR const rvv_arg_type_info sf_vc_x_args[]
+ = {rvv_arg_type_info (RVV_BASE_scalar),
+ rvv_arg_type_info (RVV_BASE_scalar),
+ rvv_arg_type_info (RVV_BASE_scalar),
+ rvv_arg_type_info (RVV_BASE_scalar),
+ rvv_arg_type_info_end};
+
+/* A list of args for vector_type func (sf.vc.v.x) function. */
+static CONSTEXPR const rvv_arg_type_info sf_vc_v_x_args[]
+ = {rvv_arg_type_info (RVV_BASE_scalar),
+ rvv_arg_type_info (RVV_BASE_scalar),
+ rvv_arg_type_info (RVV_BASE_scalar),
+ rvv_arg_type_info_end};
+
+/* A list of args for vector_type func (sf.vc.i) function. */
+static CONSTEXPR const rvv_arg_type_info sf_vc_i_args[]
+ = {rvv_arg_type_info (RVV_BASE_scalar),
+ rvv_arg_type_info (RVV_BASE_scalar),
+ rvv_arg_type_info (RVV_BASE_scalar),
+ rvv_arg_type_info (RVV_BASE_scalar),
+ rvv_arg_type_info_end};
+
+/* A list of args for vector_type func (sf.vc.v.i) function. */
+static CONSTEXPR const rvv_arg_type_info sf_vc_v_i_args[]
+ = {rvv_arg_type_info (RVV_BASE_scalar),
+ rvv_arg_type_info (RVV_BASE_scalar),
+ rvv_arg_type_info (RVV_BASE_scalar),
+ rvv_arg_type_info_end};
+
+/* A list of args for vector_type func (sf.vc.vv) function. */
+static CONSTEXPR const rvv_arg_type_info sf_vc_vv_args[]
+ = {rvv_arg_type_info (RVV_BASE_scalar),
+ rvv_arg_type_info (RVV_BASE_scalar),
+ rvv_arg_type_info (RVV_BASE_vector),
+ rvv_arg_type_info (RVV_BASE_vector),
+ rvv_arg_type_info_end};
+
+/* A list of args for vector_type func (sf.vc.v.vv) function. */
+static CONSTEXPR const rvv_arg_type_info sf_vc_v_vv_args[]
+ = {rvv_arg_type_info (RVV_BASE_scalar),
+ rvv_arg_type_info (RVV_BASE_vector),
+ rvv_arg_type_info (RVV_BASE_vector),
+ rvv_arg_type_info_end};
+
+/* A list of args for vector_type func (sf.vc.xv) function. */
+static CONSTEXPR const rvv_arg_type_info sf_vc_xv_args[]
+ = {rvv_arg_type_info (RVV_BASE_scalar),
+ rvv_arg_type_info (RVV_BASE_scalar),
+ rvv_arg_type_info (RVV_BASE_vector),
+ rvv_arg_type_info (RVV_BASE_scalar),
+ rvv_arg_type_info_end};
+
+/* A list of args for vector_type func (sf.vc.v.xv) function. */
+static CONSTEXPR const rvv_arg_type_info sf_vc_v_xv_args[]
+ = {rvv_arg_type_info (RVV_BASE_scalar),
+ rvv_arg_type_info (RVV_BASE_vector),
+ rvv_arg_type_info (RVV_BASE_scalar),
+ rvv_arg_type_info_end};
+
+/* A list of args for vector_type func (sf.vc.iv) function. */
+static CONSTEXPR const rvv_arg_type_info sf_vc_iv_args[]
+ = {rvv_arg_type_info (RVV_BASE_scalar),
+ rvv_arg_type_info (RVV_BASE_scalar),
+ rvv_arg_type_info (RVV_BASE_vector),
+ rvv_arg_type_info (RVV_BASE_scalar),
+ rvv_arg_type_info_end};
+
+/* A list of args for vector_type func (sf.vc.v.iv) function. */
+static CONSTEXPR const rvv_arg_type_info sf_vc_v_iv_args[]
+ = {rvv_arg_type_info (RVV_BASE_scalar),
+ rvv_arg_type_info (RVV_BASE_vector),
+ rvv_arg_type_info (RVV_BASE_scalar),
+ rvv_arg_type_info_end};
+
+/* A list of args for vector_type func (sf.vc.fv) function. */
+static CONSTEXPR const rvv_arg_type_info sf_vc_fv_args[]
+ = {rvv_arg_type_info (RVV_BASE_scalar),
+ rvv_arg_type_info (RVV_BASE_scalar),
+ rvv_arg_type_info (RVV_BASE_vector),
+ rvv_arg_type_info (RVV_BASE_scalar_float),
+ rvv_arg_type_info_end};
+
+/* A list of args for vector_type func (sf.vc.v.fv) function. */
+static CONSTEXPR const rvv_arg_type_info sf_vc_v_fv_args[]
+ = {rvv_arg_type_info (RVV_BASE_scalar),
+ rvv_arg_type_info (RVV_BASE_vector),
+ rvv_arg_type_info (RVV_BASE_scalar_float),
+ rvv_arg_type_info_end};
+
+/* A list of args for vector_type func (sf.vc.vvv/sf.vc.v.vvv) function. */
+static CONSTEXPR const rvv_arg_type_info sf_vc_vvv_args[]
+ = {rvv_arg_type_info (RVV_BASE_scalar),
+ rvv_arg_type_info (RVV_BASE_vector),
+ rvv_arg_type_info (RVV_BASE_vector),
+ rvv_arg_type_info (RVV_BASE_vector),
+ rvv_arg_type_info_end};
+
+/* A list of args for vector_type func (sf.vc.xvv/sf.vc.v.xvv) function. */
+static CONSTEXPR const rvv_arg_type_info sf_vc_xvv_args[]
+ = {rvv_arg_type_info (RVV_BASE_scalar),
+ rvv_arg_type_info (RVV_BASE_vector),
+ rvv_arg_type_info (RVV_BASE_vector),
+ rvv_arg_type_info (RVV_BASE_scalar),
+ rvv_arg_type_info_end};
+
+/* A list of args for vector_type func (sf.vc.ivv/sf.vc.v.ivv) function. */
+static CONSTEXPR const rvv_arg_type_info sf_vc_ivv_args[]
+ = {rvv_arg_type_info (RVV_BASE_scalar),
+ rvv_arg_type_info (RVV_BASE_unsigned_vector),
+ rvv_arg_type_info (RVV_BASE_unsigned_vector),
+ rvv_arg_type_info (RVV_BASE_scalar),
+ rvv_arg_type_info_end};
+
+/* A list of args for vector_type func (sf.vc.fvv/sf.vc.v.fvv) function. */
+static CONSTEXPR const rvv_arg_type_info sf_vc_fvv_args[]
+ = {rvv_arg_type_info (RVV_BASE_scalar),
+ rvv_arg_type_info (RVV_BASE_vector),
+ rvv_arg_type_info (RVV_BASE_vector),
+ rvv_arg_type_info (RVV_BASE_scalar_float),
+ rvv_arg_type_info_end};
+
+/* A list of args for vector_type func (sf.vc.vvw/sf.vc.v.vvw) function. */
+static CONSTEXPR const rvv_arg_type_info sf_vc_vvw_args[]
+ = {rvv_arg_type_info (RVV_BASE_scalar),
+ rvv_arg_type_info (RVV_BASE_x2_vector),
+ rvv_arg_type_info (RVV_BASE_vector),
+ rvv_arg_type_info (RVV_BASE_vector),
+ rvv_arg_type_info_end};
+
+/* A list of args for vector_type func (sf.vc.xvw/sf.vc.v.xvw) function. */
+static CONSTEXPR const rvv_arg_type_info sf_vc_xvw_args[]
+ = {rvv_arg_type_info (RVV_BASE_scalar),
+ rvv_arg_type_info (RVV_BASE_x2_vector),
+ rvv_arg_type_info (RVV_BASE_vector),
+ rvv_arg_type_info (RVV_BASE_scalar),
+ rvv_arg_type_info_end};
+
+/* A list of args for vector_type func (sf.vc.ivw/sf.vc.v.ivw) function. */
+static CONSTEXPR const rvv_arg_type_info sf_vc_ivw_args[]
+ = {rvv_arg_type_info (RVV_BASE_scalar),
+ rvv_arg_type_info (RVV_BASE_x2_vector),
+ rvv_arg_type_info (RVV_BASE_vector),
+ rvv_arg_type_info (RVV_BASE_scalar),
+ rvv_arg_type_info_end};
+
+/* A list of args for vector_type func (sf.vc.fvw/sf.vc.v.fvw) function. */
+static CONSTEXPR const rvv_arg_type_info sf_vc_fvw_args[]
+ = {rvv_arg_type_info (RVV_BASE_scalar),
+ rvv_arg_type_info (RVV_BASE_x2_vector),
+ rvv_arg_type_info (RVV_BASE_vector),
+ rvv_arg_type_info (RVV_BASE_scalar_float),
+ rvv_arg_type_info_end};
+
/* A list of none preds that will be registered for intrinsic functions. */
static CONSTEXPR const predication_type_index none_preds[]
= {PRED_TYPE_none, NUM_PRED_TYPES};
@@ -3006,6 +3175,174 @@ static CONSTEXPR const rvv_op_info u_vvvv_crypto_sew64_ops
rvv_arg_type_info (RVV_BASE_vector), /* Return type */
vvv_args /* Args */};
+static CONSTEXPR const rvv_op_info sf_vc_x_ops
+ = {full_v_u_ops, /* Types */
+ OP_TYPE_x, /* Suffix */
+ rvv_arg_type_info (RVV_BASE_void), /* Return type */
+ sf_vc_x_args /* Args */};
+
+static CONSTEXPR const rvv_op_info sf_vc_v_x_ops
+ = {full_v_u_ops, /* Types */
+ OP_TYPE_v_x, /* Suffix */
+ rvv_arg_type_info (RVV_BASE_vector), /* Return type */
+ sf_vc_v_x_args /* Args */};
+
+static CONSTEXPR const rvv_op_info sf_vc_i_ops
+ = {full_v_u_ops, /* Types */
+ OP_TYPE_i, /* Suffix */
+ rvv_arg_type_info (RVV_BASE_void), /* Return type */
+ sf_vc_i_args /* Args */};
+
+static CONSTEXPR const rvv_op_info sf_vc_v_i_ops
+ = {full_v_u_ops, /* Types */
+ OP_TYPE_v_i, /* Suffix */
+ rvv_arg_type_info (RVV_BASE_vector), /* Return type */
+ sf_vc_v_i_args /* Args */};
+
+static CONSTEXPR const rvv_op_info sf_vc_vv_ops
+ = {full_v_u_ops, /* Types */
+ OP_TYPE_vv, /* Suffix */
+ rvv_arg_type_info (RVV_BASE_void), /* Return type */
+ sf_vc_vv_args /* Args */};
+
+static CONSTEXPR const rvv_op_info sf_vc_v_vv_ops
+ = {full_v_u_ops, /* Types */
+ OP_TYPE_v_vv, /* Suffix */
+ rvv_arg_type_info (RVV_BASE_vector), /* Return type */
+ sf_vc_v_vv_args /* Args */};
+
+static CONSTEXPR const rvv_op_info sf_vc_xv_ops
+ = {full_v_u_ops, /* Types */
+ OP_TYPE_xv, /* Suffix */
+ rvv_arg_type_info (RVV_BASE_void), /* Return type */
+ sf_vc_xv_args /* Args */};
+
+static CONSTEXPR const rvv_op_info sf_vc_v_xv_ops
+ = {full_v_u_ops, /* Types */
+ OP_TYPE_v_xv, /* Suffix */
+ rvv_arg_type_info (RVV_BASE_vector), /* Return type */
+ sf_vc_v_xv_args /* Args */};
+
+static CONSTEXPR const rvv_op_info sf_vc_iv_ops
+ = {full_v_u_ops, /* Types */
+ OP_TYPE_iv, /* Suffix */
+ rvv_arg_type_info (RVV_BASE_void), /* Return type */
+ sf_vc_iv_args /* Args */};
+
+static CONSTEXPR const rvv_op_info sf_vc_v_iv_ops
+ = {full_v_u_ops, /* Types */
+ OP_TYPE_v_iv, /* Suffix */
+ rvv_arg_type_info (RVV_BASE_vector), /* Return type */
+ sf_vc_v_iv_args /* Args */};
+
+static CONSTEXPR const rvv_op_info sf_vc_fv_ops
+ = {wextu_ops, /* Types */
+ OP_TYPE_fv, /* Suffix */
+ rvv_arg_type_info (RVV_BASE_void), /* Return type */
+ sf_vc_fv_args /* Args */};
+
+static CONSTEXPR const rvv_op_info sf_vc_v_fv_ops
+ = {wextu_ops, /* Types */
+ OP_TYPE_v_fv, /* Suffix */
+ rvv_arg_type_info (RVV_BASE_vector), /* Return type */
+ sf_vc_v_fv_args /* Args */};
+
+static CONSTEXPR const rvv_op_info sf_vc_vvv_ops
+ = {full_v_u_ops, /* Types */
+ OP_TYPE_vvv, /* Suffix */
+ rvv_arg_type_info (RVV_BASE_void), /* Return type */
+ sf_vc_vvv_args /* Args */};
+
+static CONSTEXPR const rvv_op_info sf_vc_v_vvv_ops
+ = {full_v_u_ops, /* Types */
+ OP_TYPE_v_vvv, /* Suffix */
+ rvv_arg_type_info (RVV_BASE_vector), /* Return type */
+ sf_vc_vvv_args /* Args */};
+
+static CONSTEXPR const rvv_op_info sf_vc_xvv_ops
+ = {full_v_u_ops, /* Types */
+ OP_TYPE_xvv, /* Suffix */
+ rvv_arg_type_info (RVV_BASE_void), /* Return type */
+ sf_vc_xvv_args /* Args */};
+
+static CONSTEXPR const rvv_op_info sf_vc_v_xvv_ops
+ = {full_v_u_ops, /* Types */
+ OP_TYPE_v_xvv, /* Suffix */
+ rvv_arg_type_info (RVV_BASE_vector), /* Return type */
+ sf_vc_xvv_args /* Args */};
+
+static CONSTEXPR const rvv_op_info sf_vc_ivv_ops
+ = {full_v_u_ops, /* Types */
+ OP_TYPE_ivv, /* Suffix */
+ rvv_arg_type_info (RVV_BASE_void), /* Return type */
+ sf_vc_ivv_args /* Args */};
+
+static CONSTEXPR const rvv_op_info sf_vc_v_ivv_ops
+ = {full_v_u_ops, /* Types */
+ OP_TYPE_v_ivv, /* Suffix */
+ rvv_arg_type_info (RVV_BASE_vector), /* Return type */
+ sf_vc_ivv_args /* Args */};
+
+static CONSTEXPR const rvv_op_info sf_vc_fvv_ops
+ = {wextu_ops, /* Types */
+ OP_TYPE_fvv, /* Suffix */
+ rvv_arg_type_info (RVV_BASE_void), /* Return type */
+ sf_vc_fvv_args /* Args */};
+
+static CONSTEXPR const rvv_op_info sf_vc_v_fvv_ops
+ = {wextu_ops, /* Types */
+ OP_TYPE_v_fvv, /* Suffix */
+ rvv_arg_type_info (RVV_BASE_vector), /* Return type */
+ sf_vc_fvv_args /* Args */};
+
+static CONSTEXPR const rvv_op_info sf_vc_vvw_ops
+ = {x2_u_ops, /* Types */
+ OP_TYPE_vvw, /* Suffix */
+ rvv_arg_type_info (RVV_BASE_void), /* Return type */
+ sf_vc_vvw_args /* Args */};
+
+static CONSTEXPR const rvv_op_info sf_vc_v_vvw_ops
+ = {x2_u_ops, /* Types */
+ OP_TYPE_v_vvw, /* Suffix */
+ rvv_arg_type_info (RVV_BASE_x2_vector), /* Return type */
+ sf_vc_vvw_args /* Args */};
+
+static CONSTEXPR const rvv_op_info sf_vc_xvw_ops
+ = {x2_u_ops, /* Types */
+ OP_TYPE_xvw, /* Suffix */
+ rvv_arg_type_info (RVV_BASE_void), /* Return type */
+ sf_vc_xvw_args /* Args */};
+
+static CONSTEXPR const rvv_op_info sf_vc_v_xvw_ops
+ = {x2_u_ops, /* Types */
+ OP_TYPE_v_xvw, /* Suffix */
+ rvv_arg_type_info (RVV_BASE_x2_vector), /* Return type */
+ sf_vc_xvw_args /* Args */};
+
+static CONSTEXPR const rvv_op_info sf_vc_ivw_ops
+ = {x2_u_ops, /* Types */
+ OP_TYPE_ivw, /* Suffix */
+ rvv_arg_type_info (RVV_BASE_void), /* Return type */
+ sf_vc_ivw_args /* Args */};
+
+static CONSTEXPR const rvv_op_info sf_vc_v_ivw_ops
+ = {x2_u_ops, /* Types */
+ OP_TYPE_v_ivw, /* Suffix */
+ rvv_arg_type_info (RVV_BASE_x2_vector), /* Return type */
+ sf_vc_ivw_args /* Args */};
+
+static CONSTEXPR const rvv_op_info sf_vc_fvw_ops
+ = {x2_wu_ops, /* Types */
+ OP_TYPE_fvw, /* Suffix */
+ rvv_arg_type_info (RVV_BASE_void), /* Return type */
+ sf_vc_fvw_args /* Args */};
+
+static CONSTEXPR const rvv_op_info sf_vc_v_fvw_ops
+ = {x2_wu_ops, /* Types */
+ OP_TYPE_v_fvw, /* Suffix */
+ rvv_arg_type_info (RVV_BASE_x2_vector), /* Return type */
+ sf_vc_fvw_args /* Args */};
+
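Taken together, each rvv_op_info entry above binds four pieces of static information: the vector-type group the intrinsic applies to, the overload suffix that names it, a return-type descriptor, and an argument-list descriptor. The following standalone sketch (all names illustrative, not GCC's internal API) models how a registration loop consumes such a table:

#include <cstdio>

struct op_info
{
  const char *type_group;  // which vector types the entry applies to
  const char *suffix;      // overload suffix, e.g. "v_x"
  const char *ret;         // return-type descriptor
  const char *args;        // argument-list descriptor
};

static const op_info table[] = {
  { "full_v_u", "x",   "void",   "sf_vc_x_args" },
  { "full_v_u", "v_x", "vector", "sf_vc_v_x_args" },
};

int
main ()
{
  // Registration walks the table and emits one builtin per entry.
  for (const op_info &op : table)
    std::printf ("register sf_vc_%s (%s) -> %s\n",
                 op.suffix, op.args, op.ret);
}
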
/* A list of all RVV base function types. */
static CONSTEXPR const function_type_info function_types[] = {
#define DEF_RVV_TYPE_INDEX( \
@@ -3022,7 +3359,7 @@ static CONSTEXPR const function_type_info function_types[] = {
SIGNED_EEW16_LMUL1_INTERPRET, SIGNED_EEW32_LMUL1_INTERPRET, \
SIGNED_EEW64_LMUL1_INTERPRET, UNSIGNED_EEW8_LMUL1_INTERPRET, \
UNSIGNED_EEW16_LMUL1_INTERPRET, UNSIGNED_EEW32_LMUL1_INTERPRET, \
- UNSIGNED_EEW64_LMUL1_INTERPRET, X2_VLMUL_EXT, X4_VLMUL_EXT, X8_VLMUL_EXT, \
+ UNSIGNED_EEW64_LMUL1_INTERPRET, X2, X2_VLMUL_EXT, X4_VLMUL_EXT, X8_VLMUL_EXT,\
X16_VLMUL_EXT, X32_VLMUL_EXT, X64_VLMUL_EXT, TUPLE_SUBPART) \
{ \
VECTOR_TYPE_##VECTOR, \
@@ -3087,6 +3424,7 @@ static CONSTEXPR const function_type_info function_types[] = {
VECTOR_TYPE_##UNSIGNED_EEW16_LMUL1_INTERPRET, \
VECTOR_TYPE_##UNSIGNED_EEW32_LMUL1_INTERPRET, \
VECTOR_TYPE_##UNSIGNED_EEW64_LMUL1_INTERPRET, \
+ VECTOR_TYPE_##X2, \
VECTOR_TYPE_##X2_VLMUL_EXT, \
VECTOR_TYPE_##X4_VLMUL_EXT, \
VECTOR_TYPE_##X8_VLMUL_EXT, \
@@ -3504,26 +3842,26 @@ check_required_extensions (const function_instance &instance)
required_extensions |= RVV_REQUIRE_RV64BIT;
}
- uint64_t riscv_isa_flags = 0;
+ uint64_t isa_flags = 0;
if (TARGET_VECTOR_ELEN_BF_16)
- riscv_isa_flags |= RVV_REQUIRE_ELEN_BF_16;
+ isa_flags |= RVV_REQUIRE_ELEN_BF_16;
if (TARGET_VECTOR_ELEN_FP_16)
- riscv_isa_flags |= RVV_REQUIRE_ELEN_FP_16;
+ isa_flags |= RVV_REQUIRE_ELEN_FP_16;
if (TARGET_VECTOR_ELEN_FP_32)
- riscv_isa_flags |= RVV_REQUIRE_ELEN_FP_32;
+ isa_flags |= RVV_REQUIRE_ELEN_FP_32;
if (TARGET_VECTOR_ELEN_FP_64)
- riscv_isa_flags |= RVV_REQUIRE_ELEN_FP_64;
+ isa_flags |= RVV_REQUIRE_ELEN_FP_64;
if (TARGET_VECTOR_ELEN_64)
- riscv_isa_flags |= RVV_REQUIRE_ELEN_64;
+ isa_flags |= RVV_REQUIRE_ELEN_64;
if (TARGET_64BIT)
- riscv_isa_flags |= RVV_REQUIRE_RV64BIT;
+ isa_flags |= RVV_REQUIRE_RV64BIT;
if (TARGET_FULL_V)
- riscv_isa_flags |= RVV_REQUIRE_FULL_V;
+ isa_flags |= RVV_REQUIRE_FULL_V;
if (TARGET_MIN_VLEN > 32)
- riscv_isa_flags |= RVV_REQUIRE_MIN_VLEN_64;
+ isa_flags |= RVV_REQUIRE_MIN_VLEN_64;
- uint64_t missing_extensions = required_extensions & ~riscv_isa_flags;
+ uint64_t missing_extensions = required_extensions & ~isa_flags;
if (missing_extensions != 0)
return false;
return true;
@@ -3600,6 +3938,24 @@ rvv_arg_type_info::get_xfqf_float_type (vector_type_index type_idx) const
return NULL_TREE;
}
+tree
+rvv_arg_type_info::get_scalar_float_type (vector_type_index type_idx) const
+{
+ /* Convert vint types to their corresponding scalar float types.
+ Note:
+ - According to riscv-vector-builtins-types.def, the index of an unsigned
+ type is always one greater than its corresponding signed type.
+ - Conversion for vint8 types is not required. */
+ if (type_idx >= VECTOR_TYPE_vint16mf4_t && type_idx <= VECTOR_TYPE_vuint16m8_t)
+ return builtin_types[VECTOR_TYPE_vfloat16m1_t].scalar;
+ else if (type_idx >= VECTOR_TYPE_vint32mf2_t && type_idx <= VECTOR_TYPE_vuint32m8_t)
+ return builtin_types[VECTOR_TYPE_vfloat32m1_t].scalar;
+ else if (type_idx >= VECTOR_TYPE_vint64m1_t && type_idx <= VECTOR_TYPE_vuint64m8_t)
+ return builtin_types[VECTOR_TYPE_vfloat64m1_t].scalar;
+ else
+ return NULL_TREE;
+}
+
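The range tests above lean entirely on the ordering guarantee quoted in the comment: because every unsigned index directly follows its signed counterpart in riscv-vector-builtins-types.def, one closed interval covers both signednesses per element width. A minimal standalone sketch of the same mapping, with made-up enum values:

#include <cassert>

// Illustrative indexes only; the real ones come from
// riscv-vector-builtins-types.def.
enum type_index
{
  vint16mf4, vint16m8, vuint16mf4, vuint16m8,  // 16-bit int family
  vint32mf2, vuint32m8,                        // 32-bit int family
  vfloat16, vfloat32,                          // scalar float results
  invalid
};

static type_index
scalar_float_for (type_index t)
{
  // One closed interval covers signed and unsigned because the unsigned
  // index always directly follows the corresponding signed one.
  if (t >= vint16mf4 && t <= vuint16m8)
    return vfloat16;
  if (t >= vint32mf2 && t <= vuint32m8)
    return vfloat32;
  return invalid;                              // e.g. vint8: no conversion
}

int
main ()
{
  assert (scalar_float_for (vuint16m8) == vfloat16);
  assert (scalar_float_for (vint32mf2) == vfloat32);
  assert (scalar_float_for (vfloat32) == invalid);
}
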
vector_type_index
rvv_arg_type_info::get_function_type_index (vector_type_index type_idx) const
{
@@ -3758,7 +4114,7 @@ function_instance::modifies_global_state_p () const
return true;
/* Handle direct modifications of global state. */
- return flags & (CP_WRITE_MEMORY | CP_WRITE_CSR);
+ return flags & (CP_WRITE_MEMORY | CP_WRITE_CSR | CP_USE_COPROCESSORS);
}
/* Return true if calls to the function could raise a signal. */
@@ -4397,7 +4753,10 @@ function_expander::use_ternop_insn (bool vd_accum_p, insn_code icode)
}
/* Implement the call using instruction ICODE, with a 1:1 mapping between
- arguments and input operands. */
+ arguments and input operands.
+ There are operands that cannot be broadcast using v[f]mv. In that case
+ we switch to a strided broadcast. */
+
rtx
function_expander::use_widen_ternop_insn (insn_code icode)
{
@@ -4438,7 +4797,10 @@ function_expander::use_widen_ternop_insn (insn_code icode)
}
/* Implement the call using instruction ICODE, with a 1:1 mapping between
- arguments and input operands. */
+ arguments and input operands.
+ There are operands that cannot be broadcast using v[f]mv. In that case
+ we switch to a strided broadcast. */
+
rtx
function_expander::use_scalar_move_insn (insn_code icode)
{
@@ -4456,6 +4818,37 @@ function_expander::use_scalar_move_insn (insn_code icode)
for (int argno = arg_offset; argno < call_expr_nargs (exp); argno++)
add_input_operand (argno);
+ if (!can_be_broadcast_p (m_ops[3].value))
+ icode = code_for_pred_strided_broadcast (vector_mode ());
+
+ add_input_operand (Pmode, get_tail_policy_for_pred (pred));
+ add_input_operand (Pmode, get_mask_policy_for_pred (pred));
+ add_input_operand (Pmode, get_avl_type_rtx (avl_type::NONVLMAX));
+ return generate_insn (icode);
+}
+
+/* Implement the call using instruction ICODE, with a 1:1 mapping between
+ arguments and input operands. */
+rtx
+function_expander::use_scalar_broadcast_insn (insn_code icode)
+{
+ machine_mode mode = TYPE_MODE (TREE_TYPE (exp));
+
+ /* Record the offset to get the argument. */
+ int arg_offset = 0;
+ add_all_one_mask_operand (mask_mode ());
+
+ if (use_real_merge_p (pred))
+ add_input_operand (arg_offset++);
+ else
+ add_vundef_operand (mode);
+
+ for (int argno = arg_offset; argno < call_expr_nargs (exp); argno++)
+ add_input_operand (argno);
+
+ if (!can_be_broadcast_p (m_ops[3].value))
+ icode = code_for_pred_strided_broadcast (vector_mode ());
+
add_input_operand (Pmode, get_tail_policy_for_pred (pred));
add_input_operand (Pmode, get_mask_policy_for_pred (pred));
add_input_operand (Pmode, get_avl_type_rtx (avl_type::NONVLMAX));
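The pattern in both expanders above is that the operand list is built once and only the instruction code is swapped when the scalar cannot be broadcast with a plain v[f]mv (for example a 64-bit scalar on rv32); the strided-broadcast pattern then re-materializes it with a zero-stride load. A hedged sketch of just that decision (enum and helper are illustrative, not GCC's):

#include <cstdio>

enum insn_code { PRED_BROADCAST, PRED_STRIDED_BROADCAST }; // illustrative

static insn_code
choose_broadcast (bool scalar_fits_vmv)
{
  // The operand list is unchanged; only the emitted pattern differs.
  return scalar_fits_vmv ? PRED_BROADCAST : PRED_STRIDED_BROADCAST;
}

int
main ()
{
  std::printf ("%d %d\n",
               choose_broadcast (true),    // vmv.v.x / vfmv.v.f
               choose_broadcast (false));  // zero-stride load broadcast
}
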
@@ -4621,6 +5014,12 @@ registered_function::overloaded_hash () const
for (unsigned int i = 0; i < argument_types.length (); i++)
{
type = argument_types[i];
+
+ /* If we're passed something entirely unreasonable, just ignore here.
+ We'll warn later anyway. */
+ if (TREE_CODE_CLASS (TREE_CODE (type)) != tcc_type)
+ continue;
+
unsigned_p = POINTER_TYPE_P (type) ? TYPE_UNSIGNED (TREE_TYPE (type))
: TYPE_UNSIGNED (type);
mode_p = POINTER_TYPE_P (type) ? TYPE_MODE (TREE_TYPE (type))
diff --git a/gcc/config/riscv/riscv-vector-builtins.def b/gcc/config/riscv/riscv-vector-builtins.def
index 3a62869..be3fb1a 100644
--- a/gcc/config/riscv/riscv-vector-builtins.def
+++ b/gcc/config/riscv/riscv-vector-builtins.def
@@ -82,7 +82,7 @@ along with GCC; see the file COPYING3. If not see
SIGNED_EEW16_LMUL1_INTERPRET, SIGNED_EEW32_LMUL1_INTERPRET, \
SIGNED_EEW64_LMUL1_INTERPRET, UNSIGNED_EEW8_LMUL1_INTERPRET, \
UNSIGNED_EEW16_LMUL1_INTERPRET, UNSIGNED_EEW32_LMUL1_INTERPRET, \
- UNSIGNED_EEW64_LMUL1_INTERPRET, X2_VLMUL_EXT, X4_VLMUL_EXT, X8_VLMUL_EXT, \
+ UNSIGNED_EEW64_LMUL1_INTERPRET, X2, X2_VLMUL_EXT, X4_VLMUL_EXT, X8_VLMUL_EXT,\
X16_VLMUL_EXT, X32_VLMUL_EXT, X64_VLMUL_EXT, TUPLE_SUBPART)
#endif
@@ -637,6 +637,32 @@ DEF_RVV_OP_TYPE (xu_w)
DEF_RVV_OP_TYPE (s)
DEF_RVV_OP_TYPE (4x8x4)
DEF_RVV_OP_TYPE (2x8x2)
+DEF_RVV_OP_TYPE (v_x)
+DEF_RVV_OP_TYPE (i)
+DEF_RVV_OP_TYPE (v_i)
+DEF_RVV_OP_TYPE (xv)
+DEF_RVV_OP_TYPE (iv)
+DEF_RVV_OP_TYPE (fv)
+DEF_RVV_OP_TYPE (vvv)
+DEF_RVV_OP_TYPE (xvv)
+DEF_RVV_OP_TYPE (ivv)
+DEF_RVV_OP_TYPE (fvv)
+DEF_RVV_OP_TYPE (vvw)
+DEF_RVV_OP_TYPE (xvw)
+DEF_RVV_OP_TYPE (ivw)
+DEF_RVV_OP_TYPE (fvw)
+DEF_RVV_OP_TYPE (v_vv)
+DEF_RVV_OP_TYPE (v_xv)
+DEF_RVV_OP_TYPE (v_iv)
+DEF_RVV_OP_TYPE (v_fv)
+DEF_RVV_OP_TYPE (v_vvv)
+DEF_RVV_OP_TYPE (v_xvv)
+DEF_RVV_OP_TYPE (v_ivv)
+DEF_RVV_OP_TYPE (v_fvv)
+DEF_RVV_OP_TYPE (v_vvw)
+DEF_RVV_OP_TYPE (v_xvw)
+DEF_RVV_OP_TYPE (v_ivw)
+DEF_RVV_OP_TYPE (v_fvw)
DEF_RVV_PRED_TYPE (ta)
DEF_RVV_PRED_TYPE (tu)
@@ -720,6 +746,7 @@ DEF_RVV_BASE_TYPE (unsigned_eew8_lmul1_interpret, get_vector_type (type_idx))
DEF_RVV_BASE_TYPE (unsigned_eew16_lmul1_interpret, get_vector_type (type_idx))
DEF_RVV_BASE_TYPE (unsigned_eew32_lmul1_interpret, get_vector_type (type_idx))
DEF_RVV_BASE_TYPE (unsigned_eew64_lmul1_interpret, get_vector_type (type_idx))
+DEF_RVV_BASE_TYPE (x2_vector, get_vector_type (type_idx))
DEF_RVV_BASE_TYPE (vlmul_ext_x2, get_vector_type (type_idx))
DEF_RVV_BASE_TYPE (vlmul_ext_x4, get_vector_type (type_idx))
DEF_RVV_BASE_TYPE (vlmul_ext_x8, get_vector_type (type_idx))
@@ -729,6 +756,7 @@ DEF_RVV_BASE_TYPE (vlmul_ext_x64, get_vector_type (type_idx))
DEF_RVV_BASE_TYPE (size_ptr, build_pointer_type (size_type_node))
DEF_RVV_BASE_TYPE (tuple_subpart, get_tuple_subpart_type (type_idx))
DEF_RVV_BASE_TYPE (xfqf_float, get_xfqf_float_type (type_idx))
+DEF_RVV_BASE_TYPE (scalar_float, get_scalar_float_type (type_idx))
DEF_RVV_VXRM_ENUM (RNU, VXRM_RNU)
DEF_RVV_VXRM_ENUM (RNE, VXRM_RNE)
diff --git a/gcc/config/riscv/riscv-vector-builtins.h b/gcc/config/riscv/riscv-vector-builtins.h
index 42ba905..86d8115 100644
--- a/gcc/config/riscv/riscv-vector-builtins.h
+++ b/gcc/config/riscv/riscv-vector-builtins.h
@@ -130,6 +130,7 @@ enum required_ext
XSFVQMACCQOQ_EXT, /* XSFVQMACCQOQ extension */
XSFVQMACCDOD_EXT, /* XSFVQMACCDOD extension */
XSFVFNRCLIPXFQF_EXT, /* XSFVFNRCLIPXFQF extension */
+  XSFVCP_EXT, /* XSFVCP extension */
/* Please update below to isa_name func when add or remove enum type(s). */
};
@@ -169,6 +170,8 @@ static inline const char * required_ext_to_isa_name (enum required_ext required)
return "xsfvqmaccdod";
case XSFVFNRCLIPXFQF_EXT:
return "xsfvfnrclipxfqf";
+ case XSFVCP_EXT:
+ return "xsfvcp";
default:
gcc_unreachable ();
}
@@ -212,6 +215,8 @@ static inline bool required_extensions_specified (enum required_ext required)
return TARGET_XSFVQMACCDOD;
case XSFVFNRCLIPXFQF_EXT:
return TARGET_XSFVFNRCLIPXFQF;
+ case XSFVCP_EXT:
+ return TARGET_XSFVCP;
default:
gcc_unreachable ();
}
@@ -297,6 +302,7 @@ struct rvv_arg_type_info
tree get_tree_type (vector_type_index) const;
tree get_tuple_subpart_type (vector_type_index) const;
tree get_xfqf_float_type (vector_type_index) const;
+ tree get_scalar_float_type (vector_type_index) const;
};
/* Static information for each operand. */
@@ -325,43 +331,7 @@ struct function_group_info
/* Return true if required extension is enabled */
bool match (required_ext ext_value) const
{
- switch (ext_value)
- {
- case VECTOR_EXT:
- return TARGET_VECTOR;
- case ZVBB_EXT:
- return TARGET_ZVBB;
- case ZVBB_OR_ZVKB_EXT:
- return (TARGET_ZVBB || TARGET_ZVKB);
- case ZVBC_EXT:
- return TARGET_ZVBC;
- case ZVKG_EXT:
- return TARGET_ZVKG;
- case ZVKNED_EXT:
- return TARGET_ZVKNED;
- case ZVKNHA_OR_ZVKNHB_EXT:
- return (TARGET_ZVKNHA || TARGET_ZVKNHB);
- case ZVKNHB_EXT:
- return TARGET_ZVKNHB;
- case ZVKSED_EXT:
- return TARGET_ZVKSED;
- case ZVKSH_EXT:
- return TARGET_ZVKSH;
- case XTHEADVECTOR_EXT:
- return TARGET_XTHEADVECTOR;
- case ZVFBFMIN_EXT:
- return TARGET_ZVFBFMIN;
- case ZVFBFWMA_EXT:
- return TARGET_ZVFBFWMA;
- case XSFVQMACCQOQ_EXT:
- return TARGET_XSFVQMACCQOQ;
- case XSFVQMACCDOD_EXT:
- return TARGET_XSFVQMACCDOD;
- case XSFVFNRCLIPXFQF_EXT:
- return TARGET_XSFVFNRCLIPXFQF;
- default:
- gcc_unreachable ();
- }
+ return required_extensions_specified (ext_value);
}
/* The base name, as a string. */
const char *base_name;
@@ -527,6 +497,7 @@ public:
rtx use_ternop_insn (bool, insn_code);
rtx use_widen_ternop_insn (insn_code);
rtx use_scalar_move_insn (insn_code);
+ rtx use_scalar_broadcast_insn (insn_code);
rtx generate_insn (insn_code);
/* The function call expression. */
diff --git a/gcc/config/riscv/riscv-vector-costs.cc b/gcc/config/riscv/riscv-vector-costs.cc
index 167375c..5e6cb67 100644
--- a/gcc/config/riscv/riscv-vector-costs.cc
+++ b/gcc/config/riscv/riscv-vector-costs.cc
@@ -178,8 +178,8 @@ get_live_range (hash_map<tree, pair> *live_ranges, tree arg)
STMT 5 (be vectorized) -- point 2
...
*/
-static void
-compute_local_program_points (
+void
+costs::compute_local_program_points (
vec_info *vinfo,
hash_map<basic_block, vec<stmt_point>> &program_points_per_bb)
{
@@ -205,9 +205,7 @@ compute_local_program_points (
if (!is_gimple_assign_or_call (gsi_stmt (si)))
continue;
stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (si));
- enum stmt_vec_info_type type
- = STMT_VINFO_TYPE (vect_stmt_to_vectorize (stmt_info));
- if (type != undef_vec_info_type)
+ if (STMT_VINFO_RELEVANT_P (stmt_info))
{
stmt_point info = {point, gsi_stmt (si), stmt_info};
program_points.safe_push (info);
@@ -276,14 +274,14 @@ loop_invariant_op_p (class loop *loop,
/* Return true if the variable should be counted into liveness. */
static bool
-variable_vectorized_p (class loop *loop, stmt_vec_info stmt_info, tree var,
- bool lhs_p)
+variable_vectorized_p (class loop *loop, stmt_vec_info stmt_info,
+ slp_tree node, tree var, bool lhs_p)
{
if (!var)
return false;
gimple *stmt = STMT_VINFO_STMT (stmt_info);
- enum stmt_vec_info_type type
- = STMT_VINFO_TYPE (vect_stmt_to_vectorize (stmt_info));
+ stmt_info = vect_stmt_to_vectorize (stmt_info);
+ enum stmt_vec_info_type type = SLP_TREE_TYPE (node);
if (is_gimple_call (stmt) && gimple_call_internal_p (stmt))
{
if (gimple_call_internal_fn (stmt) == IFN_MASK_STORE
@@ -359,8 +357,8 @@ variable_vectorized_p (class loop *loop, stmt_vec_info stmt_info, tree var,
The live range of SSA 1 is [1, 3] in bb 2.
The live range of SSA 2 is [0, 4] in bb 3. */
-static machine_mode
-compute_local_live_ranges (
+machine_mode
+costs::compute_local_live_ranges (
loop_vec_info loop_vinfo,
const hash_map<basic_block, vec<stmt_point>> &program_points_per_bb,
hash_map<basic_block, hash_map<tree, pair>> &live_ranges_per_bb)
@@ -390,8 +388,11 @@ compute_local_live_ranges (
unsigned int point = program_point.point;
gimple *stmt = program_point.stmt;
tree lhs = gimple_get_lhs (stmt);
- if (variable_vectorized_p (loop, program_point.stmt_info, lhs,
- true))
+ slp_tree *node = vinfo_slp_map.get (program_point.stmt_info);
+ if (!node)
+ continue;
+ if (variable_vectorized_p (loop, program_point.stmt_info,
+ *node, lhs, true))
{
biggest_mode = get_biggest_mode (biggest_mode,
TYPE_MODE (TREE_TYPE (lhs)));
@@ -399,7 +400,7 @@ compute_local_live_ranges (
pair &live_range
= live_ranges->get_or_insert (lhs, &existed_p);
gcc_assert (!existed_p);
- if (STMT_VINFO_MEMORY_ACCESS_TYPE (program_point.stmt_info)
+ if (SLP_TREE_MEMORY_ACCESS_TYPE (*node)
== VMAT_LOAD_STORE_LANES)
point = get_first_lane_point (program_points,
program_point.stmt_info);
@@ -408,8 +409,8 @@ compute_local_live_ranges (
for (i = 0; i < gimple_num_args (stmt); i++)
{
tree var = gimple_arg (stmt, i);
- if (variable_vectorized_p (loop, program_point.stmt_info, var,
- false))
+ if (variable_vectorized_p (loop, program_point.stmt_info,
+ *node, var, false))
{
biggest_mode
= get_biggest_mode (biggest_mode,
@@ -417,8 +418,7 @@ compute_local_live_ranges (
bool existed_p = false;
pair &live_range
= live_ranges->get_or_insert (var, &existed_p);
- if (STMT_VINFO_MEMORY_ACCESS_TYPE (
- program_point.stmt_info)
+ if (SLP_TREE_MEMORY_ACCESS_TYPE (*node)
== VMAT_LOAD_STORE_LANES)
point = get_last_lane_point (program_points,
program_point.stmt_info);
@@ -599,15 +599,15 @@ get_store_value (gimple *stmt)
}
/* Return true if additional vector vars needed. */
-static bool
-need_additional_vector_vars_p (stmt_vec_info stmt_info)
+bool
+costs::need_additional_vector_vars_p (stmt_vec_info stmt_info,
+ slp_tree node)
{
- enum stmt_vec_info_type type
- = STMT_VINFO_TYPE (vect_stmt_to_vectorize (stmt_info));
+ enum stmt_vec_info_type type = SLP_TREE_TYPE (node);
if (type == load_vec_info_type || type == store_vec_info_type)
{
if (STMT_VINFO_GATHER_SCATTER_P (stmt_info)
- && STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) == VMAT_GATHER_SCATTER)
+ && mat_gather_scatter_p (SLP_TREE_MEMORY_ACCESS_TYPE (node)))
return true;
machine_mode mode = TYPE_MODE (STMT_VINFO_VECTYPE (stmt_info));
@@ -626,7 +626,7 @@ compute_estimated_lmul (loop_vec_info loop_vinfo, machine_mode mode)
int regno_alignment = riscv_get_v_regno_alignment (loop_vinfo->vector_mode);
if (riscv_v_ext_vls_mode_p (loop_vinfo->vector_mode))
return regno_alignment;
- else if (known_eq (LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo), 1U))
+ else
{
int estimated_vf = vect_vf_for_cost (loop_vinfo);
int estimated_lmul = estimated_vf * GET_MODE_BITSIZE (mode).to_constant ()
@@ -636,25 +636,6 @@ compute_estimated_lmul (loop_vec_info loop_vinfo, machine_mode mode)
else
return estimated_lmul;
}
- else
- {
- /* Estimate the VLA SLP LMUL. */
- if (regno_alignment > RVV_M1)
- return regno_alignment;
- else if (mode != QImode
- || LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo).is_constant ())
- {
- int ratio;
- if (can_div_trunc_p (BYTES_PER_RISCV_VECTOR,
- GET_MODE_SIZE (loop_vinfo->vector_mode), &ratio))
- {
- if (ratio == 1)
- return RVV_M4;
- else if (ratio == 2)
- return RVV_M2;
- }
- }
- }
return 0;
}
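With the VLA SLP branch gone, the estimate reduces to the single formula in the remaining arm. A worked instance, assuming the (elided) divisor is the minimum vector length in bits:

#include <cstdio>

static int
estimated_lmul (int estimated_vf, int elem_bits, int min_vlen_bits)
{
  return estimated_vf * elem_bits / min_vlen_bits;
}

int
main ()
{
  // A VF of 16 one-byte elements against VLEN 128: 16 * 8 / 128 = LMUL 1.
  std::printf ("%d\n", estimated_lmul (16, 8, 128));
}
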
@@ -678,8 +659,8 @@ compute_estimated_lmul (loop_vec_info loop_vinfo, machine_mode mode)
Then, after this function, we update SSA 1 live range in bb 2
into [2, 4] since SSA 1 is live out into bb 3. */
-static void
-update_local_live_ranges (
+void
+costs::update_local_live_ranges (
vec_info *vinfo,
hash_map<basic_block, vec<stmt_point>> &program_points_per_bb,
hash_map<basic_block, hash_map<tree, pair>> &live_ranges_per_bb,
@@ -706,8 +687,13 @@ update_local_live_ranges (
{
gphi *phi = psi.phi ();
stmt_vec_info stmt_info = vinfo->lookup_stmt (phi);
- if (STMT_VINFO_TYPE (vect_stmt_to_vectorize (stmt_info))
- == undef_vec_info_type)
+ stmt_info = vect_stmt_to_vectorize (stmt_info);
+ slp_tree *node = vinfo_slp_map.get (stmt_info);
+
+ if (!node)
+ continue;
+
+ if (SLP_TREE_TYPE (*node) == undef_vec_info_type)
continue;
for (j = 0; j < gimple_phi_num_args (phi); j++)
@@ -782,9 +768,12 @@ update_local_live_ranges (
if (!is_gimple_assign_or_call (gsi_stmt (si)))
continue;
stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (si));
- enum stmt_vec_info_type type
- = STMT_VINFO_TYPE (vect_stmt_to_vectorize (stmt_info));
- if (need_additional_vector_vars_p (stmt_info))
+ stmt_info = vect_stmt_to_vectorize (stmt_info);
+ slp_tree *node = vinfo_slp_map.get (stmt_info);
+ if (!node)
+ continue;
+ enum stmt_vec_info_type type = SLP_TREE_TYPE (*node);
+ if (need_additional_vector_vars_p (stmt_info, *node))
{
/* For non-adjacent load/store STMT, we will potentially
convert it into:
@@ -837,8 +826,8 @@ update_local_live_ranges (
}
/* Compute the maximum live V_REGS. */
-static bool
-has_unexpected_spills_p (loop_vec_info loop_vinfo)
+bool
+costs::has_unexpected_spills_p (loop_vec_info loop_vinfo)
{
/* Compute local program points.
It's a fast and effective computation. */
@@ -920,7 +909,11 @@ costs::analyze_loop_vinfo (loop_vec_info loop_vinfo)
/* Detect whether we're vectorizing for VLA and should apply the unrolling
heuristic described above m_unrolled_vls_niters. */
record_potential_vls_unrolling (loop_vinfo);
+}
+void
+costs::record_lmul_spills (loop_vec_info loop_vinfo)
+{
/* Detect whether the LOOP has unexpected spills. */
record_potential_unexpected_spills (loop_vinfo);
}
@@ -1092,7 +1085,7 @@ costs::better_main_loop_than_p (const vector_costs *uncast_other) const
load/store. */
static int
segment_loadstore_group_size (enum vect_cost_for_stmt kind,
- stmt_vec_info stmt_info)
+ stmt_vec_info stmt_info, slp_tree node)
{
if (stmt_info
&& (kind == vector_load || kind == vector_store)
@@ -1100,7 +1093,7 @@ segment_loadstore_group_size (enum vect_cost_for_stmt kind,
{
stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
if (stmt_info
- && STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) == VMAT_LOAD_STORE_LANES)
+ && SLP_TREE_MEMORY_ACCESS_TYPE (node) == VMAT_LOAD_STORE_LANES)
return DR_GROUP_SIZE (stmt_info);
}
return 0;
@@ -1114,14 +1107,14 @@ segment_loadstore_group_size (enum vect_cost_for_stmt kind,
unsigned
costs::adjust_stmt_cost (enum vect_cost_for_stmt kind, loop_vec_info loop,
stmt_vec_info stmt_info,
- slp_tree, tree vectype, int stmt_cost)
+ slp_tree node, tree vectype, int stmt_cost)
{
const cpu_vector_cost *costs = get_vector_costs ();
switch (kind)
{
case scalar_to_vec:
- stmt_cost += (FLOAT_TYPE_P (vectype) ? costs->regmove->FR2VR
- : costs->regmove->GR2VR);
+ stmt_cost
+ += (FLOAT_TYPE_P (vectype) ? get_fr2vr_cost () : get_gr2vr_cost ());
break;
case vec_to_scalar:
stmt_cost += (FLOAT_TYPE_P (vectype) ? costs->regmove->VR2FR
@@ -1137,7 +1130,8 @@ costs::adjust_stmt_cost (enum vect_cost_for_stmt kind, loop_vec_info loop,
each vector in the group. Here we additionally add permute
costs for each. */
/* TODO: Indexed and ordered/unordered cost. */
- int group_size = segment_loadstore_group_size (kind, stmt_info);
+ int group_size = segment_loadstore_group_size (kind, stmt_info,
+ node);
if (group_size > 1)
{
switch (group_size)
@@ -1260,8 +1254,12 @@ costs::add_stmt_cost (int count, vect_cost_for_stmt kind,
int stmt_cost
= targetm.vectorize.builtin_vectorization_cost (kind, vectype, misalign);
+ if (stmt_info && node)
+ vinfo_slp_map.put (stmt_info, node);
+
/* Do one-time initialization based on the vinfo. */
loop_vec_info loop_vinfo = dyn_cast<loop_vec_info> (m_vinfo);
+
if (!m_analyzed_vinfo)
{
if (loop_vinfo)
@@ -1347,6 +1345,8 @@ costs::finish_cost (const vector_costs *scalar_costs)
{
if (loop_vec_info loop_vinfo = dyn_cast<loop_vec_info> (m_vinfo))
{
+ record_lmul_spills (loop_vinfo);
+
adjust_vect_cost_per_loop (loop_vinfo);
}
vector_costs::finish_cost (scalar_costs);
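The new vinfo_slp_map ties these pieces together: add_stmt_cost records the stmt_info-to-SLP-node pairing as costs stream in, and the spill analysis triggered from finish_cost consults it to ask SLP-level questions about statements it only knows by stmt_info. A standalone model of the caching pattern (stand-in types, not GCC's):

#include <cstdio>
#include <unordered_map>

struct stmt_info { int id; };      // stand-in for stmt_vec_info
struct slp_node { int type; };     // stand-in for slp_tree

static std::unordered_map<const stmt_info *, const slp_node *> vinfo_slp_map;

static void
add_stmt_cost (const stmt_info *si, const slp_node *node)
{
  if (si && node)
    vinfo_slp_map[si] = node;      // remember the pairing
}

static int
node_type_or_skip (const stmt_info *si)
{
  auto it = vinfo_slp_map.find (si);
  return it == vinfo_slp_map.end () ? -1 : it->second->type;
}

int
main ()
{
  stmt_info si{1};
  slp_node node{42};
  add_stmt_cost (&si, &node);
  std::printf ("%d\n", node_type_or_skip (&si)); // 42
}
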
diff --git a/gcc/config/riscv/riscv-vector-costs.h b/gcc/config/riscv/riscv-vector-costs.h
index de546a6..b84ceb1 100644
--- a/gcc/config/riscv/riscv-vector-costs.h
+++ b/gcc/config/riscv/riscv-vector-costs.h
@@ -91,7 +91,10 @@ private:
typedef pair_hash <tree_operand_hash, tree_operand_hash> tree_pair_hash;
hash_set <tree_pair_hash> memrefs;
+ hash_map <stmt_vec_info, slp_tree> vinfo_slp_map;
+
void analyze_loop_vinfo (loop_vec_info);
+ void record_lmul_spills (loop_vec_info loop_vinfo);
void record_potential_vls_unrolling (loop_vec_info);
bool prefer_unrolled_loop () const;
@@ -103,6 +106,19 @@ private:
bool m_has_unexpected_spills_p = false;
void record_potential_unexpected_spills (loop_vec_info);
+ void compute_local_program_points (vec_info *,
+ hash_map<basic_block, vec<stmt_point>> &);
+ void update_local_live_ranges (vec_info *,
+ hash_map<basic_block, vec<stmt_point>> &,
+ hash_map<basic_block, hash_map<tree, pair>> &,
+ machine_mode *);
+ machine_mode compute_local_live_ranges
+ (loop_vec_info, const hash_map<basic_block, vec<stmt_point>> &,
+ hash_map<basic_block, hash_map<tree, pair>> &);
+
+ bool has_unexpected_spills_p (loop_vec_info);
+ bool need_additional_vector_vars_p (stmt_vec_info, slp_tree);
+
void adjust_vect_cost_per_loop (loop_vec_info);
unsigned adjust_stmt_cost (enum vect_cost_for_stmt kind,
loop_vec_info,
diff --git a/gcc/config/riscv/riscv-vsetvl.cc b/gcc/config/riscv/riscv-vsetvl.cc
index a8c9256..4fe0ae6 100644
--- a/gcc/config/riscv/riscv-vsetvl.cc
+++ b/gcc/config/riscv/riscv-vsetvl.cc
@@ -100,31 +100,28 @@ using namespace riscv_vector;
static void
bitmap_union_of_preds_with_entry (sbitmap dst, sbitmap *src, basic_block b)
{
- unsigned int set_size = dst->size;
- edge e;
- unsigned ix;
-
- for (ix = 0; ix < EDGE_COUNT (b->preds); ix++)
+ /* Handle case with no predecessors (including ENTRY block). */
+ if (EDGE_COUNT (b->preds) == 0)
{
- e = EDGE_PRED (b, ix);
- bitmap_copy (dst, src[e->src->index]);
- break;
+ bitmap_clear (dst);
+ return;
}
- if (ix == EDGE_COUNT (b->preds))
- bitmap_clear (dst);
- else
- for (ix++; ix < EDGE_COUNT (b->preds); ix++)
- {
- unsigned int i;
- SBITMAP_ELT_TYPE *p, *r;
-
- e = EDGE_PRED (b, ix);
- p = src[e->src->index]->elms;
- r = dst->elms;
- for (i = 0; i < set_size; i++)
- *r++ |= *p++;
- }
+ edge e;
+ edge_iterator ei;
+  /* Union the predecessors' bitmaps: copy the first, OR in the rest.  */
+ FOR_EACH_EDGE (e, ei, b->preds)
+ {
+ /* Initialize with first predecessor's bitmap. */
+ if (ei.index == 0)
+ {
+ bitmap_copy (dst, src[e->src->index]);
+ continue;
+ }
+
+      /* OR this predecessor's bitmap into the accumulated result.  */
+ bitmap_ior (dst, dst, src[e->src->index]);
+ }
}
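The rewrite replaces the hand-rolled word loop with bitmap_copy/bitmap_ior and makes the no-predecessor case (including the ENTRY block) explicit. A standalone equivalent using plain word vectors in place of sbitmaps:

#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <vector>

using bitmap = std::vector<std::uint64_t>;

static void
union_of_preds (bitmap &dst, const std::vector<bitmap> &preds)
{
  if (preds.empty ())                       // no predecessors: empty set
    {
      std::fill (dst.begin (), dst.end (), 0);
      return;
    }
  dst = preds[0];                           // copy the first predecessor
  for (std::size_t i = 1; i < preds.size (); i++)
    for (std::size_t w = 0; w < dst.size (); w++)
      dst[w] |= preds[i][w];                // OR in the rest
}

int
main ()
{
  bitmap dst (1);
  union_of_preds (dst, { {0x5}, {0xA} });
  return dst[0] == 0xF ? 0 : 1;
}
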
/* Compute the reaching definition in and out based on the gen and KILL
@@ -3419,8 +3416,7 @@ pre_vsetvl::emit_vsetvl ()
}
start_sequence ();
insert_vsetvl_insn (EMIT_DIRECT, footer_info);
- rtx_insn *rinsn = get_insns ();
- end_sequence ();
+ rtx_insn *rinsn = end_sequence ();
default_rtl_profile ();
insert_insn_on_edge (rinsn, eg);
need_commit = true;
@@ -3451,8 +3447,7 @@ pre_vsetvl::emit_vsetvl ()
start_sequence ();
insert_vsetvl_insn (EMIT_DIRECT, info);
- rtx_insn *rinsn = get_insns ();
- end_sequence ();
+ rtx_insn *rinsn = end_sequence ();
default_rtl_profile ();
/* We should not get an abnormal edge here. */
diff --git a/gcc/config/riscv/riscv-vsetvl.def b/gcc/config/riscv/riscv-vsetvl.def
index d7a5ada..0f999d2 100644
--- a/gcc/config/riscv/riscv-vsetvl.def
+++ b/gcc/config/riscv/riscv-vsetvl.def
@@ -79,7 +79,7 @@ DEF_SEW_LMUL_RULE (sew_only, sew_only, sew_only, sew_eq_p, sew_eq_p, nop)
DEF_SEW_LMUL_RULE (sew_only, ge_sew, sew_only,
sew_ge_and_prev_sew_le_next_max_sew_p, sew_ge_p, nop)
DEF_SEW_LMUL_RULE (
- sew_only, ratio_and_ge_sew, sew_lmul,
+ sew_only, ratio_and_ge_sew, ratio_and_ge_sew,
sew_ge_and_prev_sew_le_next_max_sew_and_next_ratio_valid_for_prev_sew_p,
always_false, modify_lmul_with_next_ratio)
@@ -104,9 +104,9 @@ DEF_SEW_LMUL_RULE (ratio_and_ge_sew, sew_lmul, sew_lmul,
DEF_SEW_LMUL_RULE (ratio_and_ge_sew, ratio_only, ratio_and_ge_sew, ratio_eq_p,
ratio_eq_p, use_max_sew_and_lmul_with_prev_ratio)
DEF_SEW_LMUL_RULE (
- ratio_and_ge_sew, sew_only, sew_only,
+ ratio_and_ge_sew, sew_only, ratio_and_ge_sew,
sew_le_and_next_sew_le_prev_max_sew_and_prev_ratio_valid_for_next_sew_p,
- always_false, use_next_sew_with_prev_ratio)
+ sew_eq_p, use_next_sew_with_prev_ratio)
DEF_SEW_LMUL_RULE (ratio_and_ge_sew, ge_sew, ratio_and_ge_sew,
max_sew_overlap_and_prev_ratio_valid_for_next_sew_p,
sew_ge_p, use_max_sew_and_lmul_with_prev_ratio)
diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index d3656a7..591122f 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -87,6 +87,10 @@ along with GCC; see the file COPYING3. If not see
#include "riscv-vector-costs.h"
#include "riscv-subset.h"
+/* Target variants that support full conditional move. */
+#define TARGET_COND_MOV \
+ (TARGET_SFB_ALU || TARGET_XTHEADCONDMOV || TARGET_XMIPSCMOV)
+
/* True if X is an UNSPEC wrapper around a SYMBOL_REF or LABEL_REF. */
#define UNSPEC_ADDRESS_P(X) \
(GET_CODE (X) == UNSPEC \
@@ -107,6 +111,8 @@ along with GCC; see the file COPYING3. If not see
/* True the mode switching has static frm, or false. */
#define STATIC_FRM_P(c) ((c)->machine->mode_sw_info.static_frm_p)
+#define CFUN_IN_CALL(c) ((c)->machine->mode_sw_info.cfun_call)
+
/* True if we can use the instructions in the XTheadInt extension
to handle interrupts, or false. */
#define TH_INT_INTERRUPT(c) \
@@ -164,7 +170,7 @@ struct GTY(()) riscv_frame_info {
};
enum riscv_privilege_levels {
- UNKNOWN_MODE, USER_MODE, SUPERVISOR_MODE, MACHINE_MODE
+ UNKNOWN_MODE, SUPERVISOR_MODE, MACHINE_MODE, RNMI_MODE
};
struct GTY(()) mode_switching_info {
@@ -176,10 +182,13 @@ struct GTY(()) mode_switching_info {
mode instruction in the function or not. */
bool static_frm_p;
+ bool cfun_call;
+
mode_switching_info ()
{
dynamic_frm = NULL_RTX;
static_frm_p = false;
+ cfun_call = false;
}
};
@@ -278,6 +287,10 @@ enum riscv_fusion_pairs
RISCV_FUSE_AUIPC_LD = (1 << 7),
RISCV_FUSE_LDPREINCREMENT = (1 << 8),
RISCV_FUSE_ALIGNED_STD = (1 << 9),
+ RISCV_FUSE_CACHE_ALIGNED_STD = (1 << 10),
+ RISCV_FUSE_BFEXT = (1 << 11),
+ RISCV_FUSE_EXPANDED_LD = (1 << 12),
+ RISCV_FUSE_B_ALUI = (1 << 13),
};
/* Costs of various operations on the different architectures. */
@@ -297,6 +310,8 @@ struct riscv_tune_param
bool vector_unaligned_access;
bool use_divmod_expansion;
bool overlap_op_by_pieces;
+ bool use_zero_stride_load;
+ bool speculative_sched_vsetvl;
unsigned int fusible_ops;
const struct cpu_vector_cost *vec_costs;
const char *function_align;
@@ -444,6 +459,30 @@ static const struct cpu_vector_cost generic_vector_cost = {
&rvv_regmove_vector_cost, /* regmove */
};
+/* Costs to use when optimizing for generic. */
+static const struct riscv_tune_param generic_tune_info = {
+ {COSTS_N_INSNS (4), COSTS_N_INSNS (5)}, /* fp_add */
+ {COSTS_N_INSNS (4), COSTS_N_INSNS (5)}, /* fp_mul */
+ {COSTS_N_INSNS (20), COSTS_N_INSNS (20)}, /* fp_div */
+ {COSTS_N_INSNS (4), COSTS_N_INSNS (4)}, /* int_mul */
+ {COSTS_N_INSNS (33), COSTS_N_INSNS (65)}, /* int_div */
+ 1, /* issue_rate */
+ 4, /* branch_cost */
+ 5, /* memory_cost */
+ 8, /* fmv_cost */
+ true, /* slow_unaligned_access */
+ false, /* vector_unaligned_access */
+ false, /* use_divmod_expansion */
+ false, /* overlap_op_by_pieces */
+ true, /* use_zero_stride_load */
+ false, /* speculative_sched_vsetvl */
+ RISCV_FUSE_NOTHING, /* fusible_ops */
+ NULL, /* vector cost */
+ NULL, /* function_align */
+ NULL, /* jump_align */
+ NULL, /* loop_align */
+};
+
/* Costs to use when optimizing for rocket. */
static const struct riscv_tune_param rocket_tune_info = {
{COSTS_N_INSNS (4), COSTS_N_INSNS (5)}, /* fp_add */
@@ -459,6 +498,8 @@ static const struct riscv_tune_param rocket_tune_info = {
false, /* vector_unaligned_access */
false, /* use_divmod_expansion */
false, /* overlap_op_by_pieces */
+ true, /* use_zero_stride_load */
+ false, /* speculative_sched_vsetvl */
RISCV_FUSE_NOTHING, /* fusible_ops */
NULL, /* vector cost */
NULL, /* function_align */
@@ -481,6 +522,8 @@ static const struct riscv_tune_param sifive_7_tune_info = {
false, /* vector_unaligned_access */
false, /* use_divmod_expansion */
false, /* overlap_op_by_pieces */
+ true, /* use_zero_stride_load */
+ false, /* speculative_sched_vsetvl */
RISCV_FUSE_NOTHING, /* fusible_ops */
NULL, /* vector cost */
NULL, /* function_align */
@@ -503,6 +546,8 @@ static const struct riscv_tune_param sifive_p400_tune_info = {
false, /* vector_unaligned_access */
false, /* use_divmod_expansion */
false, /* overlap_op_by_pieces */
+ true, /* use_zero_stride_load */
+ false, /* speculative_sched_vsetvl */
RISCV_FUSE_LUI_ADDI | RISCV_FUSE_AUIPC_ADDI, /* fusible_ops */
&generic_vector_cost, /* vector cost */
NULL, /* function_align */
@@ -525,6 +570,8 @@ static const struct riscv_tune_param sifive_p600_tune_info = {
false, /* vector_unaligned_access */
false, /* use_divmod_expansion */
false, /* overlap_op_by_pieces */
+ true, /* use_zero_stride_load */
+ false, /* speculative_sched_vsetvl */
RISCV_FUSE_LUI_ADDI | RISCV_FUSE_AUIPC_ADDI, /* fusible_ops */
&generic_vector_cost, /* vector cost */
NULL, /* function_align */
@@ -547,6 +594,8 @@ static const struct riscv_tune_param thead_c906_tune_info = {
false, /* vector_unaligned_access */
false, /* use_divmod_expansion */
false, /* overlap_op_by_pieces */
+ true, /* use_zero_stride_load */
+ false, /* speculative_sched_vsetvl */
RISCV_FUSE_NOTHING, /* fusible_ops */
NULL, /* vector cost */
NULL, /* function_align */
@@ -569,6 +618,8 @@ static const struct riscv_tune_param xiangshan_nanhu_tune_info = {
false, /* vector_unaligned_access */
false, /* use_divmod_expansion */
false, /* overlap_op_by_pieces */
+ true, /* use_zero_stride_load */
+ false, /* speculative_sched_vsetvl */
RISCV_FUSE_ZEXTW | RISCV_FUSE_ZEXTH, /* fusible_ops */
NULL, /* vector cost */
NULL, /* function_align */
@@ -591,6 +642,8 @@ static const struct riscv_tune_param generic_ooo_tune_info = {
true, /* vector_unaligned_access */
false, /* use_divmod_expansion */
true, /* overlap_op_by_pieces */
+ true, /* use_zero_stride_load */
+ false, /* speculative_sched_vsetvl */
RISCV_FUSE_NOTHING, /* fusible_ops */
&generic_vector_cost, /* vector cost */
NULL, /* function_align */
@@ -613,6 +666,8 @@ static const struct riscv_tune_param tt_ascalon_d8_tune_info = {
true, /* vector_unaligned_access */
true, /* use_divmod_expansion */
true, /* overlap_op_by_pieces */
+ true, /* use_zero_stride_load */
+ false, /* speculative_sched_vsetvl */
RISCV_FUSE_NOTHING, /* fusible_ops */
&generic_vector_cost, /* vector cost */
NULL, /* function_align */
@@ -635,6 +690,8 @@ static const struct riscv_tune_param optimize_size_tune_info = {
false, /* vector_unaligned_access */
false, /* use_divmod_expansion */
false, /* overlap_op_by_pieces */
+ true, /* use_zero_stride_load */
+ false, /* speculative_sched_vsetvl */
RISCV_FUSE_NOTHING, /* fusible_ops */
NULL, /* vector cost */
NULL, /* function_align */
@@ -642,6 +699,30 @@ static const struct riscv_tune_param optimize_size_tune_info = {
NULL, /* loop_align */
};
+/* Costs to use when optimizing for the MIPS P8700.  */
+static const struct riscv_tune_param mips_p8700_tune_info = {
+ {COSTS_N_INSNS (4), COSTS_N_INSNS (4)}, /* fp_add */
+ {COSTS_N_INSNS (5), COSTS_N_INSNS (5)}, /* fp_mul */
+ {COSTS_N_INSNS (17), COSTS_N_INSNS (17)}, /* fp_div */
+ {COSTS_N_INSNS (5), COSTS_N_INSNS (5)}, /* int_mul */
+ {COSTS_N_INSNS (8), COSTS_N_INSNS (8)}, /* int_div */
+ 4, /* issue_rate */
+ 8, /* branch_cost */
+ 4, /* memory_cost */
+ 8, /* fmv_cost */
+ true, /* slow_unaligned_access */
+ false, /* vector_unaligned_access */
+ true, /* use_divmod_expansion */
+ false, /* overlap_op_by_pieces */
+ true, /* use_zero_stride_load */
+ false, /* speculative_sched_vsetvl */
+ RISCV_FUSE_NOTHING, /* fusible_ops */
+ NULL, /* vector cost */
+ NULL, /* function_align */
+ NULL, /* jump_align */
+ NULL, /* loop_align */
+};
+
static bool riscv_avoid_shrink_wrapping_separate ();
static tree riscv_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
static tree riscv_handle_type_attribute (tree *, tree, tree, int, bool *);
@@ -776,6 +857,16 @@ void riscv_frame_info::reset(void)
arg_pointer_offset = 0;
}
+/* Check if the mode is twice the size of the XLEN mode. */
+
+static bool
+riscv_2x_xlen_mode_p (machine_mode mode)
+{
+ poly_int64 mode_size = GET_MODE_SIZE (mode);
+ return mode_size.is_constant ()
+ && (mode_size.to_constant () == UNITS_PER_WORD * 2);
+}
+
/* Implement TARGET_MIN_ARITHMETIC_PRECISION. */
static unsigned int
@@ -1007,16 +1098,16 @@ riscv_build_integer_1 (struct riscv_integer_op codes[RISCV_MAX_INTEGER_OPS],
/* Now iterate over the bits we want to clear until the cost is
too high or we're done. */
nval = value ^ HOST_WIDE_INT_C (-1);
- nval &= HOST_WIDE_INT_C (~0x7fffffff);
+ nval &= ~HOST_WIDE_INT_C (0x7fffffff);
while (nval && alt_cost < cost)
{
HOST_WIDE_INT bit = ctz_hwi (nval);
alt_codes[alt_cost].code = AND;
- alt_codes[alt_cost].value = ~(1UL << bit);
+ alt_codes[alt_cost].value = ~(HOST_WIDE_INT_UC (1) << bit);
alt_codes[alt_cost].use_uw = false;
alt_codes[alt_cost].save_temporary = false;
alt_cost++;
- nval &= ~(1UL << bit);
+ nval &= ~(HOST_WIDE_INT_UC (1) << bit);
}
if (nval == 0 && alt_cost <= cost)
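The HOST_WIDE_INT_UC change matters on hosts where unsigned long is 32 bits (LLP64 Windows, 32-bit hosts): there, 1UL << bit is undefined for bit >= 32, and even a well-defined result would be truncated before widening into the 64-bit mask. A minimal demonstration of the portable form:

#include <cstdint>
#include <cstdio>

int
main ()
{
  unsigned bit = 40;
  // Portable: widen the 1 to 64 bits *before* shifting, which is what
  // HOST_WIDE_INT_UC (1) guarantees regardless of the host ABI.
  std::uint64_t mask = ~(std::uint64_t{1} << bit);
  std::printf ("%016llx\n", (unsigned long long) mask); // fffffeffffffffff
}
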
@@ -2856,9 +2947,7 @@ riscv_call_tls_get_addr (rtx sym, rtx result)
gen_int_mode (RISCV_CC_BASE, SImode)));
RTL_CONST_CALL_P (insn) = 1;
use_reg (&CALL_INSN_FUNCTION_USAGE (insn), a0);
- insn = get_insns ();
-
- end_sequence ();
+ insn = end_sequence ();
return insn;
}
@@ -3596,7 +3685,8 @@ riscv_legitimize_move (machine_mode mode, rtx dest, rtx src)
/* This test can fail if (for example) we want a HF and Z[v]fh is
not enabled. In that case we just want to let the standard
expansion path run. */
- if (riscv_vector::get_vector_mode (smode, nunits).exists (&vmode))
+ if (riscv_vector::get_vector_mode (smode, nunits).exists (&vmode)
+ && gen_lowpart_common (vmode, SUBREG_REG (src)))
{
rtx v = gen_lowpart (vmode, SUBREG_REG (src));
rtx int_reg = dest;
@@ -3742,6 +3832,24 @@ riscv_legitimize_move (machine_mode mode, rtx dest, rtx src)
return true;
}
+ if (TARGET_ZILSD && riscv_2x_xlen_mode_p (mode)
+ && ((REG_P (dest) && MEM_P (src)) || (MEM_P (dest) && REG_P (src)))
+ && can_create_pseudo_p ())
+ {
+ rtx reg = REG_P (dest) ? dest : src;
+ unsigned regno = REGNO (reg);
+      /* ZILSD requires an even-odd register pair; let the RA fix the
+        constraint when the reg is a hard register with an odd regno.  */
+ if ((regno < FIRST_PSEUDO_REGISTER)
+ && (regno % 2) != 0)
+ {
+ rtx tmp = gen_reg_rtx (GET_MODE (reg));
+ emit_move_insn (tmp, src);
+ emit_move_insn (dest, tmp);
+ return true;
+ }
+ }
+
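The hook only intervenes for hard registers with an odd number, bouncing the value through a fresh pseudo so the register allocator can later satisfy the even-odd pair constraint itself. A tiny model of the predicate (boundary constant illustrative):

#include <cassert>

static const unsigned first_pseudo = 128;  // illustrative boundary

// ld/sd in Zilsd address the pair (rd, rd+1), so a hard register
// operand must be the even half of a pair.
static bool
needs_copy_through_pseudo (unsigned regno)
{
  // Pseudos are left alone; the register allocator pairs them later.
  return regno < first_pseudo && (regno % 2) != 0;
}

int
main ()
{
  assert (needs_copy_through_pseudo (11));    // odd hard reg: fix up
  assert (!needs_copy_through_pseudo (10));   // even hard reg: fine
  assert (!needs_copy_through_pseudo (200));  // pseudo: untouched
}
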
  /* RISC-V GCC may generate a non-legitimate address because we provide
     patterns to optimize access to PIC local symbols, which can make GCC
     emit unrecognizable instructions during optimization.  */
@@ -3812,7 +3920,7 @@ static int
riscv_binary_cost (rtx x, int single_insns, int double_insns)
{
if (!riscv_v_ext_mode_p (GET_MODE (x))
- && GET_MODE_SIZE (GET_MODE (x)).to_constant () == UNITS_PER_WORD * 2)
+ && riscv_2x_xlen_mode_p (GET_MODE (x)))
return COSTS_N_INSNS (double_insns);
return COSTS_N_INSNS (single_insns);
}
@@ -3863,7 +3971,37 @@ riscv_rtx_costs (rtx x, machine_mode mode, int outer_code, int opno ATTRIBUTE_UN
Cost Model need to be well analyzed and supported in the future. */
if (riscv_v_ext_mode_p (mode))
{
- *total = COSTS_N_INSNS (1);
+ int gr2vr_cost = get_gr2vr_cost ();
+ int fr2vr_cost = get_fr2vr_cost ();
+ int scalar2vr_cost = FLOAT_MODE_P (GET_MODE_INNER (mode))
+ ? fr2vr_cost : gr2vr_cost;
+
+ switch (outer_code)
+ {
+ case SET:
+ {
+ if (GET_CODE (x) == VEC_DUPLICATE)
+ *total = (scalar2vr_cost + 1) * COSTS_N_INSNS (1);
+ else
+ {
+ int vec_dup_count = 0;
+ subrtx_var_iterator::array_type array;
+
+ FOR_EACH_SUBRTX_VAR (iter, array, x, ALL)
+ if (GET_CODE (*iter) == VEC_DUPLICATE)
+ vec_dup_count++;
+
+ int total_vec_dup_cost = vec_dup_count * scalar2vr_cost;
+
+ *total = COSTS_N_INSNS (1) * (total_vec_dup_cost + 1);
+ }
+ }
+ break;
+ default:
+ *total = COSTS_N_INSNS (1);
+ break;
+ }
+
return true;
}
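Both arms of the SET case implement one rule: every VEC_DUPLICATE of a scalar pays a GPR/FPR-to-VR move on top of the base instruction, so a direct VEC_DUPLICATE is simply the count-of-one special case. The rule as plain arithmetic:

#include <cstdio>

// COSTS_N_INSNS (1) is passed in as one_insn to keep the sketch
// self-contained.
static int
vec_set_cost (int vec_dup_count, int scalar2vr_cost, int one_insn)
{
  return one_insn * (vec_dup_count * scalar2vr_cost + 1);
}

int
main ()
{
  // Two duplicated scalars with a move cost of 2: (2*2 + 1) insns.
  std::printf ("%d\n", vec_set_cost (2, 2, 1));
}
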
@@ -3883,10 +4021,41 @@ riscv_rtx_costs (rtx x, machine_mode mode, int outer_code, int opno ATTRIBUTE_UN
*total = COSTS_N_INSNS (1);
return true;
}
+
+ /* Register move for XLEN * 2. */
+ if (TARGET_ZILSD
+ && register_operand (SET_SRC (x), GET_MODE (SET_SRC (x)))
+ && riscv_2x_xlen_mode_p (mode))
+ {
+      /* A move still takes two instructions with ZILSD, but subtract
+        one from the cost so that the subreg pass does not split it.
+        TODO: Add riscv_tune_param for this.  */
+ *total = COSTS_N_INSNS (2) - 1;
+ return true;
+ }
+
+ /* Load for XLEN * 2. */
+ if (TARGET_ZILSD && MEM_P (SET_SRC (x))
+ && riscv_2x_xlen_mode_p (mode))
+ {
+ /* TODO: Add riscv_tune_param for this. */
+ *total = COSTS_N_INSNS (1);
+ return true;
+ }
+
riscv_rtx_costs (SET_SRC (x), mode, SET, opno, total, speed);
return true;
}
+ /* Store for XLEN * 2. */
+ if (TARGET_ZILSD && MEM_P (SET_DEST (x)) && REG_P (SET_SRC (x))
+ && riscv_2x_xlen_mode_p (mode))
+ {
+ /* TODO: Add riscv_tune_param for this. */
+ *total = COSTS_N_INSNS (1);
+ return true;
+ }
+
/* Otherwise return FALSE indicating we should recurse into both the
SET_DEST and SET_SRC combining the cost of both. */
return false;
@@ -3946,7 +4115,7 @@ riscv_rtx_costs (rtx x, machine_mode mode, int outer_code, int opno ATTRIBUTE_UN
return false;
case IF_THEN_ELSE:
- if ((TARGET_SFB_ALU || TARGET_XTHEADCONDMOV)
+ if (TARGET_COND_MOV
&& reg_or_0_operand (XEXP (x, 1), mode)
&& sfb_alu_operand (XEXP (x, 2), mode)
&& comparison_operator (XEXP (x, 0), VOIDmode))
@@ -4486,16 +4655,14 @@ riscv_noce_conversion_profitable_p (rtx_insn *seq,
rtx dest = SET_DEST (x);
- /* Do something similar for the moves that are likely to
+ /* Do something similar for the moves that are likely to
turn into NOP moves by the time the register allocator is
- done. These are also side effects of how our sCC expanders
- work. We'll want to check and update LAST_DEST here too. */
- if (last_dest
- && REG_P (dest)
+ done. We don't require src to be something set in this
+ sequence, just a promoted SUBREG. */
+ if (REG_P (dest)
&& GET_MODE (dest) == SImode
&& SUBREG_P (src)
- && SUBREG_PROMOTED_VAR_P (src)
- && REGNO (SUBREG_REG (src)) == REGNO (last_dest))
+ && SUBREG_PROMOTED_VAR_P (src))
{
riscv_if_info.original_cost += COSTS_N_INSNS (1);
riscv_if_info.max_seq_cost += COSTS_N_INSNS (1);
@@ -4544,6 +4711,19 @@ riscv_split_64bit_move_p (rtx dest, rtx src)
if (TARGET_64BIT)
return false;
+ /* Zilsd provides load/store with even-odd register pair. */
+ if (TARGET_ZILSD
+ && (((REG_P (dest) && MEM_P (src))
+ || (MEM_P (dest) && REG_P (src)))))
+ {
+ rtx reg = REG_P (dest) ? dest : src;
+ unsigned regno = REGNO (reg);
+      /* GCC may still generate some loads/stores with odd-even register
+        pairs because of ABI handling; that's fine, just split them later.  */
+ if (GP_REG_P (regno))
+ return (regno < FIRST_PSEUDO_REGISTER) && ((regno % 2) != 0);
+ }
+
/* There is no need to split if the FLI instruction in the `Zfa` extension can be used. */
if (satisfies_constraint_zfli (src))
return false;
@@ -5253,40 +5433,137 @@ riscv_expand_conditional_branch (rtx label, rtx_code code, rtx op0, rtx op1)
emit_jump_insn (gen_condjump (condition, label));
}
+/* Canonicalize comparison operands *OP0 and *OP1 for CODE.  */
+void
+canonicalize_comparands (rtx_code code, rtx *op0, rtx *op1)
+{
+ /* An integer comparison must be comparing WORD_MODE objects.
+ Extend the comparison arguments as necessary. */
+ if ((INTEGRAL_MODE_P (GET_MODE (*op0)) && GET_MODE (*op0) != word_mode)
+ || (INTEGRAL_MODE_P (GET_MODE (*op1)) && GET_MODE (*op1) != word_mode))
+ riscv_extend_comparands (code, op0, op1);
+
+ /* We might have been handed back a SUBREG. Just to make things
+ easy, force it into a REG. */
+ if (!REG_P (*op0) && !CONST_INT_P (*op0) && INTEGRAL_MODE_P (GET_MODE (*op0)))
+ *op0 = force_reg (word_mode, *op0);
+ if (!REG_P (*op1) && !CONST_INT_P (*op1) && INTEGRAL_MODE_P (GET_MODE (*op1)))
+ *op1 = force_reg (word_mode, *op1);
+}
+
+/* Emit target specific conditional move like TARGET_XMIPSCMOV etc. */
+bool
+riscv_target_conditional_move (rtx dest, rtx op0, rtx op1, rtx_code code,
+ rtx cons, rtx alt)
+{
+ machine_mode dst_mode = GET_MODE (dest);
+ rtx target;
+
+  /* Force the true/false arms into registers.  */
+ cons = force_reg (dst_mode, cons);
+ alt = force_reg (dst_mode, alt);
+
+ if (TARGET_XMIPSCMOV)
+ {
+ if (code == EQ || code == NE)
+ {
+ op0 = riscv_zero_if_equal (op0, op1);
+ op1 = const0_rtx;
+ }
+ else
+ {
+ target = gen_reg_rtx (GET_MODE (op0));
+ riscv_emit_int_order_test (code, 0, target, op0, op1);
+ op0 = target;
+ op1 = const0_rtx;
+ code = NE;
+ }
+ riscv_emit_int_compare (&code, &op0, &op1);
+ rtx cond = gen_rtx_fmt_ee (code, GET_MODE (op0), op0, op1);
+ emit_insn (gen_rtx_SET (dest, gen_rtx_IF_THEN_ELSE (dst_mode,
+ cond, cons, alt)));
+ return true;
+ }
+ /* TARGET_SFB_ALU || TARGET_XTHEADCONDMOV. */
+ else
+ {
+ riscv_emit_int_compare (&code, &op0, &op1, !TARGET_SFB_ALU);
+ rtx cond = gen_rtx_fmt_ee (code, GET_MODE (op0), op0, op1);
+ emit_insn (gen_rtx_SET (dest, gen_rtx_IF_THEN_ELSE (dst_mode, cond,
+ cons, alt)));
+ return true;
+ }
+}
+
/* Emit a cond move: If OP holds, move CONS to DEST; else move ALT to DEST.
Return 0 if expansion failed. */
bool
riscv_expand_conditional_move (rtx dest, rtx op, rtx cons, rtx alt)
{
- machine_mode mode = GET_MODE (dest);
+ machine_mode dst_mode = GET_MODE (dest);
+ machine_mode cond_mode = GET_MODE (dest);
rtx_code code = GET_CODE (op);
rtx op0 = XEXP (op, 0);
rtx op1 = XEXP (op, 1);
+ /* General note. This is called from the conditional move
+ expander. That simplifies the cases we need to worry about
+ as we know the destination will have the same mode as the
+ true/false arms. Furthermore we know that mode will be
+ DI/SI for rv64 or SI for rv32. */
+
+ /* For some tests, we can easily construct a 0, -1 value
+ which can then be used to synthesize more efficient
+ sequences that don't use zicond. */
+ if ((code == LT || code == GE)
+ && (REG_P (op0) || SUBREG_P (op0))
+ && op1 == CONST0_RTX (GET_MODE (op0)))
+ {
+ /* The code to expand signed division by a power of 2 uses a
+ conditional add by 2^n-1 idiom. It can be more efficiently
+ synthesized without zicond using srai+srli+add.
+
+ But we don't see the constants here. Just a conditional move
+ with registers as the true/false values. So this is a little
+ over-aggressive and can result in a few missed if-conversions. */
+ if ((REG_P (cons) || SUBREG_P (cons))
+ && (REG_P (alt) || SUBREG_P (alt)))
+ return false;
+
+ /* If one value is a nonzero constant and the other value is
+ not a constant, then avoid zicond as more efficient sequences
+ using the splatted sign bit are often possible. */
+ if (CONST_INT_P (alt)
+ && alt != CONST0_RTX (dst_mode)
+ && !CONST_INT_P (cons))
+ return false;
+
+ if (CONST_INT_P (cons)
+ && cons != CONST0_RTX (dst_mode)
+ && !CONST_INT_P (alt))
+ return false;
+
+ /* If we need more special cases, add them here. */
+ }
+
+
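The "splatted sign bit" sequences the comment prefers work because srai of a word by XLEN-1 yields 0 or -1; both the srai+srli+add division trick and the general mask-select build on that splat, selecting an arm with AND/OR and no branch. The select form in plain C++ (assuming arithmetic right shift of signed values, which GCC guarantees):

#include <cstdint>
#include <cstdio>

static std::int64_t
select_neg (std::int64_t x, std::int64_t a, std::int64_t b) // x < 0 ? a : b
{
  std::int64_t mask = x >> 63;         // srai: splats the sign, 0 or -1
  return (a & mask) | (b & ~mask);     // branchless select
}

int
main ()
{
  std::printf ("%lld %lld\n",
               (long long) select_neg (-5, 10, 20),  // 10
               (long long) select_neg (5, 10, 20));  // 20
}
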
if (((TARGET_ZICOND_LIKE
- || (arith_operand (cons, mode) && arith_operand (alt, mode)))
- && (GET_MODE_CLASS (mode) == MODE_INT))
- || TARGET_SFB_ALU || TARGET_XTHEADCONDMOV)
+ || (arith_operand (cons, dst_mode) && arith_operand (alt, dst_mode)))
+ && GET_MODE_CLASS (dst_mode) == MODE_INT
+ && GET_MODE_CLASS (cond_mode) == MODE_INT)
+ || TARGET_COND_MOV)
{
machine_mode mode0 = GET_MODE (op0);
- machine_mode mode1 = GET_MODE (op1);
-
- /* An integer comparison must be comparing WORD_MODE objects. We
- must enforce that so that we don't strip away a sign_extension
- thinking it is unnecessary. We might consider using
- riscv_extend_operands if they are not already properly extended. */
- if ((INTEGRAL_MODE_P (mode0) && mode0 != word_mode)
- || (INTEGRAL_MODE_P (mode1) && mode1 != word_mode))
- return false;
- /* In the fallback generic case use MODE rather than WORD_MODE for
- the output of the SCC instruction, to match the mode of the NEG
+      canonicalize_comparands (code, &op0, &op1);
+
+ /* In the fallback generic case use DST_MODE rather than WORD_MODE
+ for the output of the SCC instruction, to match the mode of the NEG
operation below. The output of SCC is 0 or 1 boolean, so it is
valid for input in any scalar integer mode. */
- rtx tmp = gen_reg_rtx ((TARGET_ZICOND_LIKE
- || TARGET_SFB_ALU || TARGET_XTHEADCONDMOV)
- ? word_mode : mode);
+ rtx tmp = gen_reg_rtx ((TARGET_ZICOND_LIKE || TARGET_COND_MOV)
+ ? word_mode : dst_mode);
bool invert = false;
/* Canonicalize the comparison. It must be an equality comparison
@@ -5315,41 +5592,28 @@ riscv_expand_conditional_move (rtx dest, rtx op, rtx cons, rtx alt)
else
return false;
- op = gen_rtx_fmt_ee (invert ? EQ : NE, mode, tmp, const0_rtx);
+ op = gen_rtx_fmt_ee (invert ? EQ : NE, cond_mode, tmp, const0_rtx);
/* We've generated a new comparison. Update the local variables. */
code = GET_CODE (op);
op0 = XEXP (op, 0);
op1 = XEXP (op, 1);
}
- else if (!TARGET_ZICOND_LIKE && !TARGET_SFB_ALU && !TARGET_XTHEADCONDMOV)
+ else if (!TARGET_ZICOND_LIKE && !TARGET_COND_MOV)
riscv_expand_int_scc (tmp, code, op0, op1, &invert);
- if (TARGET_SFB_ALU || TARGET_XTHEADCONDMOV)
- {
- riscv_emit_int_compare (&code, &op0, &op1, !TARGET_SFB_ALU);
- rtx cond = gen_rtx_fmt_ee (code, GET_MODE (op0), op0, op1);
+ if (TARGET_COND_MOV)
+ return riscv_target_conditional_move (dest, op0, op1, code, cons, alt);
- /* The expander is a bit loose in its specification of the true
- arm of the conditional move. That allows us to support more
- cases for extensions which are more general than SFB. But
- does mean we need to force CONS into a register at this point. */
- cons = force_reg (mode, cons);
- /* With XTheadCondMov we need to force ALT into a register too. */
- alt = force_reg (mode, alt);
- emit_insn (gen_rtx_SET (dest, gen_rtx_IF_THEN_ELSE (mode, cond,
- cons, alt)));
- return true;
- }
else if (!TARGET_ZICOND_LIKE)
{
if (invert)
std::swap (cons, alt);
- rtx reg1 = gen_reg_rtx (mode);
- rtx reg2 = gen_reg_rtx (mode);
- rtx reg3 = gen_reg_rtx (mode);
- rtx reg4 = gen_reg_rtx (mode);
+ rtx reg1 = gen_reg_rtx (dst_mode);
+ rtx reg2 = gen_reg_rtx (dst_mode);
+ rtx reg3 = gen_reg_rtx (dst_mode);
+ rtx reg4 = gen_reg_rtx (dst_mode);
riscv_emit_unary (NEG, reg1, tmp);
riscv_emit_binary (AND, reg2, reg1, cons);
@@ -5359,48 +5623,52 @@ riscv_expand_conditional_move (rtx dest, rtx op, rtx cons, rtx alt)
return true;
}
/* 0, reg or 0, imm */
- else if (cons == CONST0_RTX (mode)
- && (REG_P (alt)
- || (CONST_INT_P (alt) && alt != CONST0_RTX (mode))))
+ else if (cons == CONST0_RTX (dst_mode)
+ && ((REG_P (alt) || SUBREG_P (alt))
+ || (CONST_INT_P (alt) && alt != CONST0_RTX (dst_mode))))
{
riscv_emit_int_compare (&code, &op0, &op1, true);
rtx cond = gen_rtx_fmt_ee (code, GET_MODE (op0), op0, op1);
- alt = force_reg (mode, alt);
+ alt = force_reg (dst_mode, alt);
emit_insn (gen_rtx_SET (dest,
- gen_rtx_IF_THEN_ELSE (mode, cond,
+ gen_rtx_IF_THEN_ELSE (dst_mode, cond,
cons, alt)));
return true;
}
/* imm, imm */
- else if (CONST_INT_P (cons) && cons != CONST0_RTX (mode)
- && CONST_INT_P (alt) && alt != CONST0_RTX (mode))
+ else if (CONST_INT_P (cons) && cons != CONST0_RTX (dst_mode)
+ && CONST_INT_P (alt) && alt != CONST0_RTX (dst_mode))
{
riscv_emit_int_compare (&code, &op0, &op1, true);
rtx cond = gen_rtx_fmt_ee (code, GET_MODE (op0), op0, op1);
HOST_WIDE_INT t = INTVAL (alt) - INTVAL (cons);
- alt = force_reg (mode, gen_int_mode (t, mode));
+ alt = force_reg (dst_mode, gen_int_mode (t, dst_mode));
emit_insn (gen_rtx_SET (dest,
- gen_rtx_IF_THEN_ELSE (mode, cond,
- CONST0_RTX (mode),
+ gen_rtx_IF_THEN_ELSE (dst_mode, cond,
+ CONST0_RTX (dst_mode),
alt)));
/* CONS might not fit into a signed 12 bit immediate suitable
for an addi instruction. If that's the case, force it
into a register. */
if (!SMALL_OPERAND (INTVAL (cons)))
- cons = force_reg (mode, cons);
+ cons = force_reg (dst_mode, cons);
riscv_emit_binary (PLUS, dest, dest, cons);
return true;
}
/* imm, reg */
- else if (CONST_INT_P (cons) && cons != CONST0_RTX (mode) && REG_P (alt))
+ else if (CONST_INT_P (cons)
+ && cons != CONST0_RTX (dst_mode)
+ && (REG_P (alt) || SUBREG_P (alt)))
{
/* Optimize for register value of 0. */
- if (code == NE && rtx_equal_p (op0, alt) && op1 == CONST0_RTX (mode))
+ if (code == NE
+ && rtx_equal_p (op0, alt)
+ && op1 == CONST0_RTX (dst_mode))
{
rtx cond = gen_rtx_fmt_ee (code, GET_MODE (op0), op0, op1);
- cons = force_reg (mode, cons);
+ cons = force_reg (dst_mode, cons);
emit_insn (gen_rtx_SET (dest,
- gen_rtx_IF_THEN_ELSE (mode, cond,
+ gen_rtx_IF_THEN_ELSE (dst_mode, cond,
cons, alt)));
return true;
}
@@ -5408,47 +5676,51 @@ riscv_expand_conditional_move (rtx dest, rtx op, rtx cons, rtx alt)
riscv_emit_int_compare (&code, &op0, &op1, true);
rtx cond = gen_rtx_fmt_ee (code, GET_MODE (op0), op0, op1);
- rtx temp1 = gen_reg_rtx (mode);
- rtx temp2 = gen_int_mode (-1 * INTVAL (cons), mode);
+ rtx temp1 = gen_reg_rtx (dst_mode);
+ rtx temp2 = gen_int_mode (-1 * INTVAL (cons), dst_mode);
/* TEMP2 and/or CONS might not fit into a signed 12 bit immediate
suitable for an addi instruction. If that's the case, force it
into a register. */
if (!SMALL_OPERAND (INTVAL (temp2)))
- temp2 = force_reg (mode, temp2);
+ temp2 = force_reg (dst_mode, temp2);
if (!SMALL_OPERAND (INTVAL (cons)))
- cons = force_reg (mode, cons);
+ cons = force_reg (dst_mode, cons);
riscv_emit_binary (PLUS, temp1, alt, temp2);
emit_insn (gen_rtx_SET (dest,
- gen_rtx_IF_THEN_ELSE (mode, cond,
- CONST0_RTX (mode),
+ gen_rtx_IF_THEN_ELSE (dst_mode, cond,
+ CONST0_RTX (dst_mode),
temp1)));
riscv_emit_binary (PLUS, dest, dest, cons);
return true;
}
/* reg, 0 or imm, 0 */
- else if ((REG_P (cons)
- || (CONST_INT_P (cons) && cons != CONST0_RTX (mode)))
- && alt == CONST0_RTX (mode))
+ else if (((REG_P (cons) || SUBREG_P (cons))
+ || (CONST_INT_P (cons) && cons != CONST0_RTX (dst_mode)))
+ && alt == CONST0_RTX (dst_mode))
{
riscv_emit_int_compare (&code, &op0, &op1, true);
rtx cond = gen_rtx_fmt_ee (code, GET_MODE (op0), op0, op1);
- cons = force_reg (mode, cons);
- emit_insn (gen_rtx_SET (dest, gen_rtx_IF_THEN_ELSE (mode, cond,
+ cons = force_reg (dst_mode, cons);
+ emit_insn (gen_rtx_SET (dest, gen_rtx_IF_THEN_ELSE (dst_mode, cond,
cons, alt)));
return true;
}
/* reg, imm */
- else if (REG_P (cons) && CONST_INT_P (alt) && alt != CONST0_RTX (mode))
+ else if ((REG_P (cons) || (SUBREG_P (cons)))
+ && CONST_INT_P (alt)
+ && alt != CONST0_RTX (dst_mode))
{
/* Optimize for register value of 0. */
- if (code == EQ && rtx_equal_p (op0, cons) && op1 == CONST0_RTX (mode))
+ if (code == EQ
+ && rtx_equal_p (op0, cons)
+ && op1 == CONST0_RTX (dst_mode))
{
rtx cond = gen_rtx_fmt_ee (code, GET_MODE (op0), op0, op1);
- alt = force_reg (mode, alt);
+ alt = force_reg (dst_mode, alt);
emit_insn (gen_rtx_SET (dest,
- gen_rtx_IF_THEN_ELSE (mode, cond,
+ gen_rtx_IF_THEN_ELSE (dst_mode, cond,
cons, alt)));
return true;
}
@@ -5456,53 +5728,54 @@ riscv_expand_conditional_move (rtx dest, rtx op, rtx cons, rtx alt)
riscv_emit_int_compare (&code, &op0, &op1, true);
rtx cond = gen_rtx_fmt_ee (code, GET_MODE (op0), op0, op1);
- rtx temp1 = gen_reg_rtx (mode);
- rtx temp2 = gen_int_mode (-1 * INTVAL (alt), mode);
+ rtx temp1 = gen_reg_rtx (dst_mode);
+ rtx temp2 = gen_int_mode (-1 * INTVAL (alt), dst_mode);
/* TEMP2 and/or ALT might not fit into a signed 12 bit immediate
suitable for an addi instruction. If that's the case, force it
into a register. */
if (!SMALL_OPERAND (INTVAL (temp2)))
- temp2 = force_reg (mode, temp2);
+ temp2 = force_reg (dst_mode, temp2);
if (!SMALL_OPERAND (INTVAL (alt)))
- alt = force_reg (mode, alt);
+ alt = force_reg (dst_mode, alt);
riscv_emit_binary (PLUS, temp1, cons, temp2);
emit_insn (gen_rtx_SET (dest,
- gen_rtx_IF_THEN_ELSE (mode, cond,
+ gen_rtx_IF_THEN_ELSE (dst_mode, cond,
temp1,
- CONST0_RTX (mode))));
+ CONST0_RTX (dst_mode))));
riscv_emit_binary (PLUS, dest, dest, alt);
return true;
}
/* reg, reg */
- else if (REG_P (cons) && REG_P (alt))
+ else if ((REG_P (cons) || SUBREG_P (cons))
+ && (REG_P (alt) || SUBREG_P (alt)))
{
if (((code == EQ && rtx_equal_p (cons, op0))
|| (code == NE && rtx_equal_p (alt, op0)))
- && op1 == CONST0_RTX (mode))
+ && op1 == CONST0_RTX (dst_mode))
{
rtx cond = gen_rtx_fmt_ee (code, GET_MODE (op0), op0, op1);
- alt = force_reg (mode, alt);
+ alt = force_reg (dst_mode, alt);
emit_insn (gen_rtx_SET (dest,
- gen_rtx_IF_THEN_ELSE (mode, cond,
+ gen_rtx_IF_THEN_ELSE (dst_mode, cond,
cons, alt)));
return true;
}
- rtx reg1 = gen_reg_rtx (mode);
- rtx reg2 = gen_reg_rtx (mode);
+ rtx reg1 = gen_reg_rtx (dst_mode);
+ rtx reg2 = gen_reg_rtx (dst_mode);
riscv_emit_int_compare (&code, &op0, &op1, true);
rtx cond1 = gen_rtx_fmt_ee (code, GET_MODE (op0), op0, op1);
rtx cond2 = gen_rtx_fmt_ee (code == NE ? EQ : NE,
GET_MODE (op0), op0, op1);
emit_insn (gen_rtx_SET (reg2,
- gen_rtx_IF_THEN_ELSE (mode, cond2,
- CONST0_RTX (mode),
+ gen_rtx_IF_THEN_ELSE (dst_mode, cond2,
+ CONST0_RTX (dst_mode),
cons)));
emit_insn (gen_rtx_SET (reg1,
- gen_rtx_IF_THEN_ELSE (mode, cond1,
- CONST0_RTX (mode),
+ gen_rtx_IF_THEN_ELSE (dst_mode, cond1,
+ CONST0_RTX (dst_mode),
alt)));
riscv_emit_binary (PLUS, dest, reg1, reg2);
return true;
@@ -5853,7 +6126,8 @@ riscv_pass_vls_aggregate_in_gpr (struct riscv_arg_info *info, machine_mode mode,
For a library call, FNTYPE is 0. */
void
-riscv_init_cumulative_args (CUMULATIVE_ARGS *cum, tree fntype, rtx, tree, int)
+riscv_init_cumulative_args (CUMULATIVE_ARGS *cum, const_tree fntype,
+ rtx, tree, int)
{
memset (cum, 0, sizeof (*cum));
@@ -6134,30 +6408,44 @@ riscv_arg_partial_bytes (cumulative_args_t cum,
return arg.stack_p ? arg.num_gprs * UNITS_PER_WORD : 0;
}
-/* Implement FUNCTION_VALUE and LIBCALL_VALUE. For normal calls,
- VALTYPE is the return type and MODE is VOIDmode. For libcalls,
- VALTYPE is null and MODE is the mode of the return value. */
+/* Implements hook TARGET_FUNCTION_VALUE. */
rtx
-riscv_function_value (const_tree type, const_tree func, machine_mode mode)
+riscv_function_value (const_tree ret_type, const_tree fn_decl_or_type,
+ bool)
{
struct riscv_arg_info info;
CUMULATIVE_ARGS args;
- if (type)
+ if (fn_decl_or_type)
{
- int unsigned_p = TYPE_UNSIGNED (type);
+ const_tree fntype = TREE_CODE (fn_decl_or_type) == FUNCTION_DECL ?
+ TREE_TYPE (fn_decl_or_type) : fn_decl_or_type;
+ riscv_init_cumulative_args (&args, fntype, NULL_RTX, NULL_TREE, 0);
+ }
+ else
+ memset (&args, 0, sizeof args);
- mode = TYPE_MODE (type);
+ int unsigned_p = TYPE_UNSIGNED (ret_type);
- /* Since TARGET_PROMOTE_FUNCTION_MODE unconditionally promotes,
- return values, promote the mode here too. */
- mode = promote_function_mode (type, mode, &unsigned_p, func, 1);
- }
+ machine_mode mode = TYPE_MODE (ret_type);
- memset (&args, 0, sizeof args);
+ /* Since TARGET_PROMOTE_FUNCTION_MODE unconditionally promotes
+ return values, promote the mode here too. */
+ mode = promote_function_mode (ret_type, mode, &unsigned_p, fn_decl_or_type, 1);
+
+ return riscv_get_arg_info (&info, &args, mode, ret_type, true, true);
+}
+
+/* Implements hook TARGET_LIBCALL_VALUE. */
- return riscv_get_arg_info (&info, &args, mode, type, true, true);
+rtx
+riscv_libcall_value (machine_mode mode, const_rtx fun ATTRIBUTE_UNUSED)
+{
+ struct riscv_arg_info info;
+ CUMULATIVE_ARGS args;
+ memset (&args, 0, sizeof args);
+ return riscv_get_arg_info (&info, &args, mode, NULL_TREE, true, true);
}
/* Implement TARGET_PASS_BY_REFERENCE. */
@@ -6565,12 +6853,18 @@ riscv_handle_type_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
}
string = TREE_STRING_POINTER (cst);
- if (strcmp (string, "user") && strcmp (string, "supervisor")
- && strcmp (string, "machine"))
+ if (!strcmp (string, "rnmi") && !TARGET_SMRNMI)
+ {
+ error ("attribute 'rnmi' requires the Smrnmi ISA extension");
+ *no_add_attrs = true;
+ }
+ else if (strcmp (string, "supervisor")
+ && strcmp (string, "machine")
+ && strcmp (string, "rnmi"))
{
warning (OPT_Wattributes,
- "argument to %qE attribute is not %<\"user\"%>, %<\"supervisor\"%>, "
- "or %<\"machine\"%>", name);
+ "argument to %qE attribute is not %<\"supervisor\"%>, "
+ "%<\"machine\"%>, or %<\"rnmi\"%>", name);
*no_add_attrs = true;
}
}
@@ -6879,6 +7173,7 @@ riscv_asm_output_opcode (FILE *asm_out_file, const char *p)
'T' Print shift-index of inverted single-bit mask OP.
'~' Print w if TARGET_64BIT is true; otherwise not print anything.
'N' Print register encoding as integer (0-31).
+ 'H' Print the name of the next integer register (register number of OP plus one).
Note please keep this list and the list in riscv.md in sync. */
@@ -7174,6 +7469,27 @@ riscv_print_operand (FILE *file, rtx op, int letter)
asm_fprintf (file, "%u", (regno - offset));
break;
}
+ case 'H':
+ {
+ if (!REG_P (op))
+ {
+ output_operand_lossage ("modifier 'H' require register operand");
+ break;
+ }
+ if (REGNO (op) > 31)
+ {
+ output_operand_lossage ("modifier 'H' is for integer registers only");
+ break;
+ }
+ if (REGNO (op) == 31)
+ {
+ output_operand_lossage ("modifier 'H' cannot be applied to R31");
+ break;
+ }
+
+ fputs (reg_names[REGNO (op) + 1], file);
+ break;
+ }
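+
+ /* For example, for an operand in register a4 (x14), "%H0" prints
+ "a5" (x15), i.e. the odd half of the even/odd register pairs
+ that Zilsd loads and stores operate on. */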
default:
switch (code)
{
@@ -7863,11 +8179,9 @@ riscv_can_inline_p (tree caller, tree callee)
struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
- int isa_flag_mask = riscv_x_target_flags_isa_mask ();
-
- /* Callee and caller should have the same target options except for ISA. */
- int callee_target_flags = callee_opts->x_target_flags & ~isa_flag_mask;
- int caller_target_flags = caller_opts->x_target_flags & ~isa_flag_mask;
+ /* Callee and caller should have the same target options. */
+ int callee_target_flags = callee_opts->x_target_flags;
+ int caller_target_flags = caller_opts->x_target_flags;
if (callee_target_flags != caller_target_flags)
return false;
@@ -8686,12 +9000,20 @@ riscv_allocate_and_probe_stack_space (rtx temp1, HOST_WIDE_INT size)
temp2 = riscv_force_temporary (temp2, gen_int_mode (rounded_size, Pmode));
insn = emit_insn (gen_sub3_insn (temp2, stack_pointer_rtx, temp2));
+ /* The size does not represent actual stack pointer address shift
+ from the top of the frame, as it might be lowered before.
+ To consider the correct SP addresses for the CFA notes, it is needed
+ to correct them with the initial offset value. */
+ HOST_WIDE_INT initial_cfa_offset
+ = cfun->machine->frame.total_size.to_constant () - size;
+
if (!frame_pointer_needed)
{
/* We want the CFA independent of the stack pointer for the
duration of the loop. */
add_reg_note (insn, REG_CFA_DEF_CFA,
- plus_constant (Pmode, temp1, rounded_size));
+ plus_constant (Pmode, temp2,
+ initial_cfa_offset + rounded_size));
RTX_FRAME_RELATED_P (insn) = 1;
}
@@ -8704,7 +9026,8 @@ riscv_allocate_and_probe_stack_space (rtx temp1, HOST_WIDE_INT size)
{
insn = get_last_insn ();
add_reg_note (insn, REG_CFA_DEF_CFA,
- plus_constant (Pmode, stack_pointer_rtx, rounded_size));
+ plus_constant (Pmode, stack_pointer_rtx,
+ initial_cfa_offset + rounded_size));
RTX_FRAME_RELATED_P (insn) = 1;
}
@@ -9322,12 +9645,12 @@ riscv_expand_epilogue (int style)
if (th_int_mask && TH_INT_INTERRUPT (cfun))
emit_jump_insn (gen_th_int_pop ());
- else if (mode == MACHINE_MODE)
- emit_jump_insn (gen_riscv_mret ());
else if (mode == SUPERVISOR_MODE)
emit_jump_insn (gen_riscv_sret ());
- else
- emit_jump_insn (gen_riscv_uret ());
+ else if (mode == RNMI_MODE)
+ emit_jump_insn (gen_riscv_mnret ());
+ else /* Must be MACHINE_MODE. */
+ emit_jump_insn (gen_riscv_mret ());
}
else if (style != SIBCALL_RETURN)
{
@@ -9650,27 +9973,27 @@ int
riscv_register_move_cost (machine_mode mode,
reg_class_t from, reg_class_t to)
{
- bool from_is_fpr = from == FP_REGS || from == RVC_FP_REGS;
- bool from_is_gpr = from == GR_REGS || from == RVC_GR_REGS;
- bool to_is_fpr = to == FP_REGS || to == RVC_FP_REGS;
- bool to_is_gpr = to == GR_REGS || to == RVC_GR_REGS;
+ bool from_is_fpr = reg_class_subset_p (from, FP_REGS);
+ bool from_is_gpr = reg_class_subset_p (from, GR_REGS);
+ bool to_is_fpr = reg_class_subset_p (to, FP_REGS);
+ bool to_is_gpr = reg_class_subset_p (to, GR_REGS);
if ((from_is_fpr && to_is_gpr) || (from_is_gpr && to_is_fpr))
return tune_param->fmv_cost;
if (from == V_REGS)
{
- if (to == GR_REGS)
+ if (to_is_gpr)
return get_vector_costs ()->regmove->VR2GR;
- else if (to == FP_REGS)
+ else if (to_is_fpr)
return get_vector_costs ()->regmove->VR2FR;
}
if (to == V_REGS)
{
- if (from == GR_REGS)
- return get_vector_costs ()->regmove->GR2VR;
- else if (from == FP_REGS)
- return get_vector_costs ()->regmove->FR2VR;
+ if (from_is_gpr)
+ return get_gr2vr_cost ();
+ else if (from_is_fpr)
+ return get_fr2vr_cost ();
}
return riscv_secondary_memory_needed (mode, from, to) ? 8 : 2;
@@ -9746,6 +10069,10 @@ riscv_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
if (riscv_v_ext_mode_p (mode))
return false;
+ /* Zilsd require load/store with even-odd reg pair. */
+ if (TARGET_ZILSD && riscv_2x_xlen_mode_p (mode) && ((regno % 2) != 0))
+ return false;
+
if (!GP_REG_P (regno + nregs - 1))
return false;
}
@@ -9898,23 +10225,114 @@ riscv_fusion_enabled_p(enum riscv_fusion_pairs op)
return tune_param->fusible_ops & op;
}
+/* Matches an add:
+ (set (reg:DI rd) (plus:DI (reg:DI rs1) (reg:DI rs2))) */
+
+static bool
+riscv_set_is_add (rtx set)
+{
+ return (GET_CODE (SET_SRC (set)) == PLUS
+ && REG_P (XEXP (SET_SRC (set), 0))
+ && REG_P (XEXP (SET_SRC (set), 1))
+ && REG_P (SET_DEST (set)));
+}
+
+/* Matches an addi:
+ (set (reg:DI rd) (plus:DI (reg:DI rs1) (const_int imm))) */
+
+static bool
+riscv_set_is_addi (rtx set)
+{
+ return (GET_CODE (SET_SRC (set)) == PLUS
+ && REG_P (XEXP (SET_SRC (set), 0))
+ && CONST_INT_P (XEXP (SET_SRC (set), 1))
+ && REG_P (SET_DEST (set)));
+}
+
+/* Matches an add.uw:
+ (set (reg:DI rd)
+ (plus:DI (zero_extend:DI (reg:SI rs1)) (reg:DI rs2))) */
+
+static bool
+riscv_set_is_adduw (rtx set)
+{
+ return (GET_CODE (SET_SRC (set)) == PLUS
+ && GET_CODE (XEXP (SET_SRC (set), 0)) == ZERO_EXTEND
+ && REG_P (XEXP (XEXP (SET_SRC (set), 0), 0))
+ && REG_P (XEXP (SET_SRC (set), 1))
+ && REG_P (SET_DEST (set)));
+}
+
+/* Matches a shNadd:
+ (set (reg:DI rd)
+ (plus:DI (ashift:DI (reg:DI rs1) (const_int N)) (reg:DI rs2))) */
+
+static bool
+riscv_set_is_shNadd (rtx set)
+{
+ return (GET_CODE (SET_SRC (set)) == PLUS
+ && GET_CODE (XEXP (SET_SRC (set), 0)) == ASHIFT
+ && REG_P (XEXP (XEXP (SET_SRC (set), 0), 0))
+ && CONST_INT_P (XEXP (XEXP (SET_SRC (set), 0), 1))
+ && (INTVAL (XEXP (XEXP (SET_SRC (set), 0), 1)) == 1
+ || INTVAL (XEXP (XEXP (SET_SRC (set), 0), 1)) == 2
+ || INTVAL (XEXP (XEXP (SET_SRC (set), 0), 1)) == 3)
+ && REG_P (SET_DEST (set)));
+}
+
+/* Matches a shNadd.uw:
+ (set (reg:DI rd)
+ (plus:DI (and:DI (ashift:DI (reg:DI rs1) (const_int N))
+ (const_int N))
+ (reg:DI rs2))) */
+
+static bool
+riscv_set_is_shNadduw (rtx set)
+{
+ return (GET_CODE (SET_SRC (set)) == PLUS
+ && GET_CODE (XEXP (SET_SRC (set), 0)) == AND
+ && GET_CODE (XEXP (XEXP (SET_SRC (set), 0), 0)) == ASHIFT
+ && REG_P (XEXP (XEXP (XEXP (SET_SRC (set), 0), 0), 0))
+ && CONST_INT_P (XEXP (XEXP (XEXP (SET_SRC (set), 0), 0), 1))
+ && (INTVAL (XEXP (XEXP (XEXP (SET_SRC (set), 0), 0), 1)) == 1
+ || INTVAL (XEXP (XEXP (XEXP (SET_SRC (set), 0), 0), 1)) == 2
+ || INTVAL (XEXP (XEXP (XEXP (SET_SRC (set), 0), 0), 1)) == 3)
+ && REG_P (SET_DEST (set)));
+}
+
/* Implement TARGET_SCHED_MACRO_FUSION_PAIR_P. Return true if PREV and CURR
should be kept together during scheduling. */
static bool
riscv_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr)
{
+ /* If fusion is not enabled, then there's nothing to do. */
+ if (!riscv_macro_fusion_p ())
+ return false;
+
+ /* If PREV is already marked as fused, then we can't fuse CURR with PREV
+ and if we were to fuse them we'd end up with a blob of insns that
+ essentially are an atomic unit which is bad for scheduling. */
+ if (SCHED_GROUP_P (prev))
+ return false;
+
rtx prev_set = single_set (prev);
rtx curr_set = single_set (curr);
/* prev and curr are simple SET insns i.e. no flag setting or branching. */
bool simple_sets_p = prev_set && curr_set && !any_condjump_p (curr);
+ bool sched1 = can_create_pseudo_p ();
- if (!riscv_macro_fusion_p ())
- return false;
+ unsigned int prev_dest_regno = (prev_set && REG_P (SET_DEST (prev_set))
+ ? REGNO (SET_DEST (prev_set))
+ : FIRST_PSEUDO_REGISTER);
+ unsigned int curr_dest_regno = (curr_set && REG_P (SET_DEST (curr_set))
+ ? REGNO (SET_DEST (curr_set))
+ : FIRST_PSEUDO_REGISTER);
if (simple_sets_p
&& (riscv_fusion_enabled_p (RISCV_FUSE_ZEXTW)
- || riscv_fusion_enabled_p (RISCV_FUSE_ZEXTWS)))
+ || riscv_fusion_enabled_p (RISCV_FUSE_ZEXTWS))
+ && (sched1 || prev_dest_regno == curr_dest_regno))
{
/* We are trying to match the following:
prev (slli) == (set (reg:DI rD)
@@ -9928,19 +10346,23 @@ riscv_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr)
&& GET_CODE (SET_SRC (curr_set)) == LSHIFTRT
&& REG_P (SET_DEST (prev_set))
&& REG_P (SET_DEST (curr_set))
- && REGNO (SET_DEST (prev_set)) == REGNO (SET_DEST (curr_set))
- && REGNO (XEXP (SET_SRC (curr_set), 0)) == REGNO(SET_DEST (curr_set))
+ && REGNO (XEXP (SET_SRC (curr_set), 0)) == curr_dest_regno
&& CONST_INT_P (XEXP (SET_SRC (prev_set), 1))
&& CONST_INT_P (XEXP (SET_SRC (curr_set), 1))
&& INTVAL (XEXP (SET_SRC (prev_set), 1)) == 32
- && (( INTVAL (XEXP (SET_SRC (curr_set), 1)) == 32
- && riscv_fusion_enabled_p(RISCV_FUSE_ZEXTW) )
- || ( INTVAL (XEXP (SET_SRC (curr_set), 1)) < 32
- && riscv_fusion_enabled_p(RISCV_FUSE_ZEXTWS))))
- return true;
+ && ((INTVAL (XEXP (SET_SRC (curr_set), 1)) == 32
+ && riscv_fusion_enabled_p (RISCV_FUSE_ZEXTW) )
+ || (INTVAL (XEXP (SET_SRC (curr_set), 1)) < 32
+ && riscv_fusion_enabled_p (RISCV_FUSE_ZEXTWS))))
+ {
+ if (dump_file)
+ fprintf (dump_file, "RISCV_FUSE_ZEXTWS\n");
+ return true;
+ }
}
- if (simple_sets_p && riscv_fusion_enabled_p (RISCV_FUSE_ZEXTH))
+ if (simple_sets_p && riscv_fusion_enabled_p (RISCV_FUSE_ZEXTH)
+ && (sched1 || prev_dest_regno == curr_dest_regno))
{
/* We are trying to match the following:
prev (slli) == (set (reg:DI rD)
@@ -9952,16 +10374,20 @@ riscv_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr)
&& GET_CODE (SET_SRC (curr_set)) == LSHIFTRT
&& REG_P (SET_DEST (prev_set))
&& REG_P (SET_DEST (curr_set))
- && REGNO (SET_DEST (prev_set)) == REGNO (SET_DEST (curr_set))
- && REGNO (XEXP (SET_SRC (curr_set), 0)) == REGNO(SET_DEST (curr_set))
+ && REGNO (XEXP (SET_SRC (curr_set), 0)) == curr_dest_regno
&& CONST_INT_P (XEXP (SET_SRC (prev_set), 1))
&& CONST_INT_P (XEXP (SET_SRC (curr_set), 1))
&& INTVAL (XEXP (SET_SRC (prev_set), 1)) == 48
&& INTVAL (XEXP (SET_SRC (curr_set), 1)) == 48)
- return true;
+ {
+ if (dump_file)
+ fprintf (dump_file,"RISCV_FUSE_ZEXTH\n");
+ return true;
+ }
}
- if (simple_sets_p && riscv_fusion_enabled_p (RISCV_FUSE_LDINDEXED))
+ if (simple_sets_p && riscv_fusion_enabled_p (RISCV_FUSE_LDINDEXED)
+ && (sched1 || prev_dest_regno == curr_dest_regno))
{
/* We are trying to match the following:
prev (add) == (set (reg:DI rD)
@@ -9970,12 +10396,17 @@ riscv_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr)
(mem:DI (reg:DI rD))) */
if (MEM_P (SET_SRC (curr_set))
+ && SCALAR_INT_MODE_P (GET_MODE (SET_DEST (curr_set)))
&& REG_P (XEXP (SET_SRC (curr_set), 0))
- && REGNO (XEXP (SET_SRC (curr_set), 0)) == REGNO (SET_DEST (prev_set))
+ && REGNO (XEXP (SET_SRC (curr_set), 0)) == prev_dest_regno
&& GET_CODE (SET_SRC (prev_set)) == PLUS
&& REG_P (XEXP (SET_SRC (prev_set), 0))
&& REG_P (XEXP (SET_SRC (prev_set), 1)))
- return true;
+ {
+ if (dump_file)
+ fprintf (dump_file, "RISCV_FUSE_LDINDEXED\n");
+ return true;
+ }
/* We are trying to match the following:
prev (add) == (set (reg:DI rD)
@@ -9985,15 +10416,154 @@ riscv_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr)
if ((GET_CODE (SET_SRC (curr_set)) == SIGN_EXTEND
|| (GET_CODE (SET_SRC (curr_set)) == ZERO_EXTEND))
&& MEM_P (XEXP (SET_SRC (curr_set), 0))
+ && SCALAR_INT_MODE_P (GET_MODE (SET_DEST (curr_set)))
&& REG_P (XEXP (XEXP (SET_SRC (curr_set), 0), 0))
- && REGNO (XEXP (XEXP (SET_SRC (curr_set), 0), 0)) == REGNO (SET_DEST (prev_set))
+ && REGNO (XEXP (XEXP (SET_SRC (curr_set), 0), 0)) == prev_dest_regno
&& GET_CODE (SET_SRC (prev_set)) == PLUS
&& REG_P (XEXP (SET_SRC (prev_set), 0))
&& REG_P (XEXP (SET_SRC (prev_set), 1)))
- return true;
+ {
+ if (dump_file)
+ fprintf (dump_file, "RISCV_FUSE_LDINDEXED\n");
+ return true;
+ }
+ }
+
+ if (simple_sets_p && riscv_fusion_enabled_p (RISCV_FUSE_EXPANDED_LD)
+ && (sched1 || prev_dest_regno == curr_dest_regno))
+ {
+ /* For the "expanded add/load fusion" family we have 2 main
+ categories: memory loads with displacement (i.e. with imm offset)
+ and loads without displacement (i.e. with offset = x0).
+
+ For loads without displacement we'll need:
+ - add + ld (done in RISCV_FUSE_LDINDEXED)
+ - addi + ld (done in RISCV_FUSE_LDPREINCREMENT)
+ - shNadd + ld
+ - add.uw + lw
+ - shNadd.uw + lw
+
+ For loads with displacement (i.e. with an immediate offset):
+ - add + ld with displacement
+ - addi + ld with displacement
+ - shNadd + ld with displacement
+ - add.uw + lw with displacement
+ - shNadd.uw + lw with displacement */
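+
+ /* As a concrete illustration, a shNadd + ld-with-displacement pair
+ accepted below would be:
+ sh2add a5,a0,a1
+ ld a5,8(a5)
+ where a5 is both the address temporary and the load destination. */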
+
+ /* We're trying to match a curr_set ld with displacement:
+ prev (add|addi) = (set (reg:DI rd) (...))
+ curr (ld) == (set (reg:DI rD)
+ (mem:DI (plus:DI (reg:DI rD) (const_int IMM12)))) */
+ if (MEM_P (SET_SRC (curr_set))
+ && SCALAR_INT_MODE_P (GET_MODE (SET_DEST (curr_set)))
+ && GET_CODE (XEXP (SET_SRC (curr_set), 0)) == PLUS
+ && REGNO (XEXP (XEXP (SET_SRC (curr_set), 0), 0)) == prev_dest_regno)
+ {
+ if (riscv_set_is_add (prev_set))
+ {
+ if (dump_file)
+ fprintf (dump_file, "RISCV_FUSE_EXPANDED_LD\n");
+ return true;
+ }
+
+ if (riscv_set_is_addi (prev_set))
+ {
+ if (dump_file)
+ fprintf (dump_file, "RISCV_FUSE_EXPANDED_LD\n");
+ return true;
+ }
+
+ if (riscv_set_is_shNadd (prev_set))
+ {
+ if (dump_file)
+ fprintf (dump_file, "RISCV_FUSE_EXPANDED_LD\n");
+ return true;
+ }
+ }
+
+ /* We're trying to match a ld without displacement:
+ prev (addi|shNadd) = (set (reg:DI rD) (...))
+ curr (ld) == (set (reg:DI rD)
+ (mem:DI (reg:DI rD))) */
+ if (MEM_P (SET_SRC (curr_set))
+ && SCALAR_INT_MODE_P (GET_MODE (SET_DEST (curr_set)))
+ && REG_P (XEXP (SET_SRC (curr_set), 0))
+ && REGNO (XEXP (SET_SRC (curr_set), 0)) == prev_dest_regno)
+ {
+ if (riscv_set_is_addi (prev_set))
+ {
+ if (dump_file)
+ fprintf (dump_file, "RISCV_FUSE_EXPANDED_LD\n");
+ return true;
+ }
+
+ if (riscv_set_is_shNadd (prev_set))
+ {
+ if (dump_file)
+ fprintf (dump_file, "RISCV_FUSE_EXPANDED_LD\n");
+ return true;
+ }
+ }
+
+ /* We're trying to match a curr_set lw with displacement:
+ prev (add.uw|shNadd.uw) = (set (reg:DI rd) (...))
+ curr (lw) == (set (reg:DI rd)
+ (any_extend:DI (mem:SUBX (plus:DI (reg:DI rd)
+ (const_int IMM))))) */
+ if ((GET_CODE (SET_SRC (curr_set)) == SIGN_EXTEND
+ || (GET_CODE (SET_SRC (curr_set)) == ZERO_EXTEND))
+ && MEM_P (XEXP (SET_SRC (curr_set), 0))
+ && SCALAR_INT_MODE_P (GET_MODE (SET_DEST (curr_set)))
+ && GET_CODE (XEXP (XEXP (SET_SRC (curr_set), 0), 0)) == PLUS
+ && REG_P (XEXP (XEXP (XEXP (SET_SRC (curr_set), 0), 0),0))
+ && (REGNO (XEXP (XEXP (XEXP (SET_SRC (curr_set), 0), 0),0))
+ == prev_dest_regno))
+ {
+ if (riscv_set_is_adduw (prev_set))
+ {
+ if (dump_file)
+ fprintf (dump_file, "RISCV_FUSE_EXPANDED_LD\n");
+ return true;
+ }
+
+ if (riscv_set_is_shNadduw (prev_set))
+ {
+ if (dump_file)
+ fprintf (dump_file, "RISCV_FUSE_EXPANDED_LD\n");
+ return true;
+ }
+ }
+
+ /* We're trying to match a curr_set lw without displacement:
+ prev (add.uw|shNadd.uw) = (set (reg:DI rd) (...))
+ curr (ld|lh|lw) == (set (reg:DI rd)
+ (any_extend:DI (mem:SUBX (reg:DI rd)))) */
+ if ((GET_CODE (SET_SRC (curr_set)) == SIGN_EXTEND
+ || (GET_CODE (SET_SRC (curr_set)) == ZERO_EXTEND))
+ && MEM_P (XEXP (SET_SRC (curr_set), 0))
+ && SCALAR_INT_MODE_P (GET_MODE (SET_DEST (curr_set)))
+ && REG_P (XEXP (XEXP (SET_SRC (curr_set), 0), 0))
+ && REGNO (XEXP (XEXP (SET_SRC (curr_set), 0), 0)) == prev_dest_regno)
+ {
+ if (riscv_set_is_adduw (prev_set))
+ {
+ if (dump_file)
+ fprintf (dump_file, "RISCV_FUSE_EXPANDED_LD\n");
+ return true;
+ }
+
+ if (riscv_set_is_shNadduw (prev_set))
+ {
+ if (dump_file)
+ fprintf (dump_file, "RISCV_FUSE_EXPANDED_LD\n");
+ return true;
+ }
+ }
}
- if (simple_sets_p && riscv_fusion_enabled_p (RISCV_FUSE_LDPREINCREMENT))
+ if (simple_sets_p && riscv_fusion_enabled_p (RISCV_FUSE_LDPREINCREMENT)
+ && (sched1 || prev_dest_regno == curr_dest_regno))
{
/* We are trying to match the following:
prev (add) == (set (reg:DI rS)
@@ -10002,15 +10572,21 @@ riscv_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr)
(mem:DI (reg:DI rS))) */
if (MEM_P (SET_SRC (curr_set))
+ && SCALAR_INT_MODE_P (GET_MODE (SET_DEST (curr_set)))
&& REG_P (XEXP (SET_SRC (curr_set), 0))
- && REGNO (XEXP (SET_SRC (curr_set), 0)) == REGNO (SET_DEST (prev_set))
+ && REGNO (XEXP (SET_SRC (curr_set), 0)) == prev_dest_regno
&& GET_CODE (SET_SRC (prev_set)) == PLUS
&& REG_P (XEXP (SET_SRC (prev_set), 0))
&& CONST_INT_P (XEXP (SET_SRC (prev_set), 1)))
- return true;
+ {
+ if (dump_file)
+ fprintf (dump_file, "RISCV_FUSE_LDPREINCREMENT\n");
+ return true;
+ }
}
- if (simple_sets_p && riscv_fusion_enabled_p (RISCV_FUSE_LUI_ADDI))
+ if (simple_sets_p && riscv_fusion_enabled_p (RISCV_FUSE_LUI_ADDI)
+ && (sched1 || prev_dest_regno == curr_dest_regno))
{
/* We are trying to match the following:
prev (lui) == (set (reg:DI rD) (const_int UPPER_IMM_20))
@@ -10024,10 +10600,15 @@ riscv_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr)
&& (GET_CODE (SET_SRC (prev_set)) == HIGH
|| (CONST_INT_P (SET_SRC (prev_set))
&& LUI_OPERAND (INTVAL (SET_SRC (prev_set))))))
- return true;
+ {
+ if (dump_file)
+ fprintf (dump_file, "RISCV_FUSE_LUI_ADDI\n");
+ return true;
+ }
}
- if (simple_sets_p && riscv_fusion_enabled_p (RISCV_FUSE_AUIPC_ADDI))
+ if (simple_sets_p && riscv_fusion_enabled_p (RISCV_FUSE_AUIPC_ADDI)
+ && (sched1 || prev_dest_regno == curr_dest_regno))
{
/* We are trying to match the following:
prev (auipc) == (set (reg:DI rD) (unspec:DI [...] UNSPEC_AUIPC))
@@ -10045,38 +10626,64 @@ riscv_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr)
&& CONST_INT_P (XEXP (SET_SRC (curr_set), 1))
&& SMALL_OPERAND (INTVAL (XEXP (SET_SRC (curr_set), 1))))))
- return true;
+ {
+ if (dump_file)
+ fprintf (dump_file, "RISCV_FUSE_AUIPC_ADDI\n");
+ return true;
+ }
}
- if (simple_sets_p && riscv_fusion_enabled_p (RISCV_FUSE_LUI_LD))
+ if (simple_sets_p && riscv_fusion_enabled_p (RISCV_FUSE_LUI_LD)
+ && (sched1 || prev_dest_regno == curr_dest_regno))
{
/* We are trying to match the following:
prev (lui) == (set (reg:DI rD) (const_int UPPER_IMM_20))
curr (ld) == (set (reg:DI rD)
(mem:DI (plus:DI (reg:DI rD) (const_int IMM12)))) */
+ /* A LUI_OPERAND accepts (const_int 0), but we won't emit that as LUI. So
+ reject that case explicitly. */
if (CONST_INT_P (SET_SRC (prev_set))
+ && SET_SRC (prev_set) != CONST0_RTX (GET_MODE (SET_DEST (prev_set)))
&& LUI_OPERAND (INTVAL (SET_SRC (prev_set)))
&& MEM_P (SET_SRC (curr_set))
- && GET_CODE (XEXP (SET_SRC (curr_set), 0)) == PLUS)
- return true;
+ && SCALAR_INT_MODE_P (GET_MODE (SET_DEST (curr_set)))
+ && GET_CODE (XEXP (SET_SRC (curr_set), 0)) == PLUS
+ && REGNO (XEXP (XEXP (SET_SRC (curr_set), 0), 0)) == prev_dest_regno)
+ {
+ if (dump_file)
+ fprintf (dump_file, "RISCV_FUSE_LUI_LD\n");
+ return true;
+ }
if (GET_CODE (SET_SRC (prev_set)) == HIGH
&& MEM_P (SET_SRC (curr_set))
+ && SCALAR_INT_MODE_P (GET_MODE (SET_DEST (curr_set)))
&& GET_CODE (XEXP (SET_SRC (curr_set), 0)) == LO_SUM
- && REGNO (SET_DEST (prev_set)) == REGNO (XEXP (XEXP (SET_SRC (curr_set), 0), 0)))
- return true;
+ && REGNO (XEXP (XEXP (SET_SRC (curr_set), 0), 0)) == prev_dest_regno)
+ {
+ if (dump_file)
+ fprintf (dump_file, "RISCV_FUSE_LUI_LD\n");
+ return true;
+ }
if (GET_CODE (SET_SRC (prev_set)) == HIGH
&& (GET_CODE (SET_SRC (curr_set)) == SIGN_EXTEND
|| GET_CODE (SET_SRC (curr_set)) == ZERO_EXTEND)
&& MEM_P (XEXP (SET_SRC (curr_set), 0))
+ && SCALAR_INT_MODE_P (GET_MODE (SET_DEST (curr_set)))
&& (GET_CODE (XEXP (XEXP (SET_SRC (curr_set), 0), 0)) == LO_SUM
- && REGNO (SET_DEST (prev_set)) == REGNO (XEXP (XEXP (XEXP (SET_SRC (curr_set), 0), 0), 0))))
- return true;
+ && (REGNO (XEXP (XEXP (XEXP (SET_SRC (curr_set), 0), 0), 0))
+ == prev_dest_regno)))
+ {
+ if (dump_file)
+ fprintf (dump_file, "RISCV_FUSE_LUI_LD\n");
+ return true;
+ }
}
- if (simple_sets_p && riscv_fusion_enabled_p (RISCV_FUSE_AUIPC_LD))
+ if (simple_sets_p && riscv_fusion_enabled_p (RISCV_FUSE_AUIPC_LD)
+ && (sched1 || prev_dest_regno == curr_dest_regno))
{
/* We are trying to match the following:
prev (auipc) == (set (reg:DI rD) (unspec:DI [...] UNSPEC_AUIPC))
@@ -10086,11 +10693,16 @@ riscv_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr)
if (GET_CODE (SET_SRC (prev_set)) == UNSPEC
&& XINT (prev_set, 1) == UNSPEC_AUIPC
&& MEM_P (SET_SRC (curr_set))
+ && SCALAR_INT_MODE_P (GET_MODE (SET_DEST (curr_set)))
&& GET_CODE (XEXP (SET_SRC (curr_set), 0)) == PLUS)
- return true;
+ {
+ if (dump_file)
+ fprintf (dump_file, "RISCV_FUSE_AUIPC_LD\n");
+ return true;
+ }
}
- if (simple_sets_p && riscv_fusion_enabled_p (RISCV_FUSE_ALIGNED_STD))
+ if (simple_sets_p && riscv_fusion_enabled_p (RISCV_FUSE_CACHE_ALIGNED_STD))
{
/* We are trying to match the following:
prev (sd) == (set (mem (plus (reg sp|fp) (const_int)))
@@ -10100,6 +10712,7 @@ riscv_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr)
if (MEM_P (SET_DEST (prev_set))
&& MEM_P (SET_DEST (curr_set))
+ && SCALAR_INT_MODE_P (GET_MODE (SET_DEST (curr_set)))
/* We can probably relax this condition. The documentation is a bit
unclear about sub-word cases. So we just model DImode for now. */
&& GET_MODE (SET_DEST (curr_set)) == DImode
@@ -10110,43 +10723,205 @@ riscv_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr)
extract_base_offset_in_addr (SET_DEST (prev_set), &base_prev, &offset_prev);
extract_base_offset_in_addr (SET_DEST (curr_set), &base_curr, &offset_curr);
- /* Fail if we did not find both bases. */
- if (base_prev == NULL_RTX || base_curr == NULL_RTX)
- return false;
+ /* Proceed only if we find both bases, both bases are registers and
+ the bases are the same register. */
+ if (base_prev != NULL_RTX && base_curr != NULL_RTX
+ && REG_P (base_prev) && REG_P (base_curr)
+ && REGNO (base_prev) == REGNO (base_curr)
+ /* The alignment of the base pointer is more useful than the
+ alignment of the memory reference for determining if we're
+ on opposite sides of a cache line. */
+ && REGNO_POINTER_ALIGN (ORIGINAL_REGNO (base_prev)) >= 128)
+ {
+ /* The two stores must be contained within opposite halves of the
+ same 16 byte aligned block of memory. We know the pointer
+ has suitable alignment, so we just need to check the offsets
+ of the two stores for suitable alignment. */
+
+ /* Get the smaller offset into OFFSET_PREV. */
+ if (INTVAL (offset_prev) > INTVAL (offset_curr))
+ std::swap (offset_prev, offset_curr);
+
+ /* We have a match if the smaller offset (OFFSET_PREV) is 16
+ byte aligned and the higher offset is 8 bytes more than the
+ lower offset. */
+ if ((INTVAL (offset_prev) % 16) == 0
+ && (INTVAL (offset_prev) + 8 == INTVAL (offset_curr)))
+ {
+ if (dump_file)
+ fprintf (dump_file, "RISCV_FUSE_ALIGNED_STD\n");
+ return true;
+ }
+ }
+ }
+ }
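+
+ /* For example, with a 16-byte aligned base pointer the pair
+ sd s0,0(sp)
+ sd s1,8(sp)
+ fills opposite halves of one aligned 16-byte block and fuses,
+ while offsets 8 and 16 straddle two blocks and do not. */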
- /* Fail if either base is not a register. */
- if (!REG_P (base_prev) || !REG_P (base_curr))
- return false;
+ /* More general form of RISCV_FUSE_CACHE_ALIGNED_STD. The major
+ difference is that the requirement for the stores to be in opposite
+ halves of a cache line is dropped. Instead the lower address must
+ have 2X the alignment of the object and the higher address must
+ immediately follow the first object. */
+ if (simple_sets_p && riscv_fusion_enabled_p (RISCV_FUSE_ALIGNED_STD))
+ {
+ /* We are trying to match the following:
+ prev (sd) == (set (mem (plus (reg rS1) (const_int)))
+ (reg rS2))
+ curr (sd) == (set (mem (plus (reg rS1) (const_int)))
+ (reg rS3)) */
- /* Fail if the bases are not the same register. */
- if (REGNO (base_prev) != REGNO (base_curr))
- return false;
+ if (MEM_P (SET_DEST (prev_set))
+ && SCALAR_INT_MODE_P (GET_MODE (SET_DEST (curr_set)))
+ && MEM_P (SET_DEST (curr_set))
+ /* Stores must have the same width. */
+ && GET_MODE (SET_DEST (curr_set)) == GET_MODE (SET_DEST (prev_set)))
+ {
+ rtx base_prev, base_curr, offset_prev, offset_curr;
+ unsigned mode_size;
+
+ extract_base_offset_in_addr (SET_DEST (prev_set),
+ &base_prev, &offset_prev);
+ extract_base_offset_in_addr (SET_DEST (curr_set),
+ &base_curr, &offset_curr);
+
+ /* Proceed only if we find both bases, both bases
+ are registers and bases are the same register. */
+ if (base_prev != NULL_RTX && base_curr != NULL_RTX
+ && REG_P (base_prev) && REG_P (base_curr)
+ && REGNO (base_prev) == REGNO (base_curr))
+ {
+ machine_mode mode = GET_MODE (SET_DEST (curr_set));
+ mode_size = estimated_poly_value (GET_MODE_SIZE (mode));
+
+ HOST_WIDE_INT offset_prev_int = INTVAL (offset_prev);
+ HOST_WIDE_INT offset_curr_int = INTVAL (offset_curr);
+
+ /* Get the smaller offset into OFFSET_PREV_INT. */
+ if (offset_prev_int > offset_curr_int)
+ std::swap (offset_prev_int, offset_curr_int);
+
+ /* We've normalized, so we need to check that the lower
+ address is aligned to 2X the size of the object. The
+ higher address must be the lower address plus the
+ size of the object. */
+ if (((offset_prev_int % (2 * mode_size)) == 0)
+ && offset_prev_int + mode_size == offset_curr_int)
+ {
+ if (dump_file)
+ fprintf (dump_file, "RISCV_FUSE_ALIGNED_STD\n");
+ return true;
+ }
+ }
+ }
+ }
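+
+ /* For example, for DImode stores (mode_size == 8) the pair
+ sd s0,16(a0)
+ sd s1,24(a0)
+ qualifies: 16 % (2 * 8) == 0 and 16 + 8 == 24. */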
- /* Originally the thought was to check MEM_ALIGN, but that was
- reporting incorrect alignments, even for SP/FP accesses, so we
- gave up on that approach. Instead just check for stack/hfp
- which we know are aligned. */
- if (REGNO (base_prev) != STACK_POINTER_REGNUM
- && REGNO (base_prev) != HARD_FRAME_POINTER_REGNUM)
- return false;
+ if (simple_sets_p && riscv_fusion_enabled_p (RISCV_FUSE_BFEXT)
+ && (sched1 || prev_dest_regno == curr_dest_regno))
+ {
+ /* We are trying to match the following:
+ prev (slli) == (set (reg:DI rD)
+ (ashift:DI (reg:DI rS) (const_int)))
+ curr (srli) == (set (reg:DI rD)
+ (lshiftrt:DI (reg:DI rD) (const_int))) */
- /* The two stores must be contained within opposite halves of the
- same 16 byte aligned block of memory. We know that the stack
- pointer and the frame pointer have suitable alignment. So we
- just need to check the offsets of the two stores for suitable
- alignment. */
- /* Get the smaller offset into OFFSET_PREV. */
- if (INTVAL (offset_prev) > INTVAL (offset_curr))
- std::swap (offset_prev, offset_curr);
-
- /* If the smaller offset (OFFSET_PREV) is not 16 byte aligned,
- then fail. */
- if ((INTVAL (offset_prev) % 16) != 0)
- return false;
+ if (GET_CODE (SET_SRC (prev_set)) == ASHIFT
+ && (GET_CODE (SET_SRC (curr_set)) == LSHIFTRT
+ || GET_CODE (SET_SRC (curr_set)) == ASHIFTRT)
+ && REG_P (SET_DEST (prev_set))
+ && REG_P (SET_DEST (curr_set))
+ && REGNO (XEXP (SET_SRC (curr_set), 0)) == prev_dest_regno
+ && CONST_INT_P (XEXP (SET_SRC (prev_set), 1))
+ && CONST_INT_P (XEXP (SET_SRC (curr_set), 1)))
+ {
+ if (dump_file)
+ fprintf (dump_file, "RISCV_FUSE_BFEXT\n");
+ return true;
+ }
+ }
+
+ if (simple_sets_p && riscv_fusion_enabled_p (RISCV_FUSE_B_ALUI)
+ && (sched1 || prev_dest_regno == curr_dest_regno))
+ {
+ /* We are trying to match the following:
+ prev (orc.b) == (set (reg rD)
+ (unspec (reg rS1)))
+ curr (not) == (set (reg rD2) (not (reg rD))) */
+
+ if (GET_CODE (SET_SRC (prev_set)) == UNSPEC
+ && GET_CODE (SET_SRC (curr_set)) == NOT
+ && XINT (SET_SRC (prev_set), 1) == UNSPEC_ORC_B
+ && REG_P (SET_DEST (prev_set))
+ && REG_P (SET_DEST (curr_set))
+ && REG_P (XEXP (SET_SRC (curr_set), 0))
+ && REGNO (XEXP (SET_SRC (curr_set), 0)) == prev_dest_regno)
+ {
+ if (dump_file)
+ fprintf (dump_file, "RISCV_FUSE_B_ALUI\n");
+ return true;
+ }
+
+ /* We are trying to match the following:
+ prev (ctz) == (set (reg rD) (ctz (reg rS1)))
+ curr (andi) == (set (reg rD)
+ (and (reg rD) (const_int 63))) */
+
+ if (GET_CODE (SET_SRC (prev_set)) == CTZ
+ && GET_CODE (SET_SRC (curr_set)) == AND
+ && CONST_INT_P (XEXP (SET_SRC (curr_set), 1))
+ && INTVAL (XEXP (SET_SRC (curr_set), 1)) == 63
+ && REG_P (SET_DEST (prev_set))
+ && REG_P (SET_DEST (curr_set))
+ && REG_P (XEXP (SET_SRC (curr_set), 0))
+ && REGNO (XEXP (SET_SRC (curr_set), 0)) == prev_dest_regno)
+ {
+ if (dump_file)
+ fprintf (dump_file, "RISCV_FUSE_B_ALUI\n");
+ return true;
+ }
+
+ /* We are trying to match the following:
+ prev (sub) == (set (reg rD)
+ (minus (const_int 0) (reg rS2))
+ curr (max) == (set (reg rD)
+ (smax (reg rD) (reg rS2))) */
+
+ if (GET_CODE (SET_SRC (prev_set)) == MINUS
+ && (XEXP (SET_SRC (prev_set), 0)
+ == CONST0_RTX (GET_MODE (SET_SRC (prev_set))))
+ && CONST_INT_P (XEXP (SET_SRC (prev_set), 0))
+ && GET_CODE (SET_SRC (curr_set)) == SMAX
+ && REG_P (SET_DEST (prev_set))
+ && REG_P (SET_DEST (curr_set))
+ && REG_P (XEXP (SET_SRC (curr_set), 0))
+ && REGNO (XEXP (SET_SRC (curr_set), 0)) == prev_dest_regno
+ && REG_P (XEXP (SET_SRC (prev_set), 1))
+ && REG_P (XEXP (SET_SRC (curr_set), 1))
+ && (REGNO (XEXP (SET_SRC (prev_set), 1))
+ == REGNO (XEXP (SET_SRC (curr_set), 1))))
+ {
+ if (dump_file)
+ fprintf (dump_file, "RISCV_FUSE_B_ALUI\n");
+ return true;
+ }
+
+ /* We are trying to match the following:
+ prev (neg) == (set (reg rD) (neg (reg rS1)))
+ curr (max) == (set (reg rD)
+ (smax (reg rD) (reg rS1))) */
- /* The higher offset must be 8 bytes more than the lower
- offset. */
- return (INTVAL (offset_prev) + 8 == INTVAL (offset_curr));
+ if (GET_CODE (SET_SRC (prev_set)) == NEG
+ && GET_CODE (SET_SRC (curr_set)) == SMAX
+ && REG_P (SET_DEST (prev_set))
+ && REG_P (SET_DEST (curr_set))
+ && REG_P (XEXP (SET_SRC (curr_set), 0))
+ && REGNO (XEXP (SET_SRC (curr_set), 0)) == prev_dest_regno
+ && REG_P (XEXP (SET_SRC (prev_set), 0))
+ && REG_P (XEXP (SET_SRC (curr_set), 1))
+ && (REGNO (XEXP (SET_SRC (prev_set), 0))
+ == REGNO (XEXP (SET_SRC (curr_set), 1))))
+ {
+ if (dump_file)
+ fprintf (dump_file, "RISCV_FUSE_B_ALUI\n");
+ return true;
}
}
@@ -10222,6 +10997,27 @@ riscv_sched_adjust_cost (rtx_insn *, int, rtx_insn *insn, int cost,
return new_cost;
}
+/* Implement TARGET_SCHED_CAN_SPECULATE_INSN hook. Return true if INSN
+ can be scheduled for speculative execution. Reject vsetvl instructions
+ to prevent the scheduler from hoisting them out of basic blocks without
+ checking for data dependencies (PR117974). */
+static bool
+riscv_sched_can_speculate_insn (rtx_insn *insn)
+{
+ /* Gate speculative scheduling of vsetvl instructions behind tune param. */
+ if (tune_param->speculative_sched_vsetvl)
+ return true;
+
+ switch (get_attr_type (insn))
+ {
+ case TYPE_VSETVL:
+ case TYPE_VSETVL_PRE:
+ return false;
+ default:
+ return true;
+ }
+}
+
/* Auxiliary function to emit RISC-V ELF attribute. */
static void
riscv_emit_attribute ()
@@ -11196,10 +11992,10 @@ riscv_get_interrupt_type (tree decl)
{
const char *string = TREE_STRING_POINTER (TREE_VALUE (attr_args));
- if (!strcmp (string, "user"))
- return USER_MODE;
- else if (!strcmp (string, "supervisor"))
+ if (!strcmp (string, "supervisor"))
return SUPERVISOR_MODE;
+ else if (!strcmp (string, "rnmi"))
+ return RNMI_MODE;
else /* Must be "machine". */
return MACHINE_MODE;
}
@@ -11528,11 +12324,10 @@ riscv_gpr_save_operation_p (rtx op)
/* Two CLOBBER and USEs, must check the order. */
unsigned expect_code = i < 3 ? CLOBBER : USE;
if (GET_CODE (elt) != expect_code
- || !REG_P (XEXP (elt, 1))
- || (REGNO (XEXP (elt, 1)) != gpr_save_reg_order[i]))
+ || !REG_P (XEXP (elt, 0))
+ || (REGNO (XEXP (elt, 0)) != gpr_save_reg_order[i]))
return false;
}
- break;
}
return true;
}
@@ -11817,14 +12612,31 @@ riscv_estimated_poly_value (poly_int64 val,
/* Return true if the vector misalignment factor is supported by the
target. */
bool
-riscv_support_vector_misalignment (machine_mode mode,
- const_tree type ATTRIBUTE_UNUSED,
- int misalignment,
- bool is_packed ATTRIBUTE_UNUSED)
+riscv_support_vector_misalignment (machine_mode mode, const_tree type,
+ int misalignment, bool is_packed,
+ bool is_gather_scatter)
{
- /* Depend on movmisalign pattern. */
+ /* IS_PACKED is true if the corresponding scalar element is not naturally
+ aligned. If the misalignment is unknown and the access is packed
+ we defer to the default hook which will check if movmisalign is present.
+ Movmisalign, in turn, depends on TARGET_VECTOR_MISALIGN_SUPPORTED. */
+ if (misalignment == DR_MISALIGNMENT_UNKNOWN)
+ {
+ if (!is_packed)
+ return true;
+ }
+ else
+ {
+ /* If we know that misalignment is a multiple of the element size, we're
+ good. */
+ if (misalignment % TYPE_ALIGN_UNIT (type) == 0)
+ return true;
+ }
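+
+ /* For example, with 4-byte elements a known misalignment of 8 bytes
+ satisfies 8 % 4 == 0 and is supported, while a misalignment of
+ 2 bytes falls through to the movmisalign check below. */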
+
+ /* Otherwise fall back to movmisalign again. */
return default_builtin_support_vector_misalignment (mode, type, misalignment,
- is_packed);
+ is_packed,
+ is_gather_scatter);
}
/* Implement TARGET_VECTORIZE_GET_MASK_MODE. */
@@ -11986,6 +12798,14 @@ riscv_lshift_subword (machine_mode mode ATTRIBUTE_UNUSED, rtx value, rtx shift,
gen_lowpart (QImode, shift)));
}
+/* Return TRUE if we should use the zero stride load, FALSE otherwise. */
+
+bool
+strided_load_broadcast_p ()
+{
+ return tune_param->use_zero_stride_load;
+}
+
/* Return TRUE if we should use the divmod expander, FALSE otherwise. This
allows the behavior to be tuned for specific implementations as well as
when optimizing for size. */
@@ -12047,27 +12867,30 @@ riscv_emit_frm_mode_set (int mode, int prev_mode)
if (prev_mode == riscv_vector::FRM_DYN_CALL)
emit_insn (gen_frrmsi (backup_reg)); /* Backup frm when DYN_CALL. */
- if (mode != prev_mode)
- {
- rtx frm = gen_int_mode (mode, SImode);
+ if (mode == prev_mode)
+ return;
- if (mode == riscv_vector::FRM_DYN_CALL
- && prev_mode != riscv_vector::FRM_DYN && STATIC_FRM_P (cfun))
- /* No need to emit when prev mode is DYN already. */
- emit_insn (gen_fsrmsi_restore_volatile (backup_reg));
- else if (mode == riscv_vector::FRM_DYN_EXIT && STATIC_FRM_P (cfun)
- && prev_mode != riscv_vector::FRM_DYN
- && prev_mode != riscv_vector::FRM_DYN_CALL)
- /* No need to emit when prev mode is DYN or DYN_CALL already. */
- emit_insn (gen_fsrmsi_restore_volatile (backup_reg));
- else if (mode == riscv_vector::FRM_DYN
- && prev_mode != riscv_vector::FRM_DYN_CALL)
- /* Restore frm value from backup when switch to DYN mode. */
- emit_insn (gen_fsrmsi_restore (backup_reg));
- else if (riscv_static_frm_mode_p (mode))
- /* Set frm value when switch to static mode. */
- emit_insn (gen_fsrmsi_restore (frm));
+ if (riscv_static_frm_mode_p (mode))
+ {
+ /* Set frm value when switch to static mode. */
+ emit_insn (gen_fsrmsi_restore (gen_int_mode (mode, SImode)));
+ return;
}
+
+ bool restore_p
+ = /* No need to emit when prev mode is DYN. */
+ (STATIC_FRM_P (cfun) && mode == riscv_vector::FRM_DYN_CALL
+ && prev_mode != riscv_vector::FRM_DYN)
+ /* No need to emit if prev mode is DYN or DYN_CALL. */
+ || (STATIC_FRM_P (cfun) && mode == riscv_vector::FRM_DYN_EXIT
+ && prev_mode != riscv_vector::FRM_DYN
+ && prev_mode != riscv_vector::FRM_DYN_CALL)
+ /* Restore frm value when switch to DYN mode. */
+ || (STATIC_FRM_P (cfun) && mode == riscv_vector::FRM_DYN
+ && prev_mode != riscv_vector::FRM_DYN_CALL);
+
+ if (restore_p)
+ emit_insn (gen_fsrmsi_restore (backup_reg));
}
/* Implement Mode switching. */
@@ -12090,59 +12913,6 @@ riscv_emit_mode_set (int entity, int mode, int prev_mode,
}
}
-/* Adjust the FRM_NONE insn after a call to FRM_DYN for the
- underlying emit. */
-
-static int
-riscv_frm_adjust_mode_after_call (rtx_insn *cur_insn, int mode)
-{
- rtx_insn *insn = prev_nonnote_nondebug_insn_bb (cur_insn);
-
- if (insn && CALL_P (insn))
- return riscv_vector::FRM_DYN;
-
- return mode;
-}
-
-/* Insert the backup frm insn to the end of the bb if and only if the call
- is the last insn of this bb. */
-
-static void
-riscv_frm_emit_after_bb_end (rtx_insn *cur_insn)
-{
- edge eg;
- bool abnormal_edge_p = false;
- edge_iterator eg_iterator;
- basic_block bb = BLOCK_FOR_INSN (cur_insn);
-
- FOR_EACH_EDGE (eg, eg_iterator, bb->succs)
- {
- if (eg->flags & EDGE_ABNORMAL)
- abnormal_edge_p = true;
- else
- {
- start_sequence ();
- emit_insn (gen_frrmsi (DYNAMIC_FRM_RTL (cfun)));
- rtx_insn *backup_insn = get_insns ();
- end_sequence ();
-
- insert_insn_on_edge (backup_insn, eg);
- }
- }
-
- if (abnormal_edge_p)
- {
- start_sequence ();
- emit_insn (gen_frrmsi (DYNAMIC_FRM_RTL (cfun)));
- rtx_insn *backup_insn = get_insns ();
- end_sequence ();
-
- insert_insn_end_basic_block (backup_insn, bb);
- }
-
- commit_edge_insertions ();
-}
-
/* Return mode that frm must be switched into
prior to the execution of insn. */
@@ -12154,33 +12924,25 @@ riscv_frm_mode_needed (rtx_insn *cur_insn, int code)
/* The dynamic frm will be initialized only once during cfun. */
DYNAMIC_FRM_RTL (cfun) = gen_reg_rtx (SImode);
emit_insn_at_entry (gen_frrmsi (DYNAMIC_FRM_RTL (cfun)));
+ CFUN_IN_CALL (cfun) = false;
}
if (CALL_P (cur_insn))
{
- rtx_insn *insn = next_nonnote_nondebug_insn_bb (cur_insn);
-
- if (!insn)
- riscv_frm_emit_after_bb_end (cur_insn);
-
+ CFUN_IN_CALL (cfun) = true;
return riscv_vector::FRM_DYN_CALL;
}
int mode = code >= 0 ? get_attr_frm_mode (cur_insn) : riscv_vector::FRM_NONE;
if (mode == riscv_vector::FRM_NONE)
- /* After meet a call, we need to backup the frm because it may be
- updated during the call. Here, for each insn, we will check if
- the previous insn is a call or not. When previous insn is call,
- there will be 2 cases for the emit mode set.
-
- 1. Current insn is not MODE_NONE, then the mode switch framework
- will do the mode switch from MODE_CALL to MODE_NONE natively.
- 2. Current insn is MODE_NONE, we need to adjust the MODE_NONE to
- the MODE_DYN, and leave the mode switch itself to perform
- the emit mode set.
- */
- mode = riscv_frm_adjust_mode_after_call (cur_insn, mode);
+ {
+ if (CFUN_IN_CALL (cfun))
+ {
+ CFUN_IN_CALL (cfun) = false;
+ return riscv_vector::FRM_DYN;
+ }
+ }
return mode;
}
@@ -12207,7 +12969,7 @@ singleton_vxrm_need (void)
/* Walk the IL noting if VXRM is needed and if there's more than one
mode needed. */
bool found = false;
- int saved_vxrm_mode;
+ int saved_vxrm_mode = VXRM_MODE_NONE;
for (rtx_insn *insn = get_insns (); insn; insn = NEXT_INSN (insn))
{
if (!INSN_P (insn) || DEBUG_INSN_P (insn))
@@ -12480,6 +13242,36 @@ get_vector_costs ()
return costs;
}
+/* Return the cost of moving data from a GPR to a vector register.
+ It will take the value of --param=gpr2vr-cost if it is provided.
+ Otherwise the default regmove->GR2VR will be returned. */
+
+int
+get_gr2vr_cost ()
+{
+ int cost = get_vector_costs ()->regmove->GR2VR;
+
+ if (gpr2vr_cost != GPR2VR_COST_UNPROVIDED)
+ cost = gpr2vr_cost;
+
+ return cost;
+}
+
+/* Return the cost of moving data from floating-point to vector register.
+ It will take the value of --param=fpr2vr-cost if it is provided.
+ Otherwise the default regmove->FR2VR will be returned. */
+
+int
+get_fr2vr_cost ()
+{
+ int cost = get_vector_costs ()->regmove->FR2VR;
+
+ if (fpr2vr_cost != FPR2VR_COST_UNPROVIDED)
+ cost = fpr2vr_cost;
+
+ return cost;
+}
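+
+/* For example, passing --param=fpr2vr-cost=0 makes FPR-to-vector moves
+ look free to the vectorizer cost model, which is useful when
+ experimenting with tuning. */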
+
/* Implement targetm.vectorize.builtin_vectorization_cost. */
static int
@@ -12545,8 +13337,7 @@ riscv_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
case vec_construct:
{
/* TODO: This is too pessimistic in case we can splat. */
- int regmove_cost = fp ? costs->regmove->FR2VR
- : costs->regmove->GR2VR;
+ int regmove_cost = fp ? get_fr2vr_cost () : get_gr2vr_cost ();
return (regmove_cost + common_costs->scalar_to_vec_cost)
* estimated_poly_value (TYPE_VECTOR_SUBPARTS (vectype));
}
@@ -12979,6 +13770,88 @@ riscv_expand_sssub (rtx dest, rtx x, rtx y)
emit_move_insn (dest, gen_lowpart (mode, xmode_dest));
}
+/* Implement the Xmode usmul.
+
+ b = SAT_MUL (a, b);
+ =>
+ _1 = a * b;
+ _2 = mulhu (a, b);
+ _overflow_p = _2 != 0;
+ _mask = - _overflow_p;
+ b = _1 | _mask;
+ */
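+
+/* For example, on rv64 with a = 2^32 and b = 2^33, the high part
+ computed by mulhu is 2, so _overflow_p becomes 1, _mask becomes
+ all ones and the result saturates to 0xffffffffffffffff. */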
+
+static void
+riscv_expand_xmode_usmul (rtx dest, rtx x, rtx y)
+{
+ machine_mode mode = GET_MODE (dest);
+
+ gcc_assert (mode == Xmode);
+
+ rtx mul = gen_reg_rtx (Xmode);
+ rtx mulhu = gen_reg_rtx (Xmode);
+ rtx overflow_p = gen_reg_rtx (Xmode);
+
+ riscv_emit_binary (MULT, mul, x, y);
+
+ if (TARGET_64BIT)
+ emit_insn (gen_umuldi3_highpart (mulhu, x, y));
+ else
+ emit_insn (gen_umulsi3_highpart (mulhu, x, y));
+
+ riscv_emit_binary (NE, overflow_p, mulhu, CONST0_RTX (Xmode));
+ riscv_emit_unary (NEG, overflow_p, overflow_p);
+ riscv_emit_binary (IOR, dest, mul, overflow_p);
+}
+
+/* Implement the non-Xmode usmul.
+
+ b = SAT_MUL (a, b);
+ =>
+ _1 = a * b;
+ _max = (T)-1
+ _overflow_p = _1 > _max;
+ _mask = - _overflow_p;
+ b = _1 | _mask;
+ */
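+
+/* For example, for QImode (bitsize 8), a = 16 and b = 32 give
+ _1 = 512 and _max = 255; since 512 > 255 the mask becomes all
+ ones and the result saturates to 255. */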
+
+static void
+riscv_expand_non_xmode_usmul (rtx dest, rtx x, rtx y)
+{
+ machine_mode mode = GET_MODE (dest);
+ unsigned bitsize = GET_MODE_BITSIZE (mode).to_constant ();
+
+ gcc_assert (mode != Xmode);
+
+ rtx xmode_x = riscv_extend_to_xmode_reg (x, mode, ZERO_EXTEND);
+ rtx xmode_y = riscv_extend_to_xmode_reg (y, mode, ZERO_EXTEND);
+ rtx xmode_mul = gen_reg_rtx (Xmode);
+ rtx mul_max = gen_reg_rtx (Xmode);
+ rtx overflow_p = gen_reg_rtx (Xmode);
+
+ uint64_t max = ((uint64_t)1 << bitsize) - 1;
+
+ emit_move_insn (mul_max, GEN_INT (max));
+ riscv_emit_binary (MULT, xmode_mul, xmode_x, xmode_y);
+
+ riscv_emit_binary (LTU, overflow_p, mul_max, xmode_mul);
+ riscv_emit_unary (NEG, overflow_p, overflow_p);
+ riscv_emit_binary (IOR, xmode_mul, xmode_mul, overflow_p);
+
+ emit_move_insn (dest, gen_lowpart (mode, xmode_mul));
+}
+
+/* Implements the unsigned saturation mult standard name usmul for int mode. */
+
+void
+riscv_expand_usmul (rtx dest, rtx x, rtx y)
+{
+ if (GET_MODE (dest) == Xmode)
+ return riscv_expand_xmode_usmul (dest, x, y);
+ else
+ return riscv_expand_non_xmode_usmul (dest, x, y);
+}
+
/* Implement the unsigned saturation truncation for int mode.
b = SAT_TRUNC (a);
@@ -13092,10 +13965,13 @@ riscv_c_mode_for_floating_type (enum tree_index ti)
return default_mode_for_floating_type (ti);
}
-/* This parses the attribute arguments to target_version in DECL and modifies
- the feature mask and priority required to select those targets. */
+/* Parse the attribute arguments to target_version in DECL and modify
+ the feature mask and priority required to select those targets.
+ If LOC is nonnull, report diagnostics against *LOC, otherwise
+ remain silent. */
static void
parse_features_for_version (tree decl,
+ location_t *loc,
struct riscv_feature_bits &res,
int &priority)
{
@@ -13126,19 +14002,14 @@ parse_features_for_version (tree decl,
cl_target_option_restore (&global_options, &global_options_set,
default_opts);
- riscv_process_target_version_attr (TREE_VALUE (version_attr),
- DECL_SOURCE_LOCATION (decl));
+ riscv_process_target_version_attr (TREE_VALUE (version_attr), loc);
priority = global_options.x_riscv_fmv_priority;
const char *arch_string = global_options.x_riscv_arch_string;
bool parse_res
- = riscv_minimal_hwprobe_feature_bits (arch_string, &res,
- DECL_SOURCE_LOCATION (decl));
+ = riscv_minimal_hwprobe_feature_bits (arch_string, &res, loc);
gcc_assert (parse_res);
- if (arch_string != default_opts->x_riscv_arch_string)
- free (CONST_CAST (void *, (const void *) arch_string));
-
cl_target_option_restore (&global_options, &global_options_set,
&cur_target);
}
@@ -13193,8 +14064,8 @@ riscv_compare_version_priority (tree decl1, tree decl2)
struct riscv_feature_bits mask1, mask2;
int prio1, prio2;
- parse_features_for_version (decl1, mask1, prio1);
- parse_features_for_version (decl2, mask2, prio2);
+ parse_features_for_version (decl1, nullptr, mask1, prio1);
+ parse_features_for_version (decl2, nullptr, mask2, prio2);
return compare_fmv_features (mask1, mask2, prio1, prio2);
}
@@ -13497,6 +14368,7 @@ dispatch_function_versions (tree dispatch_decl,
version_info.version_decl = version_decl;
// Get attribute string, parse it and find the right features.
parse_features_for_version (version_decl,
+ &DECL_SOURCE_LOCATION (version_decl),
version_info.features,
version_info.prio);
function_versions.push_back (version_info);
@@ -13735,7 +14607,6 @@ riscv_get_function_versions_dispatcher (void *decl)
struct cgraph_node *node = NULL;
struct cgraph_node *default_node = NULL;
struct cgraph_function_version_info *node_v = NULL;
- struct cgraph_function_version_info *first_v = NULL;
tree dispatch_decl = NULL;
@@ -13752,41 +14623,16 @@ riscv_get_function_versions_dispatcher (void *decl)
if (node_v->dispatcher_resolver != NULL)
return node_v->dispatcher_resolver;
- /* Find the default version and make it the first node. */
- first_v = node_v;
- /* Go to the beginning of the chain. */
- while (first_v->prev != NULL)
- first_v = first_v->prev;
- default_version_info = first_v;
-
- while (default_version_info != NULL)
- {
- struct riscv_feature_bits res;
- int priority; /* Unused. */
- parse_features_for_version (default_version_info->this_node->decl,
- res, priority);
- if (res.length == 0)
- break;
- default_version_info = default_version_info->next;
- }
+ /* The default node is always the beginning of the chain. */
+ default_version_info = node_v;
+ while (default_version_info->prev)
+ default_version_info = default_version_info->prev;
+ default_node = default_version_info->this_node;
/* If there is no default node, just return NULL. */
- if (default_version_info == NULL)
+ if (!is_function_default_version (default_node->decl))
return NULL;
- /* Make default info the first node. */
- if (first_v != default_version_info)
- {
- default_version_info->prev->next = default_version_info->next;
- if (default_version_info->next)
- default_version_info->next->prev = default_version_info->prev;
- first_v->prev = default_version_info;
- default_version_info->next = first_v;
- default_version_info->prev = NULL;
- }
-
- default_node = default_version_info->this_node;
-
if (targetm.has_ifunc_p ())
{
struct cgraph_function_version_info *it_v = NULL;
@@ -13930,17 +14776,53 @@ expand_crc_using_clmul (scalar_mode crc_mode, scalar_mode data_mode,
rtx data = gen_rtx_ZERO_EXTEND (word_mode, operands[2]);
riscv_expand_op (XOR, word_mode, a0, crc, data);
- if (TARGET_64BIT)
- emit_insn (gen_riscv_clmul_di (a0, a0, t0));
- else
- emit_insn (gen_riscv_clmul_si (a0, a0, t0));
+ if (TARGET_ZBKC || TARGET_ZBC)
+ {
+ if (TARGET_64BIT)
+ emit_insn (gen_riscv_clmul_di (a0, a0, t0));
+ else
+ emit_insn (gen_riscv_clmul_si (a0, a0, t0));
- riscv_expand_op (LSHIFTRT, word_mode, a0, a0,
- gen_int_mode (crc_size, word_mode));
- if (TARGET_64BIT)
- emit_insn (gen_riscv_clmul_di (a0, a0, t1));
+ riscv_expand_op (LSHIFTRT, word_mode, a0, a0,
+ gen_int_mode (crc_size, word_mode));
+ if (TARGET_64BIT)
+ emit_insn (gen_riscv_clmul_di (a0, a0, t1));
+ else
+ emit_insn (gen_riscv_clmul_si (a0, a0, t1));
+ }
else
- emit_insn (gen_riscv_clmul_si (a0, a0, t1));
+ {
+ machine_mode vmode;
+ if (!riscv_vector::get_vector_mode (DImode, 1).exists (&vmode))
+ gcc_unreachable ();
+
+ rtx vec = gen_reg_rtx (vmode);
+
+ insn_code icode1 = code_for_pred_broadcast (vmode);
+ rtx ops1[] = {vec, a0};
+ emit_nonvlmax_insn (icode1, UNARY_OP, ops1, CONST1_RTX (Pmode));
+
+ rtx rvv1di_reg = gen_rtx_SUBREG (RVVM1DImode, vec, 0);
+ insn_code icode2 = code_for_pred_vclmul_scalar (UNSPEC_VCLMUL,
+ E_RVVM1DImode);
+ rtx ops2[] = {rvv1di_reg, rvv1di_reg, t0};
+ emit_nonvlmax_insn (icode2, riscv_vector::BINARY_OP, ops2, CONST1_RTX
+ (Pmode));
+
+ rtx shift_amount = gen_int_mode (data_size, Pmode);
+ insn_code icode3 = code_for_pred_scalar (LSHIFTRT, vmode);
+ rtx ops3[] = {vec, vec, shift_amount};
+ emit_nonvlmax_insn (icode3, BINARY_OP, ops3, CONST1_RTX (Pmode));
+
+ insn_code icode4 = code_for_pred_vclmul_scalar (UNSPEC_VCLMULH,
+ E_RVVM1DImode);
+ rtx ops4[] = {rvv1di_reg, rvv1di_reg, t1};
+ emit_nonvlmax_insn (icode4, riscv_vector::BINARY_OP, ops4, CONST1_RTX
+ (Pmode));
+
+ rtx vec_low_lane = gen_lowpart (DImode, vec);
+ riscv_emit_move (a0, vec_low_lane);
+ }
if (crc_size > data_size)
{
@@ -13989,19 +14871,53 @@ expand_reversed_crc_using_clmul (scalar_mode crc_mode, scalar_mode data_mode,
rtx a0 = gen_reg_rtx (word_mode);
riscv_expand_op (XOR, word_mode, a0, crc, data);
- if (TARGET_64BIT)
- emit_insn (gen_riscv_clmul_di (a0, a0, t0));
- else
- emit_insn (gen_riscv_clmul_si (a0, a0, t0));
+ if (TARGET_ZBKC || TARGET_ZBC)
+ {
+ if (TARGET_64BIT)
+ emit_insn (gen_riscv_clmul_di (a0, a0, t0));
+ else
+ emit_insn (gen_riscv_clmul_si (a0, a0, t0));
- rtx num_shift = gen_int_mode (GET_MODE_BITSIZE (word_mode) - data_size,
- word_mode);
- riscv_expand_op (ASHIFT, word_mode, a0, a0, num_shift);
+ rtx num_shift = gen_int_mode (BITS_PER_WORD - data_size, word_mode);
+ riscv_expand_op (ASHIFT, word_mode, a0, a0, num_shift);
- if (TARGET_64BIT)
- emit_insn (gen_riscv_clmulh_di (a0, a0, t1));
+ if (TARGET_64BIT)
+ emit_insn (gen_riscv_clmulh_di (a0, a0, t1));
+ else
+ emit_insn (gen_riscv_clmulh_si (a0, a0, t1));
+ }
else
- emit_insn (gen_riscv_clmulh_si (a0, a0, t1));
+ {
+ machine_mode vmode;
+ if (!riscv_vector::get_vector_mode (DImode, 1).exists (&vmode))
+ gcc_unreachable ();
+
+ rtx vec = gen_reg_rtx (vmode);
+ insn_code icode1 = code_for_pred_broadcast (vmode);
+ rtx ops1[] = {vec, a0};
+ emit_nonvlmax_insn (icode1, UNARY_OP, ops1, CONST1_RTX (Pmode));
+
+ rtx rvv1di_reg = gen_rtx_SUBREG (RVVM1DImode, vec, 0);
+ insn_code icode2 = code_for_pred_vclmul_scalar (UNSPEC_VCLMUL,
+ E_RVVM1DImode);
+ rtx ops2[] = {rvv1di_reg, rvv1di_reg, t0};
+ emit_nonvlmax_insn (icode2, riscv_vector::BINARY_OP, ops2, CONST1_RTX
+ (Pmode));
+
+ rtx shift_amount = gen_int_mode (BITS_PER_WORD - data_size, Pmode);
+ insn_code icode3 = code_for_pred_scalar (ASHIFT, vmode);
+ rtx ops3[] = {vec, vec, shift_amount};
+ emit_nonvlmax_insn (icode3, BINARY_OP, ops3, CONST1_RTX (Pmode));
+
+ insn_code icode4 = code_for_pred_vclmul_scalar (UNSPEC_VCLMULH,
+ E_RVVM1DImode);
+ rtx ops4[] = {rvv1di_reg, rvv1di_reg, t1};
+ emit_nonvlmax_insn (icode4, riscv_vector::BINARY_OP, ops4, CONST1_RTX
+ (Pmode));
+
+ rtx vec_low_lane = gen_lowpart (DImode, vec);
+ riscv_emit_move (a0, vec_low_lane);
+ }
if (crc_size > data_size)
{
@@ -14035,6 +14951,638 @@ bool need_shadow_stack_push_pop_p ()
return is_zicfiss_p () && riscv_save_return_addr_reg_p ();
}
+/* Synthesize OPERANDS[0] = OPERANDS[1] CODE OPERANDS[2].
+
+ OPERANDS[0] and OPERANDS[1] will be a REG and may be the same
+ REG.
+
+ OPERANDS[2] is a CONST_INT.
+
+ CODE is IOR or XOR.
+
+ Return TRUE if the operation was fully synthesized and the caller
+ need not generate additional code. Return FALSE if the operation
+ was not synthesized and the caller is responsible for emitting the
+ proper sequence. */
+
+bool
+synthesize_ior_xor (rtx_code code, rtx operands[3])
+{
+ /* Trivial cases that don't need synthesis. */
+ if (SMALL_OPERAND (INTVAL (operands[2]))
+ || ((TARGET_ZBS || TARGET_ZBKB)
+ && single_bit_mask_operand (operands[2], word_mode)))
+ return false;
+
+ /* The number of instructions to synthesize the constant is a good
+ estimate of the budget. That does not account for out of order
+ execution an fusion in the constant synthesis those would naturally
+ decrease the budget. It also does not account for the IOR/XOR at
+ the end of the sequence which would increase the budget. */
+ int budget = (TARGET_ZBS ? riscv_const_insns (operands[2], true) : -1);
+ int original_budget = budget;
+
+ /* Bits we need to set in operands[0]. As we synthesize the operation,
+ we clear bits in IVAL. Once IVAL is zero, then synthesis of the
+ operation is complete. */
+ unsigned HOST_WIDE_INT ival = INTVAL (operands[2]);
+
+ /* Check if we want to use [x]ori. Then get the remaining bits
+ and decrease the budget by one. */
+ if ((ival & HOST_WIDE_INT_UC (0x7ff)) != 0)
+ {
+ ival &= ~HOST_WIDE_INT_UC (0x7ff);
+ budget--;
+ }
+
+ /* Check for bseti cases. For each remaining bit in ival,
+ decrease the budget by one. */
+ while (ival)
+ {
+ HOST_WIDE_INT tmpval = HOST_WIDE_INT_UC (1) << ctz_hwi (ival);
+ ival &= ~tmpval;
+ budget--;
+ }
+
+ /* If we're flipping all but a small number of bits we can pre-flip
+ the outliers, then flip all the bits, which would restore those
+ bits that were pre-flipped. */
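+ /* E.g. XOR with 0xfffffffffffffffd flips every bit except bit 1:
+ pre-flip bit 1, then a single NOT flips everything and restores
+ bit 1, two instructions in total. */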
+ if ((TARGET_ZBS || TARGET_ZBKB)
+ && budget < 0
+ && code == XOR
+ && popcount_hwi (~INTVAL (operands[2])) < original_budget)
+ {
+ /* Pre-flipping bits we want to preserve. */
+ rtx input = operands[1];
+ rtx output = NULL_RTX;
+ ival = ~INTVAL (operands[2]);
+ while (ival)
+ {
+ HOST_WIDE_INT tmpval = HOST_WIDE_INT_UC (1) << ctz_hwi (ival);
+ rtx x = GEN_INT (tmpval);
+ x = gen_rtx_XOR (word_mode, input, x);
+ output = gen_reg_rtx (word_mode);
+ emit_insn (gen_rtx_SET (output, x));
+ input = output;
+ ival &= ~tmpval;
+ }
+
+ gcc_assert (output);
+
+ /* Now flip all the bits, which restores the bits we were
+ preserving. */
+ rtx x = gen_rtx_NOT (word_mode, input);
+ emit_insn (gen_rtx_SET (operands[0], x));
+ return true;
+ }
+
+ /* One more approach we can try. If our budget is 3+ instructions,
+ then we can try to rotate the source so that the bits we want to
+ set are in the low 11 bits. We then use [x]ori to set those low
+ bits, then rotate things back into their proper place. */
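+ /* E.g. IOR with 0x3ff00000000 (bits 32..41): rotate right by 32
+ brings the set bits into the low 11 bits, one ori sets them,
+ and rotating right by the remaining 32 puts them back. */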
+ if ((TARGET_ZBB || TARGET_XTHEADBB || TARGET_ZBKB)
+ && budget < 0
+ && popcount_hwi (INTVAL (operands[2])) <= 11
+ && riscv_const_insns (operands[2], true) >= 3)
+ {
+ ival = INTVAL (operands[2]);
+ /* First see if the constant trivially fits into 11 bits in the LSB. */
+ int lsb = ctz_hwi (ival);
+ int msb = BITS_PER_WORD - 1 - clz_hwi (ival);
+ if (msb - lsb + 1 <= 11)
+ {
+ rtx output = gen_reg_rtx (word_mode);
+ rtx input = operands[1];
+
+ /* Rotate the source right by LSB bits. */
+ rtx x = GEN_INT (lsb);
+ x = gen_rtx_ROTATERT (word_mode, input, x);
+ emit_insn (gen_rtx_SET (output, x));
+ input = output;
+
+ /* Shift the constant right by LSB bits. */
+ x = GEN_INT (ival >> lsb);
+
+ /* Perform the IOR/XOR operation. */
+ x = gen_rtx_fmt_ee (code, word_mode, input, x);
+ output = gen_reg_rtx (word_mode);
+ emit_insn (gen_rtx_SET (output, x));
+ input = output;
+
+ /* And rotate left to put everything back in place, we don't
+ have rotate left by a constant, so use rotate right by
+ an adjusted constant. */
+ x = GEN_INT (BITS_PER_WORD - lsb);
+ x = gen_rtx_ROTATERT (word_mode, input, x);
+ emit_insn (gen_rtx_SET (operands[0], x));
+ return true;
+ }
+
+ /* Maybe the bits are split between the high and low parts
+ of the constant. A bit more complex, but still manageable.
+
+ Conceptually we want to rotate the constant left by the number
+ of leading zeros after masking off all but the low 11 bits. */
+ int rotcount = clz_hwi (ival & 0x7ff) - (BITS_PER_WORD - 11);
+
+ /* Rotate the constant left by ROTCOUNT bits. */
+ ival = (ival << rotcount) | (ival >> (BITS_PER_WORD - rotcount));
+
+ /* Now we can do the same tests as before. */
+ lsb = ctz_hwi (ival);
+ msb = BITS_PER_WORD - clz_hwi (ival);
+ if ((INTVAL (operands[2]) & HOST_WIDE_INT_UC (0x7ff)) != 0
+ && msb - lsb + 1 <= 11)
+ {
+ rtx output = gen_reg_rtx (word_mode);
+ rtx input = operands[1];
+
+ /* Rotate the source left by ROTCOUNT bits. We don't have
+ rotate left by a constant, so use rotate right by an
+ adjusted constant. */
+ rtx x = GEN_INT (BITS_PER_WORD - rotcount);
+ x = gen_rtx_ROTATERT (word_mode, input, x);
+ emit_insn (gen_rtx_SET (output, x));
+ input = output;
+
+ /* We've already rotated the constant. So perform the IOR/XOR
+ operation. */
+ x = GEN_INT (ival);
+ x = gen_rtx_fmt_ee (code, word_mode, input, x);
+ output = gen_reg_rtx (word_mode);
+ emit_insn (gen_rtx_SET (output, x));
+ input = output;
+
+ /* And rotate right to put everything into its proper place. */
+ x = GEN_INT (rotcount);
+ x = gen_rtx_ROTATERT (word_mode, input, x);
+ emit_insn (gen_rtx_SET (operands[0], x));
+ return true;
+ }
+ }
+
+ /* If after accounting for bseti the remaining budget has
+ gone negative, then force the value into a register and
+ perform the IOR/XOR operation. Return TRUE to the caller
+ so it knows code generation is complete. */
+ if (budget < 0)
+ {
+ rtx x = force_reg (word_mode, operands[2]);
+ x = gen_rtx_fmt_ee (code, word_mode, operands[1], x);
+ emit_insn (gen_rtx_SET (operands[0], x));
+ return true;
+ }
+
+ /* Synthesis is better than loading the constant. */
+ ival = INTVAL (operands[2]);
+ rtx input = operands[1];
+ rtx output = NULL_RTX;
+
+ /* Emit the [x]ori insn that sets the low 11 bits into
+ the proper state. */
+ if ((ival & HOST_WIDE_INT_UC (0x7ff)) != 0)
+ {
+ rtx x = GEN_INT (ival & HOST_WIDE_INT_UC (0x7ff));
+ x = gen_rtx_fmt_ee (code, word_mode, input, x);
+ output = gen_reg_rtx (word_mode);
+ emit_insn (gen_rtx_SET (output, x));
+ input = output;
+ ival &= ~HOST_WIDE_INT_UC (0x7ff);
+ }
+
+ /* Peel off one set bit at a time as a constant and
+ generate a CONST_INT node for it. Then construct the
+ IOR/XOR node, then the SET node, and emit it. An
+ IOR/XOR with a suitable constant that is a single bit
+ will be implemented with a bseti or binvi. */
+ while (ival)
+ {
+ HOST_WIDE_INT tmpval = HOST_WIDE_INT_UC (1) << ctz_hwi (ival);
+ rtx x = GEN_INT (tmpval);
+ x = gen_rtx_fmt_ee (code, word_mode, input, x);
+ output = gen_reg_rtx (word_mode);
+ emit_insn (gen_rtx_SET (output, x));
+ input = output;
+ ival &= ~tmpval;
+ }
+
+ gcc_assert (output);
+ emit_move_insn (operands[0], output);
+ return true;
+}
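For intuition, the budget model above is easy to mirror on the host: one [x]ori covers whatever is set in the low 11 bits, and every remaining set bit costs one bseti. A minimal sketch of that cost function (hypothetical helper, plain host C, assuming a 64-bit HOST_WIDE_INT):

#include <stdio.h>

/* Hypothetical mirror of the synthesis cost computed above: one [x]ori
   for the low 11 bits (if any are set), plus one bseti per remaining
   set bit.  */
static int
ior_xor_synthesis_cost (unsigned long long ival)
{
  int cost = 0;
  if (ival & 0x7ffULL)
    {
      cost++;                              /* one ori/xori */
      ival &= ~0x7ffULL;
    }
  cost += __builtin_popcountll (ival);     /* one bseti per bit */
  return cost;
}

int
main (void)
{
  /* 0x80000801: ori for bit 0, bseti for bits 11 and 31 -> 3 insns.  */
  printf ("%d\n", ior_xor_synthesis_cost (0x80000801ULL));
  return 0;
}

Synthesis wins whenever this count stays within the budget set by riscv_const_insns; otherwise the constant is forced into a register and a plain register-register IOR/XOR is emitted.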
+
+/* Synthesize OPERANDS[0] = OPERANDS[1] & OPERANDS[2].
+
+ OPERANDS[0] and OPERANDS[1] will be a REG and may be the same
+ REG.
+
+ OPERANDS[2] is a CONST_INT.
+
+ Return TRUE if the operation was fully synthesized and the caller
+ need not generate additional code. Return FALSE if the operation
+ was not synthesized and the caller is responsible for emitting the
+ proper sequence. */
+
+bool
+synthesize_and (rtx operands[3])
+{
+ /* Trivial cases that don't need synthesis. */
+ if (SMALL_OPERAND (INTVAL (operands[2]))
+ || (TARGET_ZBS && not_single_bit_mask_operand (operands[2], word_mode)))
+ return false;
+
+ /* If the second operand is a mode mask, emit an extension
+ insn instead. */
+ if (CONST_INT_P (operands[2]))
+ {
+ enum machine_mode tmode = VOIDmode;
+ if (UINTVAL (operands[2]) == GET_MODE_MASK (HImode))
+ tmode = HImode;
+ else if (UINTVAL (operands[2]) == GET_MODE_MASK (SImode))
+ tmode = SImode;
+
+ if (tmode != VOIDmode)
+ {
+ rtx tmp = gen_lowpart (tmode, operands[1]);
+ emit_insn (gen_extend_insn (operands[0], tmp, word_mode, tmode, 1));
+ return true;
+ }
+ }
+
+ /* The number of instructions to synthesize the constant is a good
+ estimate of the budget. That does not account for out-of-order
+ execution and fusion in the constant synthesis, which would
+ naturally decrease the budget. It also does not account for the
+ AND at the end of the sequence, which would increase the budget. */
+ int budget = riscv_const_insns (operands[2], true);
+ rtx input = NULL_RTX;
+ rtx output = NULL_RTX;
+
+ /* Left shift + right shift to clear high bits. */
+ if (budget >= 2 && p2m1_shift_operand (operands[2], word_mode))
+ {
+ int count = (GET_MODE_BITSIZE (GET_MODE (operands[1])).to_constant ()
+ - exact_log2 (INTVAL (operands[2]) + 1));
+ rtx x = gen_rtx_ASHIFT (word_mode, operands[1], GEN_INT (count));
+ output = gen_reg_rtx (word_mode);
+ emit_insn (gen_rtx_SET (output, x));
+ input = output;
+ x = gen_rtx_LSHIFTRT (word_mode, input, GEN_INT (count));
+ emit_insn (gen_rtx_SET (operands[0], x));
+ return true;
+ }
+
+ /* The mask has only high bits set; clear the low bits with a
+ shift right followed by a shift left. */
+ unsigned HOST_WIDE_INT t = ~INTVAL (operands[2]);
+ if (budget >= 2 && exact_log2 (t + 1) >= 0)
+ {
+ int count = ctz_hwi (INTVAL (operands[2]));
+ rtx x = gen_rtx_LSHIFTRT (word_mode, operands[1], GEN_INT (count));
+ output = gen_reg_rtx (word_mode);
+ emit_insn (gen_rtx_SET (output, x));
+ input = output;
+ x = gen_rtx_ASHIFT (word_mode, input, GEN_INT (count));
+ emit_insn (gen_rtx_SET (operands[0], x));
+ return true;
+ }
+
+ /* If shifting right to eliminate the trailing zeros leaves a
+ SMALL_OPERAND, then the sequence is a shift right, an andi
+ and a shift left. */
+ t = INTVAL (operands[2]);
+ t >>= ctz_hwi (t);
+ if (budget >= 3 && SMALL_OPERAND (t) && popcount_hwi (t) > 2)
+ {
+ /* Shift right to clear the low order bits. */
+ unsigned HOST_WIDE_INT count = ctz_hwi (INTVAL (operands[2]));
+ rtx x = gen_rtx_LSHIFTRT (word_mode, operands[1], GEN_INT (count));
+ output = gen_reg_rtx (word_mode);
+ emit_insn (gen_rtx_SET (output, x));
+ input = output;
+
+ /* Now emit the ANDI. */
+ unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
+ mask >>= ctz_hwi (mask);
+ x = gen_rtx_AND (word_mode, input, GEN_INT (mask));
+ output = gen_reg_rtx (word_mode);
+ emit_insn (gen_rtx_SET (output, x));
+ input = output;
+
+ /* Shift left to move bits into position. */
+ count = INTVAL (operands[2]);
+ count = ctz_hwi (count);
+ x = gen_rtx_ASHIFT (word_mode, input, GEN_INT (count));
+ emit_insn (gen_rtx_SET (operands[0], x));
+ return true;
+ }
+
+ /* If the constant is all zeros except for a run of 1s somewhere in
+ the middle, then this is at worst 3 shifts. */
+ t = INTVAL (operands[2]);
+ if (budget >= 3
+ && consecutive_bits_operand (GEN_INT (t), word_mode)
+ && popcount_hwi (t) > 3)
+ {
+ /* Shift right to clear the low order bits. */
+ int count = ctz_hwi (INTVAL (operands[2]));
+ rtx x = gen_rtx_LSHIFTRT (word_mode, operands[1], GEN_INT (count));
+ output = gen_reg_rtx (word_mode);
+ emit_insn (gen_rtx_SET (output, x));
+ input = output;
+
+ /* Shift left to clear the high order bits. */
+ count += clz_hwi (INTVAL (operands[2])) % BITS_PER_WORD;
+ x = gen_rtx_ASHIFT (word_mode, input, GEN_INT (count));
+ output = gen_reg_rtx (word_mode);
+ emit_insn (gen_rtx_SET (output, x));
+ input = output;
+
+ /* And shift back right to put the bits into position. */
+ count = clz_hwi (INTVAL (operands[2])) % BITS_PER_WORD;
+ x = gen_rtx_LSHIFTRT (word_mode, input, GEN_INT (count));
+ emit_insn (gen_rtx_SET (operands[0], x));
+ return true;
+ }
+
+ /* The special cases didn't apply. It's entirely possible we may
+ want to combine some of the ideas above with bclr, but for now
+ those are deferred until we see them popping up in practice. */
+
+ unsigned HOST_WIDE_INT ival = ~INTVAL (operands[2]);
+
+ /* Clear as many bits using andi as we can. */
+ if ((ival & HOST_WIDE_INT_UC (0x7ff)) != 0x0)
+ {
+ ival &= ~HOST_WIDE_INT_UC (0x7ff);
+ budget--;
+ }
+
+ /* And handle remaining bits via bclr. */
+ while (TARGET_ZBS && ival)
+ {
+ unsigned HOST_WIDE_INT tmpval = HOST_WIDE_INT_UC (1) << ctz_hwi (ival);
+ ival &= ~tmpval;
+ budget--;
+ }
+
+ /* If the remaining budget has gone negative, then force the
+ value into a register and perform the AND operation. Return
+ TRUE to the caller so it knows code generation is complete.
+ FIXME: This is hacked to always be enabled until the last
+ patch in the series is enabled. */
+ if (ival || budget < 0)
+ {
+ rtx x = force_reg (word_mode, operands[2]);
+ x = gen_rtx_AND (word_mode, operands[1], x);
+ emit_insn (gen_rtx_SET (operands[0], x));
+ return true;
+ }
+
+ /* Synthesis is better than loading the constant. */
+ ival = ~INTVAL (operands[2]);
+ input = operands[1];
+
+ /* Clear any of the lower 11 bits we need. */
+ if ((ival & HOST_WIDE_INT_UC (0x7ff)) != 0)
+ {
+ rtx x = GEN_INT (~(ival & HOST_WIDE_INT_UC (0x7ff)));
+ x = gen_rtx_AND (word_mode, input, x);
+ output = gen_reg_rtx (word_mode);
+ emit_insn (gen_rtx_SET (output, x));
+ input = output;
+ ival &= ~HOST_WIDE_INT_UC (0x7ff);
+ }
+
+ /* Clear the rest with bclr. */
+ while (ival)
+ {
+ unsigned HOST_WIDE_INT tmpval = HOST_WIDE_INT_UC (1) << ctz_hwi (ival);
+ rtx x = GEN_INT (~tmpval);
+ x = gen_rtx_AND (word_mode, input, x);
+ output = gen_reg_rtx (word_mode);
+ emit_insn (gen_rtx_SET (output, x));
+ input = output;
+ ival &= ~tmpval;
+ }
+
+ emit_move_insn (operands[0], input);
+ return true;
+}
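The first two special cases in synthesize_and rest on a standard identity worth spelling out: for a mask of the form 2^k - 1 the AND is exactly a left/right shift pair, and for a mask with only high bits set it is the dual right/left pair. A quick host-side check (plain C, hypothetical values, W == 64):

#include <assert.h>
#include <stdint.h>

int
main (void)
{
  uint64_t x = 0xdeadbeefcafef00dULL;
  int k = 40;                   /* mask == 2^40 - 1 */
  uint64_t mask = (1ULL << k) - 1;
  int count = 64 - k;           /* BITS_PER_WORD - exact_log2 (mask + 1) */

  /* slli count; srli count == and with 2^k - 1.  */
  assert (((x << count) >> count) == (x & mask));

  /* srli 24; slli 24 == and with a mask that only has high bits set.  */
  assert (((x >> 24) << 24) == (x & ~((1ULL << 24) - 1)));
  return 0;
}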
+
+/* Synthesize OPERANDS[0] = OPERANDS[1] + OPERANDS[2].
+
+ OPERANDS[0] and OPERANDS[1] will be a REG and may be the same
+ REG.
+
+ OPERANDS[2] is a CONST_INT.
+
+ Return TRUE if the operation was fully synthesized and the caller
+ need not generate additional code. Return FALSE if the operation
+ was not synthesized and the caller is responsible for emitting the
+ proper sequence. */
+
+bool
+synthesize_add (rtx operands[3])
+{
+ /* Trivial cases that don't need synthesis. */
+ if (SMALL_OPERAND (INTVAL (operands[2])))
+ return false;
+
+ int budget1 = riscv_const_insns (operands[2], true);
+ int budget2 = riscv_const_insns (GEN_INT (-INTVAL (operands[2])), true);
+
+ HOST_WIDE_INT ival = INTVAL (operands[2]);
+
+ /* If we can emit two addi insns then that's better than synthesizing
+ the constant into a temporary, then adding the temporary to the
+ other input. The exception is when the constant can be loaded
+ in a single instruction, which can issue whenever it's convenient. */
+ if (SUM_OF_TWO_S12 (ival) && budget1 >= 2)
+ {
+ HOST_WIDE_INT saturated = HOST_WIDE_INT_M1U << (IMM_BITS - 1);
+
+ if (ival >= 0)
+ saturated = ~saturated;
+
+ ival -= saturated;
+
+ rtx x = gen_rtx_PLUS (word_mode, operands[1], GEN_INT (saturated));
+ emit_insn (gen_rtx_SET (operands[0], x));
+ rtx output = gen_rtx_PLUS (word_mode, operands[0], GEN_INT (ival));
+ emit_insn (gen_rtx_SET (operands[0], output));
+ return true;
+ }
+
+ /* If we can shift the constant by 1, 2, or 3 bit positions
+ and the result is a cheaper constant, then do so. */
+ ival = INTVAL (operands[2]);
+ if (TARGET_ZBA
+ && (((ival % 2) == 0 && budget1
+ > riscv_const_insns (GEN_INT (ival >> 1), true))
+ || ((ival % 4) == 0 && budget1
+ > riscv_const_insns (GEN_INT (ival >> 2), true))
+ || ((ival % 8) == 0 && budget1
+ > riscv_const_insns (GEN_INT (ival >> 3), true))))
+ {
+ /* Load the shifted constant into a temporary. */
+ int shct = ctz_hwi (ival);
+
+ /* We can handle shifting up to 3 bit positions via shNadd. */
+ if (shct > 3)
+ shct = 3;
+
+ /* The adjusted constant may still need synthesis, so do not copy
+ it directly into a register. Let the expander handle it. */
+ rtx tmp = force_reg (word_mode, GEN_INT (ival >> shct));
+
+ /* Generate shift-add of temporary and operands[1]
+ into the final destination. */
+ rtx x = gen_rtx_ASHIFT (word_mode, tmp, GEN_INT (shct));
+ rtx output = gen_rtx_PLUS (word_mode, x, operands[1]);
+ emit_insn (gen_rtx_SET (operands[0], output));
+ return true;
+ }
+
+ /* If the negated constant is cheaper than the original, then negate
+ the constant and use sub. */
+ if (budget2 < budget1)
+ {
+ /* Load -INTVAL (operands[2]) into a temporary. */
+ rtx tmp = force_reg (word_mode, GEN_INT (-INTVAL (operands[2])));
+
+ /* Subtract the temporary (the negated constant) from operands[1]. */
+ rtx output = gen_rtx_MINUS (word_mode, operands[1], tmp);
+ emit_insn (gen_rtx_SET (operands[0], output));
+ return true;
+ }
+
+ /* No add synthesis was found. Synthesize the constant into
+ a temporary and use that. */
+ rtx x = force_reg (word_mode, operands[2]);
+ x = gen_rtx_PLUS (word_mode, operands[1], x);
+ emit_insn (gen_rtx_SET (operands[0], x));
+ return true;
+}
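As a worked example of the SUM_OF_TWO_S12 path (a sketch, assuming IMM_BITS == 12, so a simm12 covers [-2048, 2047]): for ival == 3000 the saturated part is 2047 and the remainder is 953, both valid addi immediates.

#include <stdio.h>

int
main (void)
{
  long ival = 3000;
  /* HOST_WIDE_INT_M1U << 11 is -2048; it is flipped for ival >= 0.  */
  long saturated = (ival >= 0) ? 2047 : -2048;
  long rest = ival - saturated;
  /* Prints "addi 2047 then addi 953"; for -3000: -2048 then -952.  */
  printf ("addi %ld then addi %ld\n", saturated, rest);
  return 0;
}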
+
+/* Synthesize OPERANDS[0] = OPERANDS[1] + OPERANDS[2].
+
+ For 32-bit operations on a 64-bit target.
+
+ OPERANDS[0] and OPERANDS[1] will be a REG and may be the same
+ REG.
+
+ OPERANDS[2] is a CONST_INT.
+
+ Return TRUE if the operation was fully synthesized and the caller
+ need not generate additional code. Return FALSE if the operation
+ was not synthesized and the caller is responsible for emitting the
+ proper sequence. */
+
+bool
+synthesize_add_extended (rtx operands[3])
+{
+ /* If operands[2] is a 12-bit signed immediate,
+ no synthesis needs to be done. */
+ if (SMALL_OPERAND (INTVAL (operands[2])))
+ return false;
+
+ HOST_WIDE_INT ival = INTVAL (operands[2]);
+ int budget1 = riscv_const_insns (operands[2], true);
+ int budget2 = riscv_const_insns (GEN_INT (-INTVAL (operands[2])), true);
+
+ /* If operands[2] can be split into two 12-bit signed immediates,
+ split the add into two adds. */
+ if (SUM_OF_TWO_S12 (ival))
+ {
+ HOST_WIDE_INT saturated = HOST_WIDE_INT_M1U << (IMM_BITS - 1);
+
+ if (ival >= 0)
+ saturated = ~saturated;
+
+ ival -= saturated;
+
+ rtx temp = gen_reg_rtx (DImode);
+ emit_insn (gen_addsi3_extended (temp, operands[1], GEN_INT (saturated)));
+ temp = gen_lowpart (SImode, temp);
+ SUBREG_PROMOTED_VAR_P (temp) = 1;
+ SUBREG_PROMOTED_SET (temp, SRP_SIGNED);
+ emit_insn (gen_rtx_SET (operands[0], temp));
+ rtx t = gen_reg_rtx (DImode);
+ emit_insn (gen_addsi3_extended (t, operands[0], GEN_INT (ival)));
+ t = gen_lowpart (SImode, t);
+ SUBREG_PROMOTED_VAR_P (t) = 1;
+ SUBREG_PROMOTED_SET (t, SRP_SIGNED);
+ emit_move_insn (operands[0], t);
+ return true;
+ }
+
+ /* If the negated value is cheaper to synthesize, subtract it from
+ operands[1]. */
+ if (budget2 < budget1)
+ {
+ rtx tmp = gen_reg_rtx (SImode);
+ emit_insn (gen_rtx_SET (tmp, GEN_INT (-INTVAL (operands[2]))));
+
+ rtx t = gen_reg_rtx (DImode);
+ emit_insn (gen_subsi3_extended (t, operands[1], tmp));
+ t = gen_lowpart (SImode, t);
+ SUBREG_PROMOTED_VAR_P (t) = 1;
+ SUBREG_PROMOTED_SET (t, SRP_SIGNED);
+ emit_move_insn (operands[0], t);
+ return true;
+ }
+
+ rtx tsrc = force_reg (SImode, operands[2]);
+ rtx tdest = gen_reg_rtx (DImode);
+ emit_insn (gen_addsi3_extended (tdest, operands[1], tsrc));
+ tdest = gen_lowpart (SImode, tdest);
+ SUBREG_PROMOTED_VAR_P (tdest) = 1;
+ SUBREG_PROMOTED_SET (tdest, SRP_SIGNED);
+ emit_move_insn (operands[0], tdest);
+ return true;
+}
+
+/* HINT specifies the target cache.
+
+ TODO: LOCALITY is currently unused.
+
+ Return the first operand of the associated PREF or PREFX insn. */
+rtx
+riscv_prefetch_cookie (rtx hint, rtx locality)
+{
+ return (GEN_INT (INTVAL (hint)
+ + CacheHint::DCACHE_HINT + INTVAL (locality) * 0));
+}
+
+/* Return true if X is a legitimate address with offset for prefetch.
+ MODE is the mode of the value being accessed. */
+bool
+riscv_prefetch_offset_address_p (rtx x, machine_mode mode)
+{
+ struct riscv_address_info addr;
+
+ if (riscv_classify_address (&addr, x, mode, false)
+ && addr.type == ADDRESS_REG)
+ {
+ if (TARGET_XMIPSCBOP)
+ return (CONST_INT_P (addr.offset)
+ && MIPS_RISCV_9BIT_OFFSET_P (INTVAL (addr.offset)));
+ }
+
+ return true;
+}
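Taken together with the CacheHint values added to riscv.h later in this patch (DCACHE_HINT == 1 << 3) and the 9-bit offset predicate above, the mips.pref operand is just the hint with the data-cache selector folded in, LOCALITY being multiplied by zero until it is modeled. A small host-side sketch (hypothetical mirror of riscv_prefetch_cookie):

#include <stdio.h>

enum { ICACHE_HINT = 0 << 3, DCACHE_HINT = 1 << 3 };

static int
prefetch_cookie (int hint, int locality)
{
  /* LOCALITY is intentionally ignored for now.  */
  return hint + DCACHE_HINT + locality * 0;
}

int
main (void)
{
  printf ("%d\n", prefetch_cookie (0, 3));   /* read prefetch -> 8 */
  return 0;
}

For the MIPS case the predicate additionally requires a register base with a constant offset in [0, 511].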
+
/* Initialize the GCC target structure. */
#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
@@ -14068,6 +15616,9 @@ bool need_shadow_stack_push_pop_p ()
#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST riscv_sched_adjust_cost
+#undef TARGET_SCHED_CAN_SPECULATE_INSN
+#define TARGET_SCHED_CAN_SPECULATE_INSN riscv_sched_can_speculate_insn
+
#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL riscv_function_ok_for_sibcall
@@ -14394,6 +15945,12 @@ bool need_shadow_stack_push_pop_p ()
#undef TARGET_VECTOR_MODE_SUPPORTED_ANY_TARGET_P
#define TARGET_VECTOR_MODE_SUPPORTED_ANY_TARGET_P riscv_vector_mode_supported_any_target_p
+#undef TARGET_FUNCTION_VALUE
+#define TARGET_FUNCTION_VALUE riscv_function_value
+
+#undef TARGET_LIBCALL_VALUE
+#define TARGET_LIBCALL_VALUE riscv_libcall_value
+
#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P riscv_function_value_regno_p
diff --git a/gcc/config/riscv/riscv.h b/gcc/config/riscv/riscv.h
index 2759a4c..9146571 100644
--- a/gcc/config/riscv/riscv.h
+++ b/gcc/config/riscv/riscv.h
@@ -43,7 +43,7 @@ along with GCC; see the file COPYING3. If not see
#endif
#ifndef RISCV_TUNE_STRING_DEFAULT
-#define RISCV_TUNE_STRING_DEFAULT "rocket"
+#define RISCV_TUNE_STRING_DEFAULT "generic"
#endif
extern const char *riscv_expand_arch (int argc, const char **argv);
@@ -71,7 +71,7 @@ extern const char *riscv_arch_help (int argc, const char **argv);
{"tune", "%{!mtune=*:" \
" %{!mcpu=*:-mtune=%(VALUE)}" \
" %{mcpu=*:-mtune=%:riscv_default_mtune(%* %(VALUE))}}" }, \
- {"arch", "%{!march=*:" \
+ {"arch", "%{!march=*|march=unset:" \
" %{!mcpu=*:-march=%(VALUE)}" \
" %{mcpu=*:%:riscv_expand_arch_from_cpu(%* %(VALUE))}}" }, \
{"abi", "%{!mabi=*:-mabi=%(VALUE)}" }, \
@@ -111,13 +111,19 @@ extern const char *riscv_arch_help (int argc, const char **argv);
%(subtarget_asm_spec)" \
ASM_MISA_SPEC
+/* Drop all -march=* options before -march=unset. */
+#define ARCH_UNSET_CLEANUP_SPECS \
+ "%{march=unset:%<march=*} " \
+
#undef DRIVER_SELF_SPECS
#define DRIVER_SELF_SPECS \
+ARCH_UNSET_CLEANUP_SPECS \
"%{march=help:%:riscv_arch_help()} " \
"%{print-supported-extensions:%:riscv_arch_help()} " \
"%{-print-supported-extensions:%:riscv_arch_help()} " \
"%{march=*:%:riscv_expand_arch(%*)} " \
-"%{!march=*:%{mcpu=*:%:riscv_expand_arch_from_cpu(%*)}} "
+"%{!march=*|march=unset:%{mcpu=*:%:riscv_expand_arch_from_cpu(%*)}} " \
+"%{march=unset:%{!mcpu=*:%eAt least one valid -mcpu option must be given after -march=unset}} "
#define LOCAL_LABEL_PREFIX "."
#define USER_LABEL_PREFIX ""
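The practical effect of the new specs: -march=unset discards any earlier -march= options and re-derives the architecture from a following -mcpu=, erroring out if no -mcpu= is given. An illustrative invocation (the CPU name is just an example):

  riscv64-unknown-elf-gcc -march=rv64gc -march=unset -mcpu=sifive-u74 ...

drops rv64gc and expands the architecture string from the CPU, while omitting -mcpu= after -march=unset triggers the "At least one valid -mcpu option must be given after -march=unset" diagnostic.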
@@ -759,12 +765,6 @@ enum reg_class
#define CALLEE_SAVED_FREG_NUMBER(REGNO) CALLEE_SAVED_REG_NUMBER (REGNO - 32)
-#define LIBCALL_VALUE(MODE) \
- riscv_function_value (NULL_TREE, NULL_TREE, MODE)
-
-#define FUNCTION_VALUE(VALTYPE, FUNC) \
- riscv_function_value (VALTYPE, FUNC, VOIDmode)
-
/* 1 if N is a possible register number for function argument passing.
We have no FP argument registers when soft-float. */
@@ -1319,4 +1319,15 @@ extern void riscv_remove_unneeded_save_restore_calls (void);
#define TARGET_HAS_FMV_TARGET_ATTRIBUTE 0
+/* MIPS pref valid offset range. */
+#define MIPS_RISCV_9BIT_OFFSET_P(OFFSET) (IN_RANGE (OFFSET, 0, 511))
+
+/* MIPS pref cache hint type. */
+typedef enum {
+ ICACHE_HINT = 0 << 3,
+ DCACHE_HINT = 1 << 3,
+ SCACHE_HINT = 2 << 3,
+ TCACHE_HINT = 3 << 3
+} CacheHint;
+
#endif /* ! GCC_RISCV_H */
diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
index 26a247c..d34405c 100644
--- a/gcc/config/riscv/riscv.md
+++ b/gcc/config/riscv/riscv.md
@@ -120,7 +120,7 @@
;; Interrupt handler instructions.
UNSPECV_MRET
UNSPECV_SRET
- UNSPECV_URET
+ UNSPECV_MNRET
;; Blockage and synchronization.
UNSPECV_BLOCKAGE
@@ -495,6 +495,8 @@
;; SiFive custom extension instrctions
;; sf_vqmacc vector matrix integer multiply-add instructions
;; sf_vfnrclip vector fp32 to int8 ranged clip instructions
+;; sf_vc vector coprocessor interface without side effect
+;; sf_vc_se vector coprocessor interface with side effect
(define_attr "type"
"unknown,branch,jump,jalr,ret,call,load,fpload,store,fpstore,
mtc,mfc,const,arith,logical,shift,slt,imul,idiv,move,fmove,fadd,fmul,
@@ -516,7 +518,8 @@
vslideup,vslidedown,vislide1up,vislide1down,vfslide1up,vfslide1down,
vgather,vcompress,vmov,vector,vandn,vbrev,vbrev8,vrev8,vclz,vctz,vcpop,vrol,vror,vwsll,
vclmul,vclmulh,vghsh,vgmul,vaesef,vaesem,vaesdf,vaesdm,vaeskf1,vaeskf2,vaesz,
- vsha2ms,vsha2ch,vsha2cl,vsm4k,vsm4r,vsm3me,vsm3c,vfncvtbf16,vfwcvtbf16,vfwmaccbf16"
+ vsha2ms,vsha2ch,vsha2cl,vsm4k,vsm4r,vsm3me,vsm3c,vfncvtbf16,vfwcvtbf16,vfwmaccbf16,
+ sf_vc,sf_vc_se"
(cond [(eq_attr "got" "load") (const_string "load")
;; If a doubleword move uses these expensive instructions,
@@ -669,7 +672,7 @@
;; Microarchitectures we know how to tune for.
;; Keep this in sync with enum riscv_microarchitecture.
(define_attr "tune"
- "generic,sifive_7,sifive_p400,sifive_p600,xiangshan,generic_ooo"
+ "generic,sifive_7,sifive_p400,sifive_p600,xiangshan,generic_ooo,mips_p8700"
(const (symbol_ref "((enum attr_tune) riscv_microarchitecture)")))
;; Describe a user's asm statement.
@@ -709,24 +712,45 @@
(set_attr "mode" "SI")])
(define_expand "addsi3"
- [(set (match_operand:SI 0 "register_operand" "=r,r")
- (plus:SI (match_operand:SI 1 "register_operand" " r,r")
- (match_operand:SI 2 "arith_operand" " r,I")))]
+ [(set (match_operand:SI 0 "register_operand")
+ (plus:SI (match_operand:SI 1 "register_operand")
+ (match_operand:SI 2 "reg_or_const_int_operand")))]
""
{
+ /* We may be able to find a faster sequence; if so, then we are
+ done. Otherwise let expansion continue normally. */
+ if (CONST_INT_P (operands[2])
+ && ((!TARGET_64BIT && synthesize_add (operands))
+ || (TARGET_64BIT && synthesize_add_extended (operands))))
+ DONE;
+
+ /* Constants have already been handled. */
if (TARGET_64BIT)
{
- rtx t = gen_reg_rtx (DImode);
- emit_insn (gen_addsi3_extended (t, operands[1], operands[2]));
- t = gen_lowpart (SImode, t);
- SUBREG_PROMOTED_VAR_P (t) = 1;
- SUBREG_PROMOTED_SET (t, SRP_SIGNED);
- emit_move_insn (operands[0], t);
+ rtx tdest = gen_reg_rtx (DImode);
+ emit_insn (gen_addsi3_extended (tdest, operands[1], operands[2]));
+ tdest = gen_lowpart (SImode, tdest);
+ SUBREG_PROMOTED_VAR_P (tdest) = 1;
+ SUBREG_PROMOTED_SET (tdest, SRP_SIGNED);
+ emit_move_insn (operands[0], tdest);
DONE;
}
+
+})
+
+(define_expand "adddi3"
+ [(set (match_operand:DI 0 "register_operand")
+ (plus:DI (match_operand:DI 1 "register_operand")
+ (match_operand:DI 2 "reg_or_const_int_operand")))]
+ "TARGET_64BIT"
+{
+ /* We may be able to find a faster sequence; if so, then we are
+ done. Otherwise let expansion continue normally. */
+ if (CONST_INT_P (operands[2]) && synthesize_add (operands))
+ DONE;
})
-(define_insn "adddi3"
+(define_insn "*adddi3"
[(set (match_operand:DI 0 "register_operand" "=r,r")
(plus:DI (match_operand:DI 1 "register_operand" " r,r")
(match_operand:DI 2 "arith_operand" " r,I")))]
@@ -789,7 +813,7 @@
rtx t5 = gen_reg_rtx (DImode);
rtx t6 = gen_reg_rtx (DImode);
- riscv_emit_binary (PLUS, operands[0], operands[1], operands[2]);
+ emit_insn (gen_addsi3_extended (t6, operands[1], operands[2]));
if (GET_CODE (operands[1]) != CONST_INT)
emit_insn (gen_extend_insn (t4, operands[1], DImode, SImode, 0));
else
@@ -799,7 +823,10 @@
else
t5 = operands[2];
emit_insn (gen_adddi3 (t3, t4, t5));
- emit_insn (gen_extend_insn (t6, operands[0], DImode, SImode, 0));
+ rtx t7 = gen_lowpart (SImode, t6);
+ SUBREG_PROMOTED_VAR_P (t7) = 1;
+ SUBREG_PROMOTED_SET (t7, SRP_SIGNED);
+ emit_move_insn (operands[0], t7);
riscv_expand_conditional_branch (operands[3], NE, t6, t3);
}
@@ -835,8 +862,11 @@
emit_insn (gen_extend_insn (t3, operands[1], DImode, SImode, 0));
else
t3 = operands[1];
- riscv_emit_binary (PLUS, operands[0], operands[1], operands[2]);
- emit_insn (gen_extend_insn (t4, operands[0], DImode, SImode, 0));
+ emit_insn (gen_addsi3_extended (t4, operands[1], operands[2]));
+ rtx t5 = gen_lowpart (SImode, t4);
+ SUBREG_PROMOTED_VAR_P (t5) = 1;
+ SUBREG_PROMOTED_SET (t5, SRP_SIGNED);
+ emit_move_insn (operands[0], t5);
riscv_expand_conditional_branch (operands[3], LTU, t4, t3);
}
@@ -875,7 +905,7 @@
;; Where C1 is not a LUI operand, but ~C1 is a LUI operand
(define_insn_and_split "*lui_constraint<X:mode>_and_to_or"
- [(set (match_operand:X 0 "register_operand" "=r")
+ [(set (match_operand:X 0 "register_operand" "=r")
(plus:X (and:X (match_operand:X 1 "register_operand" "r")
(match_operand 2 "const_int_operand"))
(match_operand 3 "const_int_operand")))
@@ -889,13 +919,21 @@
<= riscv_const_insns (operands[3], false)))"
"#"
"&& reload_completed"
- [(set (match_dup 4) (match_dup 5))
- (set (match_dup 0) (ior:X (match_dup 1) (match_dup 4)))
- (set (match_dup 4) (match_dup 6))
- (set (match_dup 0) (minus:X (match_dup 0) (match_dup 4)))]
+ [(const_int 0)]
{
operands[5] = GEN_INT (~INTVAL (operands[2]));
operands[6] = GEN_INT ((~INTVAL (operands[2])) | (-INTVAL (operands[3])));
+
+ /* This is always a LUI operand, so it's safe to just emit. */
+ emit_move_insn (operands[4], operands[5]);
+
+ rtx x = gen_rtx_IOR (word_mode, operands[1], operands[4]);
+ emit_move_insn (operands[0], x);
+
+ /* This may require multiple steps to synthesize. */
+ riscv_emit_move (operands[4], operands[6]);
+ x = gen_rtx_MINUS (word_mode, operands[0], operands[4]);
+ emit_move_insn (operands[0], x);
}
[(set_attr "type" "arith")])
@@ -966,7 +1004,7 @@
rtx t5 = gen_reg_rtx (DImode);
rtx t6 = gen_reg_rtx (DImode);
- riscv_emit_binary (MINUS, operands[0], operands[1], operands[2]);
+ emit_insn (gen_subsi3_extended (t6, operands[1], operands[2]));
if (GET_CODE (operands[1]) != CONST_INT)
emit_insn (gen_extend_insn (t4, operands[1], DImode, SImode, 0));
else
@@ -976,7 +1014,10 @@
else
t5 = operands[2];
emit_insn (gen_subdi3 (t3, t4, t5));
- emit_insn (gen_extend_insn (t6, operands[0], DImode, SImode, 0));
+ rtx t7 = gen_lowpart (SImode, t6);
+ SUBREG_PROMOTED_VAR_P (t7) = 1;
+ SUBREG_PROMOTED_SET (t7, SRP_SIGNED);
+ emit_move_insn (operands[0], t7);
riscv_expand_conditional_branch (operands[3], NE, t6, t3);
}
@@ -1015,8 +1056,11 @@
emit_insn (gen_extend_insn (t3, operands[1], DImode, SImode, 0));
else
t3 = operands[1];
- riscv_emit_binary (MINUS, operands[0], operands[1], operands[2]);
- emit_insn (gen_extend_insn (t4, operands[0], DImode, SImode, 0));
+ emit_insn (gen_subsi3_extended (t4, operands[1], operands[2]));
+ rtx t5 = gen_lowpart (SImode, t4);
+ SUBREG_PROMOTED_VAR_P (t5) = 1;
+ SUBREG_PROMOTED_SET (t5, SRP_SIGNED);
+ emit_move_insn (operands[0], t5);
riscv_expand_conditional_branch (operands[3], LTU, t3, t4);
}
@@ -1709,26 +1753,11 @@
(define_expand "and<mode>3"
[(set (match_operand:X 0 "register_operand")
(and:X (match_operand:X 1 "register_operand")
- (match_operand:X 2 "arith_or_mode_mask_or_zbs_operand")))]
+ (match_operand:X 2 "reg_or_const_int_operand")))]
""
{
- /* If the second operand is a mode mask, emit an extension
- insn instead. */
- if (CONST_INT_P (operands[2]))
- {
- enum machine_mode tmode = VOIDmode;
- if (UINTVAL (operands[2]) == GET_MODE_MASK (HImode))
- tmode = HImode;
- else if (UINTVAL (operands[2]) == GET_MODE_MASK (SImode))
- tmode = SImode;
-
- if (tmode != VOIDmode)
- {
- rtx tmp = gen_lowpart (tmode, operands[1]);
- emit_insn (gen_extend_insn (operands[0], tmp, <MODE>mode, tmode, 1));
- DONE;
- }
- }
+ if (CONST_INT_P (operands[2]) && synthesize_and (operands))
+ DONE;
})
(define_insn "*and<mode>3"
@@ -1752,8 +1781,15 @@
(define_expand "<optab><mode>3"
[(set (match_operand:X 0 "register_operand")
(any_or:X (match_operand:X 1 "register_operand" "")
- (match_operand:X 2 "arith_or_zbs_operand" "")))]
- "")
+ (match_operand:X 2 "reg_or_const_int_operand" "")))]
+ ""
+
+{
+ /* If synthesis of the logical op is successful, then no further code
+ generation is necessary. Else just generate code normally. */
+ if (CONST_INT_P (operands[2]) && synthesize_ior_xor (<OPTAB>, operands))
+ DONE;
+})
(define_insn "*<optab><mode>3"
[(set (match_operand:X 0 "register_operand" "=r,r")
@@ -2278,12 +2314,16 @@
rtx abs_reg = gen_reg_rtx (<ANYF:MODE>mode);
rtx coeff_reg = gen_reg_rtx (<ANYF:MODE>mode);
rtx tmp_reg = gen_reg_rtx (<ANYF:MODE>mode);
+ rtx fflags = gen_reg_rtx (SImode);
riscv_emit_move (tmp_reg, operands[1]);
riscv_emit_move (coeff_reg,
riscv_vector::get_fp_rounding_coefficient (<ANYF:MODE>mode));
emit_insn (gen_abs<ANYF:mode>2 (abs_reg, operands[1]));
+ /* The fp compare can set the invalid flag for NaN, so back up fflags. */
+ if (flag_trapping_math)
+ emit_insn (gen_riscv_frflags (fflags));
riscv_expand_conditional_branch (label, LT, abs_reg, coeff_reg);
emit_jump_insn (gen_jump (end_label));
@@ -2309,6 +2349,14 @@
emit_insn (gen_copysign<ANYF:mode>3 (tmp_reg, abs_reg, operands[1]));
emit_label (end_label);
+
+ /* Restore fflags, but after the label. This is slightly different
+ from the glibc implementation, which only needs to restore under
+ the label, since it checks for NaN first, meaning the following fp
+ compare can't raise fp exceptions and thus can't clobber fflags. */
+ if (flag_trapping_math)
+ emit_insn (gen_riscv_fsflags (fflags));
+
riscv_emit_move (operands[0], tmp_reg);
}
@@ -2494,8 +2542,8 @@
})
(define_insn "*movdi_32bit"
- [(set (match_operand:DI 0 "nonimmediate_operand" "=r,r,r,m, *f,*f,*r,*f,*m,r")
- (match_operand:DI 1 "move_operand" " r,i,m,r,*J*r,*m,*f,*f,*f,vp"))]
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=r,r,r, m, *f,*f,*r,*f,*m,r")
+ (match_operand:DI 1 "move_operand" " r,i,m,rJ,*J*r,*m,*f,*f,*f,vp"))]
"!TARGET_64BIT
&& (register_operand (operands[0], DImode)
|| reg_or_0_operand (operands[1], DImode))"
@@ -2922,7 +2970,7 @@
[(set_attr "type" "shift")
(set_attr "mode" "DI")])
-(define_insn_and_split "*<optab><GPR:mode>3_mask_1"
+(define_insn "*<optab><GPR:mode>3_mask_1"
[(set (match_operand:GPR 0 "register_operand" "= r")
(any_shift:GPR
(match_operand:GPR 1 "register_operand" " r")
@@ -2931,12 +2979,14 @@
(match_operand:GPR2 2 "register_operand" "r")
(match_operand 3 "<GPR:shiftm1>"))])))]
""
- "#"
- "&& 1"
- [(set (match_dup 0)
- (any_shift:GPR (match_dup 1)
- (match_dup 2)))]
- "operands[2] = gen_lowpart (QImode, operands[2]);"
+{
+ /* If the shift mode is not word mode, then it must be the
+ case that we're generating rv64 code, but this is a 32-bit
+ operation. Thus we need to use the "w" variant. */
+ if (E_<GPR:MODE>mode != word_mode)
+ return "<insn>w\t%0,%1,%2";
+ return "<insn>\t%0,%1,%2";
+}
[(set_attr "type" "shift")
(set_attr "mode" "<GPR:MODE>")])
@@ -2955,7 +3005,7 @@
[(set_attr "type" "shift")
(set_attr "mode" "SI")])
-(define_insn_and_split "*<optab>si3_extend_mask"
+(define_insn "*<optab>si3_extend_mask"
[(set (match_operand:DI 0 "register_operand" "= r")
(sign_extend:DI
(any_shift:SI
@@ -2965,13 +3015,7 @@
(match_operand:GPR 2 "register_operand" " r")
(match_operand 3 "const_si_mask_operand"))]))))]
"TARGET_64BIT"
- "#"
- "&& 1"
- [(set (match_dup 0)
- (sign_extend:DI
- (any_shift:SI (match_dup 1)
- (match_dup 2))))]
- "operands[2] = gen_lowpart (QImode, operands[2]);"
+ "<insn>w\t%0,%1,%2"
[(set_attr "type" "shift")
(set_attr "mode" "SI")])
@@ -3169,15 +3213,25 @@
"#"
"&& reload_completed"
[(set (match_dup 4) (lshiftrt:X (subreg:X (match_dup 2) 0) (match_dup 6)))
- (set (match_dup 4) (and:X (match_dup 4) (match_dup 7)))
+ (set (match_dup 4) (match_dup 8))
(set (pc) (if_then_else (match_op_dup 1 [(match_dup 4) (const_int 0)])
(label_ref (match_dup 0)) (pc)))]
{
- HOST_WIDE_INT mask = INTVAL (operands[3]);
- int trailing = ctz_hwi (mask);
+ HOST_WIDE_INT mask = INTVAL (operands[3]);
+ int trailing = ctz_hwi (mask);
- operands[6] = GEN_INT (trailing);
- operands[7] = GEN_INT (mask >> trailing);
+ operands[6] = GEN_INT (trailing);
+ operands[7] = GEN_INT (mask >> trailing);
+
+ /* This splits after reload, so there's little chance to clean things
+ up. Rather than emit a ton of RTL here, we can just make a new
+ operand for that RHS and use it. For the case where the AND would
+ have been redundant, we can make it a NOP move, which does get
+ cleaned up. */
+ if (operands[7] == CONSTM1_RTX (word_mode))
+ operands[8] = operands[4];
+ else
+ operands[8] = gen_rtx_AND (word_mode, operands[4], operands[7]);
}
[(set_attr "type" "branch")])
@@ -3277,7 +3331,7 @@
(match_operand:GPR 2 "movcc_operand")
(match_operand:GPR 3 "movcc_operand")))]
"TARGET_SFB_ALU || TARGET_XTHEADCONDMOV || TARGET_ZICOND_LIKE
- || TARGET_MOVCC"
+ || TARGET_MOVCC || TARGET_XMIPSCMOV"
{
if (riscv_expand_conditional_move (operands[0], operands[1],
operands[2], operands[3]))
@@ -4145,11 +4199,11 @@
"sret"
[(set_attr "type" "ret")])
-(define_insn "riscv_uret"
+(define_insn "riscv_mnret"
[(return)
- (unspec_volatile [(const_int 0)] UNSPECV_URET)]
- ""
- "uret"
+ (unspec_volatile [(const_int 0)] UNSPECV_MNRET)]
+ "TARGET_SMRNMI"
+ "mnret"
[(set_attr "type" "ret")])
(define_insn "stack_tie<mode>"
@@ -4381,11 +4435,21 @@
)
(define_insn "prefetch"
- [(prefetch (match_operand 0 "address_operand" "r")
- (match_operand 1 "imm5_operand" "i")
- (match_operand 2 "const_int_operand" "n"))]
- "TARGET_ZICBOP"
+ [(prefetch (match_operand 0 "prefetch_operand" "Qr,ZD")
+ (match_operand 1 "imm5_operand" "i,i")
+ (match_operand 2 "const_int_operand" "n,n"))]
+ "TARGET_ZICBOP || TARGET_XMIPSCBOP"
{
+ if (TARGET_XMIPSCBOP)
+ {
+ /* MIPS prefetch write is a nop on the p8700. */
+ if (operands[1] != CONST0_RTX (GET_MODE (operands[1])))
+ return "nop";
+
+ operands[1] = riscv_prefetch_cookie (operands[1], operands[2]);
+ return "mips.pref\t%1,%a0";
+ }
+
switch (INTVAL (operands[1]))
{
case 0:
@@ -4401,7 +4465,7 @@
(const_string "4")))])
(define_insn "riscv_prefetchi_<mode>"
- [(unspec_volatile:X [(match_operand:X 0 "address_operand" "r")
+ [(unspec_volatile:X [(match_operand:X 0 "prefetch_operand" "Q")
(match_operand:X 1 "imm5_operand" "i")]
UNSPECV_PREI)]
"TARGET_ZICBOP"
@@ -4613,6 +4677,17 @@
}
)
+(define_expand "usmul<mode>3"
+ [(match_operand:ANYI 0 "register_operand")
+ (match_operand:ANYI 1 "register_operand")
+ (match_operand:ANYI 2 "register_operand")]
+ ""
+ {
+ riscv_expand_usmul (operands[0], operands[1], operands[2]);
+ DONE;
+ }
+)
+
(define_expand "ustrunc<mode><anyi_double_truncated>2"
[(match_operand:<ANYI_DOUBLE_TRUNCATED> 0 "register_operand")
(match_operand:ANYI_DOUBLE_TRUNC 1 "register_operand")]
@@ -4691,23 +4766,38 @@
(match_operand 2 "const_int_operand" "n"))
(match_operand 3 "const_int_operand" "n")))
(clobber (match_scratch:DI 4 "=&r"))]
- "(TARGET_64BIT && riscv_const_insns (operands[3], false) == 1)"
+ "(TARGET_64BIT
+ && riscv_const_insns (operands[3], false) == 1
+ && riscv_const_insns (GEN_INT (INTVAL (operands[3])
+ << INTVAL (operands[2])), false) != 1)"
"#"
"&& reload_completed"
[(const_int 0)]
"{
- rtx x = gen_rtx_ASHIFT (DImode, operands[1], operands[2]);
- emit_insn (gen_rtx_SET (operands[0], x));
-
- /* If the constant fits in a simm12, use it directly as we do not
- get another good chance to optimize things again. */
- if (!SMALL_OPERAND (INTVAL (operands[3])))
+ /* Prefer to generate shNadd when we can, even over using an
+ immediate form. If we're not going to be able to generate
+ a shNadd, then use the constant directly if it fits in a
+ simm12 field since we won't get another chance to optimize this. */
+ if ((TARGET_ZBA && imm123_operand (operands[2], word_mode))
+ || !SMALL_OPERAND (INTVAL (operands[3])))
emit_move_insn (operands[4], operands[3]);
else
operands[4] = operands[3];
- x = gen_rtx_PLUS (DImode, operands[0], operands[4]);
- emit_insn (gen_rtx_SET (operands[0], x));
+ if (TARGET_ZBA && imm123_operand (operands[2], word_mode))
+ {
+ rtx x = gen_rtx_ASHIFT (DImode, operands[1], operands[2]);
+ x = gen_rtx_PLUS (DImode, x, operands[4]);
+ emit_insn (gen_rtx_SET (operands[0], x));
+ }
+ else
+ {
+ rtx x = gen_rtx_ASHIFT (DImode, operands[1], operands[2]);
+ emit_insn (gen_rtx_SET (operands[0], x));
+ x = gen_rtx_PLUS (DImode, operands[0], operands[4]);
+ emit_insn (gen_rtx_SET (operands[0], x));
+ }
+
DONE;
}"
[(set_attr "type" "arith")])
@@ -4806,6 +4896,25 @@
[(set_attr "type" "move")
(set_attr "mode" "<MODE>")])
+;; If we're trying to create 0 or 2^n-1 based on the result of
+;; a test such as (lt (reg) (const_int 0)), we'll see a splat of
+;; the sign bit across a GPR using srai, then a logical and to
+;; mask off high bits. We can replace the logical and with
+;; a logical right shift which works without constant synthesis
+;; for larger constants.
+(define_split
+ [(set (match_operand:X 0 "register_operand")
+ (and:X (ashiftrt:X (match_operand:X 1 "register_operand")
+ (match_operand 2 "const_int_operand"))
+ (match_operand 3 "const_int_operand")))]
+ "(INTVAL (operands[2]) == BITS_PER_WORD - 1
+ && exact_log2 (INTVAL (operands[3]) + 1) >= 0)"
+ [(set (match_dup 0) (ashiftrt:X (match_dup 1) (match_dup 2)))
+ (set (match_dup 0) (lshiftrt:X (match_dup 0) (match_dup 3)))]
+ { operands[3] = GEN_INT (BITS_PER_WORD
+ - exact_log2 (INTVAL (operands[3]) + 1)); })
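The split is valid because the srai by BITS_PER_WORD - 1 splats the sign bit, so the result is either 0 or all ones, and masking it with 2^k - 1 equals a logical right shift by W - k. A host-side check of that claim (plain C, W == 64, k == 16):

#include <assert.h>
#include <stdint.h>

int
main (void)
{
  int64_t vals[] = { 0, 42, -1, INT64_MAX, INT64_MIN };
  for (int i = 0; i < 5; i++)
    {
      int64_t splat = vals[i] >> 63;                 /* srai x, 63 */
      assert (((uint64_t) splat & 0xffff)            /* and via constant */
              == ((uint64_t) splat >> (64 - 16)));   /* srli x, 48 */
    }
  return 0;
}

This is why no constant synthesis is needed even for masks like 0xffffffff that would not fit an andi immediate.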
+
+;; Standard extensions and patterns for optimization
(include "bitmanip.md")
(include "crypto.md")
(include "sync.md")
@@ -4813,18 +4922,22 @@
(include "sync-ztso.md")
(include "peephole.md")
(include "pic.md")
-(include "generic.md")
-(include "sifive-7.md")
-(include "sifive-p400.md")
-(include "sifive-p600.md")
-(include "thead.md")
-(include "generic-vector-ooo.md")
-(include "generic-ooo.md")
(include "vector.md")
(include "vector-crypto.md")
(include "vector-bfloat16.md")
(include "zicond.md")
+(include "mips-insn.md")
(include "sfb.md")
(include "zc.md")
+;; Vendor extensions
+(include "thead.md")
(include "corev.md")
+;; Pipeline models
+(include "generic.md")
(include "xiangshan.md")
+(include "mips-p8700.md")
+(include "sifive-7.md")
+(include "sifive-p400.md")
+(include "sifive-p600.md")
+(include "generic-vector-ooo.md")
+(include "generic-ooo.md")
diff --git a/gcc/config/riscv/riscv.opt b/gcc/config/riscv/riscv.opt
index 7515c8e..6543fd1 100644
--- a/gcc/config/riscv/riscv.opt
+++ b/gcc/config/riscv/riscv.opt
@@ -168,23 +168,14 @@ momit-leaf-frame-pointer
Target Mask(OMIT_LEAF_FRAME_POINTER) Save
Omit the frame pointer in leaf functions.
-Mask(64BIT)
-
-Mask(MUL)
-
-Mask(ATOMIC)
-
-Mask(HARD_FLOAT)
-
-Mask(DOUBLE_FLOAT)
-
-Mask(RVC)
+TargetVariable
+int riscv_isa_flags
-Mask(RVE)
+Mask(64BIT) Var(riscv_isa_flags)
-Mask(VECTOR)
+Mask(VECTOR) Var(riscv_isa_flags)
-Mask(FULL_V)
+Mask(FULL_V) Var(riscv_isa_flags)
mriscv-attribute
Target Var(riscv_emit_attribute_p) Init(-1)
@@ -233,93 +224,6 @@ TargetVariable
long riscv_stack_protector_guard_offset = 0
TargetVariable
-int riscv_zi_subext
-
-Mask(ZICSR) Var(riscv_zi_subext)
-
-Mask(ZIFENCEI) Var(riscv_zi_subext)
-
-Mask(ZIHINTNTL) Var(riscv_zi_subext)
-
-Mask(ZIHINTPAUSE) Var(riscv_zi_subext)
-
-Mask(ZICOND) Var(riscv_zi_subext)
-
-Mask(ZICCAMOA) Var(riscv_zi_subext)
-
-Mask(ZICCIF) Var(riscv_zi_subext)
-
-Mask(ZICCLSM) Var(riscv_zi_subext)
-
-Mask(ZICCRSE) Var(riscv_zi_subext)
-
-Mask(ZICFISS) Var(riscv_zi_subext)
-
-Mask(ZICFILP) Var(riscv_zi_subext)
-
-TargetVariable
-int riscv_za_subext
-
-Mask(ZAWRS) Var(riscv_za_subext)
-
-Mask(ZAAMO) Var(riscv_za_subext)
-
-Mask(ZALRSC) Var(riscv_za_subext)
-
-Mask(ZABHA) Var(riscv_za_subext)
-
-Mask(ZACAS) Var(riscv_za_subext)
-
-Mask(ZA64RS) Var(riscv_za_subext)
-
-Mask(ZA128RS) Var(riscv_za_subext)
-
-TargetVariable
-int riscv_zb_subext
-
-Mask(ZBA) Var(riscv_zb_subext)
-
-Mask(ZBB) Var(riscv_zb_subext)
-
-Mask(ZBC) Var(riscv_zb_subext)
-
-Mask(ZBS) Var(riscv_zb_subext)
-
-TargetVariable
-int riscv_zinx_subext
-
-Mask(ZFINX) Var(riscv_zinx_subext)
-
-Mask(ZDINX) Var(riscv_zinx_subext)
-
-Mask(ZHINX) Var(riscv_zinx_subext)
-
-Mask(ZHINXMIN) Var(riscv_zinx_subext)
-
-TargetVariable
-int riscv_zk_subext
-
-Mask(ZBKB) Var(riscv_zk_subext)
-
-Mask(ZBKC) Var(riscv_zk_subext)
-
-Mask(ZBKX) Var(riscv_zk_subext)
-
-Mask(ZKNE) Var(riscv_zk_subext)
-
-Mask(ZKND) Var(riscv_zk_subext)
-
-Mask(ZKNH) Var(riscv_zk_subext)
-
-Mask(ZKR) Var(riscv_zk_subext)
-
-Mask(ZKSED) Var(riscv_zk_subext)
-
-Mask(ZKSH) Var(riscv_zk_subext)
-
-Mask(ZKT) Var(riscv_zk_subext)
-
-TargetVariable
int riscv_vector_elen_flags
Mask(VECTOR_ELEN_32) Var(riscv_vector_elen_flags)
@@ -335,207 +239,6 @@ Mask(VECTOR_ELEN_FP_16) Var(riscv_vector_elen_flags)
Mask(VECTOR_ELEN_BF_16) Var(riscv_vector_elen_flags)
TargetVariable
-int riscv_zvl_flags
-
-Mask(ZVL32B) Var(riscv_zvl_flags)
-
-Mask(ZVL64B) Var(riscv_zvl_flags)
-
-Mask(ZVL128B) Var(riscv_zvl_flags)
-
-Mask(ZVL256B) Var(riscv_zvl_flags)
-
-Mask(ZVL512B) Var(riscv_zvl_flags)
-
-Mask(ZVL1024B) Var(riscv_zvl_flags)
-
-Mask(ZVL2048B) Var(riscv_zvl_flags)
-
-Mask(ZVL4096B) Var(riscv_zvl_flags)
-
-Mask(ZVL8192B) Var(riscv_zvl_flags)
-
-Mask(ZVL16384B) Var(riscv_zvl_flags)
-
-Mask(ZVL32768B) Var(riscv_zvl_flags)
-
-Mask(ZVL65536B) Var(riscv_zvl_flags)
-
-TargetVariable
-int riscv_zvb_subext
-
-Mask(ZVBB) Var(riscv_zvb_subext)
-
-Mask(ZVBC) Var(riscv_zvb_subext)
-
-Mask(ZVKB) Var(riscv_zvb_subext)
-
-TargetVariable
-int riscv_zvk_subext
-
-Mask(ZVKG) Var(riscv_zvk_subext)
-
-Mask(ZVKNED) Var(riscv_zvk_subext)
-
-Mask(ZVKNHA) Var(riscv_zvk_subext)
-
-Mask(ZVKNHB) Var(riscv_zvk_subext)
-
-Mask(ZVKSED) Var(riscv_zvk_subext)
-
-Mask(ZVKSH) Var(riscv_zvk_subext)
-
-Mask(ZVKN) Var(riscv_zvk_subext)
-
-Mask(ZVKNC) Var(riscv_zvk_subext)
-
-Mask(ZVKNG) Var(riscv_zvk_subext)
-
-Mask(ZVKS) Var(riscv_zvk_subext)
-
-Mask(ZVKSC) Var(riscv_zvk_subext)
-
-Mask(ZVKSG) Var(riscv_zvk_subext)
-
-Mask(ZVKT) Var(riscv_zvk_subext)
-
-TargetVariable
-int riscv_zicmo_subext
-
-Mask(ZICBOZ) Var(riscv_zicmo_subext)
-
-Mask(ZICBOM) Var(riscv_zicmo_subext)
-
-Mask(ZICBOP) Var(riscv_zicmo_subext)
-
-Mask(ZIC64B) Var(riscv_zicmo_subext)
-
-TargetVariable
-int riscv_mop_subext
-
-Mask(ZIMOP) Var(riscv_mop_subext)
-
-Mask(ZCMOP) Var(riscv_mop_subext)
-
-TargetVariable
-int riscv_zf_subext
-
-Mask(ZFBFMIN) Var(riscv_zf_subext)
-
-Mask(ZFHMIN) Var(riscv_zf_subext)
-
-Mask(ZFH) Var(riscv_zf_subext)
-
-Mask(ZVFBFMIN) Var(riscv_zf_subext)
-
-Mask(ZVFBFWMA) Var(riscv_zf_subext)
-
-Mask(ZVFHMIN) Var(riscv_zf_subext)
-
-Mask(ZVFH) Var(riscv_zf_subext)
-
-TargetVariable
-int riscv_zfa_subext
-
-Mask(ZFA) Var(riscv_zfa_subext)
-
-TargetVariable
-int riscv_zm_subext
-
-Mask(ZMMUL) Var(riscv_zm_subext)
-
-TargetVariable
-int riscv_zc_subext
-
-Mask(ZCA) Var(riscv_zc_subext)
-
-Mask(ZCB) Var(riscv_zc_subext)
-
-Mask(ZCE) Var(riscv_zc_subext)
-
-Mask(ZCF) Var(riscv_zc_subext)
-
-Mask(ZCD) Var(riscv_zc_subext)
-
-Mask(ZCMP) Var(riscv_zc_subext)
-
-Mask(ZCMT) Var(riscv_zc_subext)
-
-Mask(XCVBI) Var(riscv_xcv_subext)
-
-TargetVariable
-int riscv_sv_subext
-
-Mask(SVINVAL) Var(riscv_sv_subext)
-
-Mask(SVNAPOT) Var(riscv_sv_subext)
-
-Mask(SVVPTC) Var(riscv_sv_subext)
-
-TargetVariable
-int riscv_ztso_subext
-
-Mask(ZTSO) Var(riscv_ztso_subext)
-
-TargetVariable
-int riscv_xcv_subext
-
-Mask(XCVMAC) Var(riscv_xcv_subext)
-
-Mask(XCVALU) Var(riscv_xcv_subext)
-
-Mask(XCVELW) Var(riscv_xcv_subext)
-
-Mask(XCVSIMD) Var(riscv_xcv_subext)
-
-TargetVariable
-int riscv_xthead_subext
-
-Mask(XTHEADBA) Var(riscv_xthead_subext)
-
-Mask(XTHEADBB) Var(riscv_xthead_subext)
-
-Mask(XTHEADBS) Var(riscv_xthead_subext)
-
-Mask(XTHEADCMO) Var(riscv_xthead_subext)
-
-Mask(XTHEADCONDMOV) Var(riscv_xthead_subext)
-
-Mask(XTHEADFMEMIDX) Var(riscv_xthead_subext)
-
-Mask(XTHEADFMV) Var(riscv_xthead_subext)
-
-Mask(XTHEADINT) Var(riscv_xthead_subext)
-
-Mask(XTHEADMAC) Var(riscv_xthead_subext)
-
-Mask(XTHEADMEMIDX) Var(riscv_xthead_subext)
-
-Mask(XTHEADMEMPAIR) Var(riscv_xthead_subext)
-
-Mask(XTHEADSYNC) Var(riscv_xthead_subext)
-
-Mask(XTHEADVECTOR) Var(riscv_xthead_subext)
-
-TargetVariable
-int riscv_xventana_subext
-
-Mask(XVENTANACONDOPS) Var(riscv_xventana_subext)
-
-TargetVariable
-int riscv_sifive_subext
-
-Mask(XSFVCP) Var(riscv_sifive_subext)
-
-Mask(XSFCEASE) Var(riscv_sifive_subext)
-
-Mask(XSFVQMACCQOQ) Var(riscv_sifive_subext)
-
-Mask(XSFVQMACCDOD) Var(riscv_sifive_subext)
-
-Mask(XSFVFNRCLIPXFQF) Var(riscv_sifive_subext)
-
-TargetVariable
int riscv_fmv_priority = 0
Enum
@@ -579,6 +282,18 @@ Inline strlen calls if possible.
Target RejectNegative Joined UInteger Var(riscv_strcmp_inline_limit) Init(64)
Max number of bytes to compare as part of inlined strcmp/strncmp routines (default: 64).
+-param=gpr2vr-cost=
+Target RejectNegative Joined UInteger Var(gpr2vr_cost) Init(GPR2VR_COST_UNPROVIDED)
+Set the cost value of the rvv instruction when operating from GPR to VR.
+
+-param=fpr2vr-cost=
+Target RejectNegative Joined UInteger Var(fpr2vr_cost) Init(FPR2VR_COST_UNPROVIDED)
+Set the cost value of the rvv instruction when operating from FPR to VR.
+
+-param=riscv-autovec-mode=
+Target Undocumented RejectNegative Joined Var(riscv_autovec_mode) Save
+Set the only autovec mode to try.
+
Enum
Name(rvv_max_lmul) Type(enum rvv_max_lmul_enum)
The RVV possible LMUL (-mrvv-max-lmul=):
diff --git a/gcc/config/riscv/sifive-7.md b/gcc/config/riscv/sifive-7.md
index c208541..a04b80b 100644
--- a/gcc/config/riscv/sifive-7.md
+++ b/gcc/config/riscv/sifive-7.md
@@ -1,4 +1,4 @@
-(define_automaton "sifive_7")
+(define_automaton "sifive_7,sifive_7_vec,sifive_7_vec_mem")
;; Sifive 7 Series Base Core
;; This has two pipelines, A (Address) and B (Branch).
@@ -11,6 +11,14 @@
(define_cpu_unit "sifive_7_idiv" "sifive_7")
(define_cpu_unit "sifive_7_fpu" "sifive_7")
+;; Vector command queue
+(define_cpu_unit "sifive_7_vcq" "sifive_7")
+;; Vector arithmetic sequencer
+(define_cpu_unit "sifive_7_va" "sifive_7_vec")
+;; Vector store sequencer
+(define_cpu_unit "sifive_7_vs" "sifive_7_vec_mem")
+;; Vector load sequencer
+(define_cpu_unit "sifive_7_vl" "sifive_7_vec_mem")
(define_insn_reservation "sifive_7_load" 3
(and (eq_attr "tune" "sifive_7")
@@ -60,9 +68,14 @@
(define_insn_reservation "sifive_7_alu" 2
(and (eq_attr "tune" "sifive_7")
(eq_attr "type" "unknown,arith,shift,slt,multi,logical,move,bitmanip,\
- rotate,min,max,minu,maxu,clz,ctz,atomic,condmove,mvpair,zicond"))
+ min,max,minu,maxu,atomic,condmove,mvpair,zicond"))
"sifive_7_A|sifive_7_B")
+(define_insn_reservation "sifive_7_alu_b" 2
+ (and (eq_attr "tune" "sifive_7")
+ (eq_attr "type" "clz,ctz,rotate"))
+ "sifive_7_B")
+
(define_insn_reservation "sifive_7_load_immediate" 1
(and (eq_attr "tune" "sifive_7")
(eq_attr "type" "nop,const,auipc"))
@@ -91,6 +104,12 @@
(eq_attr "type" "fcvt,fcvt_i2f,fcvt_f2i,fcmp,fmove"))
"sifive_7_B")
+(define_insn_reservation "sifive_7_fdiv_h" 14
+ (and (eq_attr "tune" "sifive_7")
+ (eq_attr "type" "fdiv,fsqrt")
+ (eq_attr "mode" "HF"))
+ "sifive_7_B,sifive_7_fpu*13")
+
(define_insn_reservation "sifive_7_fdiv_s" 27
(and (eq_attr "tune" "sifive_7")
(eq_attr "type" "fdiv,fsqrt")
@@ -119,6 +138,21 @@
(eq_attr "type" "cpop,clmul"))
"sifive_7_A")
+(define_insn_reservation "sifive_7_csr" 5
+ (and (eq_attr "tune" "sifive_7")
+ (eq_attr "type" "rdfrm,wrfrm,wrvxrm"))
+ "sifive_7_A")
+
+(define_insn_reservation "sifive_7_crypto" 10
+ (and (eq_attr "tune" "sifive_7")
+ (eq_attr "type" "crypto"))
+ "sifive_7_A")
+
+(define_insn_reservation "sifive_7_unknown" 10
+ (and (eq_attr "tune" "sifive_7")
+ (eq_attr "type" "ghost"))
+ "sifive_7_A")
+
(define_bypass 1 "sifive_7_load,sifive_7_alu,sifive_7_mul,sifive_7_f2i,sifive_7_sfb_alu"
"sifive_7_alu,sifive_7_branch")
@@ -129,13 +163,140 @@
"sifive_7_store" "riscv_store_data_bypass_p")
(define_bypass 2 "sifive_7_i2f"
- "sifive_7_sfma,sifive_7_dfma,sifive_7_fp_other,sifive_7_fdiv_s,sifive_7_fdiv_d")
+ "sifive_7_sfma,sifive_7_dfma,sifive_7_fp_other,sifive_7_fdiv_h,sifive_7_fdiv_s,sifive_7_fdiv_d,sifive_7_hfma")
(define_bypass 2 "sifive_7_fp_other"
- "sifive_7_sfma,sifive_7_dfma,sifive_7_fp_other,sifive_7_fdiv_s,sifive_7_fdiv_d")
+ "sifive_7_sfma,sifive_7_dfma,sifive_7_fp_other,sifive_7_fdiv_h,sifive_7_fdiv_s,sifive_7_fdiv_d,sifive_7_hfma")
(define_bypass 2 "sifive_7_fp_other"
"sifive_7_alu,sifive_7_branch")
(define_bypass 2 "sifive_7_fp_other"
"sifive_7_store" "riscv_store_data_bypass_p")
+
+;; Vector pipeline
+;; The latency depends on LMUL, but we haven't modeled that yet since we
+;; don't want to expand the rules too much unless we can show the model
+;; gives a meaningful performance difference.
+
+(define_insn_reservation "sifive_7_vsetvl" 2
+ (and (eq_attr "tune" "sifive_7")
+ (eq_attr "type" "vsetvl_pre,vsetvl,rdvlenb,rdvl"))
+ "sifive_7_A")
+
+(define_insn_reservation "sifive_7_vec_load" 4
+ (and (eq_attr "tune" "sifive_7")
+ (eq_attr "type" "vlde,vldm,vlds,vldux,vldox,vldff,vldr,
+ vlsegde,vlsegds,vlsegdux,vlsegdox,vlsegdff"))
+ "sifive_7_vcq,sifive_7_vl*3")
+
+(define_insn_reservation "sifive_7_vec_store" 4
+ (and (eq_attr "tune" "sifive_7")
+ (eq_attr "type" "vste,vstm,vsts,vstux,vstox,vstr,
+ vssegte,vssegts,vssegtux,vssegtox"))
+ "sifive_7_vcq,sifive_7_vs*3")
+
+(define_insn_reservation "sifive_7_vec_ialu" 4
+ (and (eq_attr "tune" "sifive_7")
+ (eq_attr "type" "vimovxv,vmov,vimovvx,vialu,vicalu,vext,
+ vshift,viminmax,vimerge,vbrev,vrev8,
+ vimov,vext,vbrev8,vclz,vctz,vcpop,vrol,vror,vandn"))
+ "sifive_7_vcq,sifive_7_va*3")
+
+(define_insn_reservation "sifive_7_vec_slow_ialu" 8
+ (and (eq_attr "tune" "sifive_7")
+ (eq_attr "type" "vshift,vimul,vimuladd"))
+ "sifive_7_vcq,sifive_7_va*7")
+
+(define_insn_reservation "sifive_7_vec_cmp" 4
+ (and (eq_attr "tune" "sifive_7")
+ (eq_attr "type" "vicmp"))
+ "sifive_7_vcq,sifive_7_va*3")
+
+(define_insn_reservation "sifive_7_vec_iwalu" 8
+ (and (eq_attr "tune" "sifive_7")
+ (eq_attr "type" "viwalu,viwmul,viwmuladd,vnshift,vwsll"))
+ "sifive_7_vcq,sifive_7_va*7")
+
+(define_insn_reservation "sifive_7_vec_div" 16
+ (and (eq_attr "tune" "sifive_7")
+ (eq_attr "type" "vidiv,vfdiv"))
+ "sifive_7_vcq,sifive_7_va*15")
+
+(define_insn_reservation "sifive_7_vec_fixed_point" 8
+ (and (eq_attr "tune" "sifive_7")
+ (eq_attr "type" "vsalu,vaalu,vsmul,vsshift"))
+ "sifive_7_vcq,sifive_7_va*7")
+
+(define_insn_reservation "sifive_7_vec_narrow_fixed_point" 8
+ (and (eq_attr "tune" "sifive_7")
+ (eq_attr "type" "vnclip"))
+ "sifive_7_vcq,sifive_7_va*7")
+
+(define_insn_reservation "sifive_7_vec_fsimple" 4
+ (and (eq_attr "tune" "sifive_7")
+ (eq_attr "type" "vfmovvf,vfmovfv,vfclass"))
+ "sifive_7_vcq,sifive_7_va*3")
+
+(define_insn_reservation "sifive_7_vec_falu" 8
+ (and (eq_attr "tune" "sifive_7")
+ (eq_attr "type" "vfalu,vfmul,vfmuladd,vfrecp,
+ vfcvtitof,vfcvtftoi,vfmerge,vfmov,vfsgnj"))
+ "sifive_7_vcq,sifive_7_va*7")
+
+(define_insn_reservation "sifive_7_vec_fcmp" 4
+ (and (eq_attr "tune" "sifive_7")
+ (eq_attr "type" "vfcmp,vfminmax"))
+ "sifive_7_vcq,sifive_7_va*3")
+
+(define_insn_reservation "sifive_7_vec_fsqrt_fdiv" 16
+ (and (eq_attr "tune" "sifive_7")
+ (eq_attr "type" "vfsqrt,vfdiv"))
+ "sifive_7_vcq,sifive_7_va*15")
+
+(define_insn_reservation "sifive_7_vec_fwalu" 8
+ (and (eq_attr "tune" "sifive_7")
+ (eq_attr "type" "vfwalu,vfwmul,vfwmuladd,vfwmaccbf16,vfwcvtitof,
+ vfwcvtftoi,vfwcvtftof,vfwcvtbf16,
+ vfncvtitof,vfncvtftoi,vfncvtftof,vfncvtbf16,
+ sf_vfnrclip,sf_vqmacc"))
+ "sifive_7_vcq,sifive_7_va*7")
+
+(define_insn_reservation "sifive_7_vec_red" 12
+ (and (eq_attr "tune" "sifive_7")
+ (eq_attr "type" "vired,vfredu,vfredo,viwred,vfwredu,vfwredo"))
+ "sifive_7_vcq,sifive_7_va*11")
+
+(define_insn_reservation "sifive_7_vec_mask" 4
+ (and (eq_attr "tune" "sifive_7")
+ (eq_attr "type" "vmalu,vmpop,vmffs,vmsfs"))
+ "sifive_7_vcq,sifive_7_va*3")
+
+(define_insn_reservation "sifive_7_vec_mask_special" 4
+ (and (eq_attr "tune" "sifive_7")
+ (eq_attr "type" "vmiota,vmidx"))
+ "sifive_7_vcq,sifive_7_va*3")
+
+(define_insn_reservation "sifive_7_vec_gather" 8
+ (and (eq_attr "tune" "sifive_7")
+ (eq_attr "type" "vgather"))
+ "sifive_7_vcq,sifive_7_va*7")
+
+(define_insn_reservation "sifive_7_vec_compress" 16
+ (and (eq_attr "tune" "sifive_7")
+ (eq_attr "type" "vcompress"))
+ "sifive_7_vcq,sifive_7_va*15")
+
+(define_insn_reservation "sifive_7_vec_slide" 4
+ (and (eq_attr "tune" "sifive_7")
+ (eq_attr "type" "vslideup,vslidedown,vislide1up,vislide1down,vfslide1up,vfslide1down"))
+ "sifive_7_vcq,sifive_7_va*3")
+
+;; Assume it's slow if the vector instruction type is unknown.
+(define_insn_reservation "sifive_7_vec_unknown" 16
+ (and (eq_attr "tune" "sifive_7")
+ (eq_attr "type" "vector,vclmul,vclmulh,vghsh,vgmul,
+ vaesef,vaesem,vaesdf,vaesdm,vaeskf1,vaeskf2,
+ vaesz,vsha2ms,vsha2ch,vsha2cl,
+ vsm4k,vsm4r,vsm3me,vsm3c,sf_vc,sf_vc_se"))
+ "sifive_7_vcq,sifive_7_va*15")
diff --git a/gcc/config/riscv/sifive-p400.md b/gcc/config/riscv/sifive-p400.md
index ed8b8ec..0acdbda 100644
--- a/gcc/config/riscv/sifive-p400.md
+++ b/gcc/config/riscv/sifive-p400.md
@@ -153,10 +153,13 @@
(eq_attr "type" "fmove,fcvt"))
"p400_float_pipe,sifive_p400_fpu")
+;; We need something for HF so that we don't abort during
+;; scheduling if someone were to ask for p400 scheduling while
+;; enabling the various HF mode extensions.
(define_insn_reservation "sifive_p400_fdiv_s" 18
(and (eq_attr "tune" "sifive_p400")
(eq_attr "type" "fdiv,fsqrt")
- (eq_attr "mode" "SF"))
+ (eq_attr "mode" "HF,SF"))
"sifive_p400_FM, sifive_p400_fdiv*5")
(define_insn_reservation "sifive_p400_fdiv_d" 31
@@ -178,3 +181,18 @@
(define_bypass 1 "sifive_p400_f2i"
"sifive_p400_branch,sifive_p400_sfb_alu,sifive_p400_mul,
sifive_p400_div,sifive_p400_alu,sifive_p400_cpop")
+
+;; Someone familiar with the p400 uarch needs to put
+;; these into the right reservations. This is just a placeholder
+;; for everything I found that had no mapping to a reservation.
+;;
+;; Note that even if the processor does not implement a particular
+;; instruction, it should still have suitable reservations, even if
+;; they are just dummies like this one.
+(define_insn_reservation "sifive_p400_unknown" 1
+ (and (eq_attr "tune" "sifive_p400")
+ (eq_attr "type" "ghost,vfrecp,vclmul,vldm,vmffs,vclmulh,vlsegde,vfcvtitof,vsm4k,vfcvtftoi,vfdiv,vsm3c,vsm4r,viwmuladd,vfwredu,vcpop,vfwmuladd,vstux,vsshift,vfwcvtftof,vfncvtftof,vfwmaccbf16,vext,vssegte,rdvl,vaeskf1,vfslide1up,vmov,vimovvx,vaesef,vfsqrt,viminmax,vfwcvtftoi,vssegtox,vfclass,viwmul,vector,vgmul,vsm3me,vfcmp,vstm,vfredo,vfwmul,vaeskf2,vstox,vfncvtbf16,vislide1up,vgather,vldox,viwred,vctz,vghsh,vsts,vslidedown,vfmerge,vicmp,vsmul,vlsegdff,vfalu,vfmov,vislide1down,vfminmax,vcompress,vldr,vldff,vlsegdux,vimuladd,vsalu,vidiv,sf_vqmacc,vfslide1down,vaesem,vimerge,vfncvtftoi,vfwcvtitof,vicalu,vaesz,sf_vc_se,vsha2cl,vmsfs,vldux,vmidx,vslideup,vired,vlde,vfwredo,vfmovfv,vbrev,vfncvtitof,rdfrm,vsetvl,vssegts,vimul,vialu,vbrev8,vfwalu,rdvlenb,sf_vfnrclip,vclz,vnclip,sf_vc,vimov,vste,vfmuladd,vfmovvf,vwsll,vsetvl_pre,vlds,vlsegds,vmiota,vmalu,wrvxrm,wrfrm,viwalu,vaesdm,vssegtux,vaesdf,vimovxv,vror,vnshift,vstr,vaalu,vsha2ms,crypto,vfwcvtbf16,vlsegdox,vrol,vandn,vfsgnj,vmpop,vfredu,vsha2ch,vshift,vrev8,vfmul"))
+ "p400_int_pipe+sifive_p400_ialu")
+
diff --git a/gcc/config/riscv/sifive-p600.md b/gcc/config/riscv/sifive-p600.md
index 2401349..ccd006d 100644
--- a/gcc/config/riscv/sifive-p600.md
+++ b/gcc/config/riscv/sifive-p600.md
@@ -157,10 +157,13 @@
(eq_attr "type" "fmove,fcvt"))
"float_pipe,sifive_p600_fpu")
+;; We need something for HF so that we don't abort during
+;; scheduling if someone were to ask for p600 scheduling while
+;; enabling the various HF mode extensions.
(define_insn_reservation "sifive_p600_fdiv_s" 11
(and (eq_attr "tune" "sifive_p600")
(eq_attr "type" "fdiv,fsqrt")
- (eq_attr "mode" "SF"))
+ (eq_attr "mode" "HF,SF"))
"sifive_p600_FM, sifive_p600_fdiv*5")
(define_insn_reservation "sifive_p600_fdiv_d" 19
@@ -182,3 +185,15 @@
(define_bypass 1 "sifive_p600_f2i"
"sifive_p600_branch,sifive_p600_sfb_alu,sifive_p600_mul,
sifive_p600_div,sifive_p600_alu,sifive_p600_cpop")
+
+;; Someone familiar with the p600 uarch needs to put
+;; these into the right reservations. This is just a placeholder
+;; for everything I found that had no mapping to a reservation.
+;;
+;; Note that even if the processor does not implement a particular
+;; instruction, it should still have suitable reservations, even if
+;; they are just dummies like this one.
+(define_insn_reservation "sifive_p600_unknown" 1
+ (and (eq_attr "tune" "sifive_p600")
+ (eq_attr "type" "vicmp,vssegte,vbrev8,vfwalu,vimov,vmpop,vaesdf,vislide1up,vror,vsha2cl,vrol,vslideup,vimuladd,vclmul,vaesef,vext,vlsegdff,vfmuladd,vfclass,vmsfs,vfcmp,vsmul,vsm3me,vmalu,vshift,viwmuladd,vfslide1up,vlsegde,vsm4k,wrvxrm,vislide1down,vsm3c,vfwmuladd,vaesdm,vclmulh,vfwcvtftof,vfwredu,vfredo,sf_vfnrclip,vaesz,vwsll,vmiota,vctz,vsetvl_pre,vstm,vidiv,vssegtux,vfwmul,vcompress,vste,vired,vlsegds,vaesem,vfminmax,ghost,vandn,crypto,vfmul,vialu,vfmovvf,rdfrm,vldff,vfmerge,vsshift,vnclip,sf_vqmacc,vnshift,vfdiv,vfslide1down,vfncvtitof,vfsqrt,vimovxv,vstr,vfwcvtbf16,vfwcvtitof,vbrev,vssegtox,vssegts,vcpop,vmffs,viwmul,vldr,vmidx,rdvlenb,vfalu,vslidedown,vlde,vfsgnj,vfmov,viwalu,vsha2ch,vfncvtbf16,vfcvtitof,rdvl,vsetvl,vsha2ms,vector,vstux,vimerge,vclz,sf_vc,vfcvtftoi,viminmax,vsm4r,sf_vc_se,wrfrm,vstox,vfmovfv,vfncvtftoi,vimul,vsalu,vmov,vgmul,vgather,vldux,vlsegdox,vfncvtftof,vimovvx,vghsh,vldm,vldox,vfwcvtftoi,vlds,vfrecp,vaeskf2,vsts,vfredu,vicalu,vaalu,vfwmaccbf16,vrev8,vfwredo,vlsegdux,viwred,vaeskf1"))
+ "int_pipe+sifive_p600_ialu")
diff --git a/gcc/config/riscv/sifive-vector-builtins-bases.cc b/gcc/config/riscv/sifive-vector-builtins-bases.cc
index 85e1b6f..be530ca 100644
--- a/gcc/config/riscv/sifive-vector-builtins-bases.cc
+++ b/gcc/config/riscv/sifive-vector-builtins-bases.cc
@@ -195,12 +195,89 @@ public:
}
};
+/* Implements SiFive sf.vc. */
+class sf_vc : public function_base
+{
+public:
+
+ unsigned int call_properties (const function_instance &) const override
+ {
+ return CP_USE_COPROCESSORS;
+ }
+
+ rtx expand (function_expander &e) const override
+ {
+ switch (e.op_info->op)
+ {
+ case OP_TYPE_x:
+ return e.use_exact_insn (code_for_sf_vc_x_se (e.vector_mode ()));
+ case OP_TYPE_i:
+ return e.use_exact_insn (code_for_sf_vc_i_se (e.vector_mode ()));
+ case OP_TYPE_vv:
+ return e.use_exact_insn (code_for_sf_vc_vv_se (e.vector_mode ()));
+ case OP_TYPE_xv:
+ return e.use_exact_insn (code_for_sf_vc_xv_se (e.vector_mode ()));
+ case OP_TYPE_iv:
+ return e.use_exact_insn (code_for_sf_vc_iv_se (e.vector_mode ()));
+ case OP_TYPE_fv:
+ return e.use_exact_insn (code_for_sf_vc_fv_se (e.vector_mode ()));
+ case OP_TYPE_v_x:
+ return e.use_exact_insn (code_for_sf_vc_v_x_se (e.vector_mode ()));
+ case OP_TYPE_v_i:
+ return e.use_exact_insn (code_for_sf_vc_v_i_se (e.vector_mode ()));
+ case OP_TYPE_v_vv:
+ return e.use_exact_insn (code_for_sf_vc_v_vv_se (e.vector_mode ()));
+ case OP_TYPE_v_xv:
+ return e.use_exact_insn (code_for_sf_vc_v_xv_se (e.vector_mode ()));
+ case OP_TYPE_v_iv:
+ return e.use_exact_insn (code_for_sf_vc_v_iv_se (e.vector_mode ()));
+ case OP_TYPE_v_fv:
+ return e.use_exact_insn (code_for_sf_vc_v_fv_se (e.vector_mode ()));
+ case OP_TYPE_vvv:
+ return e.use_exact_insn (code_for_sf_vc_vvv_se (e.vector_mode ()));
+ case OP_TYPE_xvv:
+ return e.use_exact_insn (code_for_sf_vc_xvv_se (e.vector_mode ()));
+ case OP_TYPE_ivv:
+ return e.use_exact_insn (code_for_sf_vc_ivv_se (e.vector_mode ()));
+ case OP_TYPE_fvv:
+ return e.use_exact_insn (code_for_sf_vc_fvv_se (e.vector_mode ()));
+ case OP_TYPE_vvw:
+ return e.use_exact_insn (code_for_sf_vc_vvw_se (e.vector_mode ()));
+ case OP_TYPE_xvw:
+ return e.use_exact_insn (code_for_sf_vc_xvw_se (e.vector_mode ()));
+ case OP_TYPE_ivw:
+ return e.use_exact_insn (code_for_sf_vc_ivw_se (e.vector_mode ()));
+ case OP_TYPE_fvw:
+ return e.use_exact_insn (code_for_sf_vc_fvw_se (e.vector_mode ()));
+ case OP_TYPE_v_vvv:
+ return e.use_exact_insn (code_for_sf_vc_v_vvv_se (e.vector_mode ()));
+ case OP_TYPE_v_xvv:
+ return e.use_exact_insn (code_for_sf_vc_v_xvv_se (e.vector_mode ()));
+ case OP_TYPE_v_ivv:
+ return e.use_exact_insn (code_for_sf_vc_v_ivv_se (e.vector_mode ()));
+ case OP_TYPE_v_fvv:
+ return e.use_exact_insn (code_for_sf_vc_v_fvv_se (e.vector_mode ()));
+ case OP_TYPE_v_vvw:
+ return e.use_exact_insn (code_for_sf_vc_v_vvw_se (e.vector_mode ()));
+ case OP_TYPE_v_xvw:
+ return e.use_exact_insn (code_for_sf_vc_v_xvw_se (e.vector_mode ()));
+ case OP_TYPE_v_ivw:
+ return e.use_exact_insn (code_for_sf_vc_v_ivw_se (e.vector_mode ()));
+ case OP_TYPE_v_fvw:
+ return e.use_exact_insn (code_for_sf_vc_v_fvw_se (e.vector_mode ()));
+ default:
+ gcc_unreachable ();
+ }
+ }
+};
+
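For context, this is roughly how the new builtins surface to users. A minimal
sketch, assuming the intrinsic names from SiFive's Xsfvcp specification
(exact prototypes may differ across toolchain versions):

#include <stdint.h>
#include <riscv_vector.h>

/* Hypothetical illustration.  The _se ("side effect") spellings map to
   the UNSPECV_SF_CV patterns added below; the plain forms map to
   UNSPEC_SF_CV and give the optimizers more freedom.  */
void
kick_coprocessor (uint32_t xs1, size_t vl)
{
  /* OP_TYPE_x: sf.vc.x -- send a scalar to the coprocessor, no
     vector result.  The three leading constants select the opcode
     bits matched by the Ou02/K/K constraints in the pattern.  */
  __riscv_sf_vc_x_se_u32m1 (1, 2, 3, xs1, vl);
}
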
static CONSTEXPR const sf_vqmacc sf_vqmacc_obj;
static CONSTEXPR const sf_vqmaccu sf_vqmaccu_obj;
static CONSTEXPR const sf_vqmaccsu sf_vqmaccsu_obj;
static CONSTEXPR const sf_vqmaccus sf_vqmaccus_obj;
static CONSTEXPR const sf_vfnrclip_x_f_qf<UNSPEC_SF_VFNRCLIP> sf_vfnrclip_x_f_qf_obj;
static CONSTEXPR const sf_vfnrclip_xu_f_qf<UNSPEC_SF_VFNRCLIPU> sf_vfnrclip_xu_f_qf_obj;
+static CONSTEXPR const sf_vc sf_vc_obj;
/* Declare the function base NAME, pointing it to an instance
of class <NAME>_obj. */
@@ -213,4 +290,5 @@ BASE (sf_vqmaccsu)
BASE (sf_vqmaccus)
BASE (sf_vfnrclip_x_f_qf)
BASE (sf_vfnrclip_xu_f_qf)
+BASE (sf_vc)
} // end namespace riscv_vector
diff --git a/gcc/config/riscv/sifive-vector-builtins-bases.h b/gcc/config/riscv/sifive-vector-builtins-bases.h
index 69e5540..4ec1e30 100644
--- a/gcc/config/riscv/sifive-vector-builtins-bases.h
+++ b/gcc/config/riscv/sifive-vector-builtins-bases.h
@@ -23,6 +23,8 @@
namespace riscv_vector {
+static const unsigned int CP_USE_COPROCESSORS = 1U << 6;
+
namespace bases {
extern const function_base *const sf_vqmacc;
extern const function_base *const sf_vqmaccu;
@@ -30,6 +32,7 @@ extern const function_base *const sf_vqmaccsu;
extern const function_base *const sf_vqmaccus;
extern const function_base *const sf_vfnrclip_x_f_qf;
extern const function_base *const sf_vfnrclip_xu_f_qf;
+extern const function_base *const sf_vc;
}
} // end namespace riscv_vector
diff --git a/gcc/config/riscv/sifive-vector-builtins-functions.def b/gcc/config/riscv/sifive-vector-builtins-functions.def
index e6621c7..f6703ae 100644
--- a/gcc/config/riscv/sifive-vector-builtins-functions.def
+++ b/gcc/config/riscv/sifive-vector-builtins-functions.def
@@ -55,4 +55,49 @@ DEF_RVV_FUNCTION (sf_vfnrclip_x_f_qf, sf_vfnrclip, full_preds, i_clip_qf_ops)
DEF_RVV_FUNCTION (sf_vfnrclip_xu_f_qf, sf_vfnrclip, full_preds, u_clip_qf_ops)
#undef REQUIRED_EXTENSIONS
+#define REQUIRED_EXTENSIONS XSFVCP_EXT
+DEF_RVV_FUNCTION (sf_vc, sf_vcix_se, none_preds, sf_vc_x_ops)
+DEF_RVV_FUNCTION (sf_vc, sf_vcix_se, none_preds, sf_vc_i_ops)
+DEF_RVV_FUNCTION (sf_vc, sf_vcix_se, none_preds, sf_vc_vv_ops)
+DEF_RVV_FUNCTION (sf_vc, sf_vcix_se, none_preds, sf_vc_xv_ops)
+DEF_RVV_FUNCTION (sf_vc, sf_vcix_se, none_preds, sf_vc_iv_ops)
+DEF_RVV_FUNCTION (sf_vc, sf_vcix_se, none_preds, sf_vc_fv_ops)
+DEF_RVV_FUNCTION (sf_vc, sf_vcix_se, none_preds, sf_vc_vvv_ops)
+DEF_RVV_FUNCTION (sf_vc, sf_vcix_se, none_preds, sf_vc_xvv_ops)
+DEF_RVV_FUNCTION (sf_vc, sf_vcix_se, none_preds, sf_vc_ivv_ops)
+DEF_RVV_FUNCTION (sf_vc, sf_vcix_se, none_preds, sf_vc_fvv_ops)
+DEF_RVV_FUNCTION (sf_vc, sf_vcix_se, none_preds, sf_vc_vvw_ops)
+DEF_RVV_FUNCTION (sf_vc, sf_vcix_se, none_preds, sf_vc_xvw_ops)
+DEF_RVV_FUNCTION (sf_vc, sf_vcix_se, none_preds, sf_vc_ivw_ops)
+DEF_RVV_FUNCTION (sf_vc, sf_vcix_se, none_preds, sf_vc_fvw_ops)
+DEF_RVV_FUNCTION (sf_vc, sf_vcix_se, none_preds, sf_vc_v_x_ops)
+DEF_RVV_FUNCTION (sf_vc, sf_vcix_se, none_preds, sf_vc_v_i_ops)
+DEF_RVV_FUNCTION (sf_vc, sf_vcix_se, none_preds, sf_vc_v_vv_ops)
+DEF_RVV_FUNCTION (sf_vc, sf_vcix_se, none_preds, sf_vc_v_xv_ops)
+DEF_RVV_FUNCTION (sf_vc, sf_vcix_se, none_preds, sf_vc_v_iv_ops)
+DEF_RVV_FUNCTION (sf_vc, sf_vcix_se, none_preds, sf_vc_v_fv_ops)
+DEF_RVV_FUNCTION (sf_vc, sf_vcix_se, none_preds, sf_vc_v_vvv_ops)
+DEF_RVV_FUNCTION (sf_vc, sf_vcix_se, none_preds, sf_vc_v_xvv_ops)
+DEF_RVV_FUNCTION (sf_vc, sf_vcix_se, none_preds, sf_vc_v_ivv_ops)
+DEF_RVV_FUNCTION (sf_vc, sf_vcix_se, none_preds, sf_vc_v_fvv_ops)
+DEF_RVV_FUNCTION (sf_vc, sf_vcix_se, none_preds, sf_vc_v_vvw_ops)
+DEF_RVV_FUNCTION (sf_vc, sf_vcix_se, none_preds, sf_vc_v_xvw_ops)
+DEF_RVV_FUNCTION (sf_vc, sf_vcix_se, none_preds, sf_vc_v_ivw_ops)
+DEF_RVV_FUNCTION (sf_vc, sf_vcix_se, none_preds, sf_vc_v_fvw_ops)
+DEF_RVV_FUNCTION (sf_vc, sf_vcix, none_preds, sf_vc_v_x_ops)
+DEF_RVV_FUNCTION (sf_vc, sf_vcix, none_preds, sf_vc_v_i_ops)
+DEF_RVV_FUNCTION (sf_vc, sf_vcix, none_preds, sf_vc_v_vv_ops)
+DEF_RVV_FUNCTION (sf_vc, sf_vcix, none_preds, sf_vc_v_xv_ops)
+DEF_RVV_FUNCTION (sf_vc, sf_vcix, none_preds, sf_vc_v_iv_ops)
+DEF_RVV_FUNCTION (sf_vc, sf_vcix, none_preds, sf_vc_v_fv_ops)
+DEF_RVV_FUNCTION (sf_vc, sf_vcix, none_preds, sf_vc_v_vvv_ops)
+DEF_RVV_FUNCTION (sf_vc, sf_vcix, none_preds, sf_vc_v_xvv_ops)
+DEF_RVV_FUNCTION (sf_vc, sf_vcix, none_preds, sf_vc_v_ivv_ops)
+DEF_RVV_FUNCTION (sf_vc, sf_vcix, none_preds, sf_vc_v_fvv_ops)
+DEF_RVV_FUNCTION (sf_vc, sf_vcix, none_preds, sf_vc_v_vvw_ops)
+DEF_RVV_FUNCTION (sf_vc, sf_vcix, none_preds, sf_vc_v_xvw_ops)
+DEF_RVV_FUNCTION (sf_vc, sf_vcix, none_preds, sf_vc_v_ivw_ops)
+DEF_RVV_FUNCTION (sf_vc, sf_vcix, none_preds, sf_vc_v_fvw_ops)
+#undef REQUIRED_EXTENSIONS
+
#undef DEF_RVV_FUNCTION
diff --git a/gcc/config/riscv/sifive-vector.md b/gcc/config/riscv/sifive-vector.md
index 2975b1e..a416634 100644
--- a/gcc/config/riscv/sifive-vector.md
+++ b/gcc/config/riscv/sifive-vector.md
@@ -182,3 +182,874 @@
"sf.vfnrclip.x<v_su>.f.qf\t%0,%3,%4%p1"
[(set_attr "type" "sf_vfnrclip")
(set_attr "mode" "<MODE>")])
+
+;; SF_VCP
+(define_insn "@sf_vc_x_se<mode>"
+ [(unspec:<VM>
+ [(match_operand:<VM> 0 "vector_mask_operand" " Wc1")
+ (match_operand 5 "vector_length_operand" " rK")
+ (match_operand 6 "const_int_operand" " i")
+ (match_operand 7 "const_int_operand" " i")
+ (match_operand 8 "const_int_operand" " i")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+ (unspec_volatile:VFULLI
+ [(match_operand:SI 1 "const_int_operand" "Ou02")
+ (match_operand:SI 2 "const_int_operand" "K")
+ (match_operand:SI 3 "const_int_operand" "K")
+ (match_operand:<VEL> 4 "register_operand" "r")] UNSPECV_SF_CV)]
+ "TARGET_VECTOR && TARGET_XSFVCP"
+ "sf.vc.x\t%1,%2,%3,%4"
+ [(set_attr "type" "sf_vc_se")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "@sf_vc_v_x_se<mode>"
+ [(set (match_operand:VFULLI 0 "register_operand" "=vr,vr")
+ (if_then_else:VFULLI
+ (unspec:<VM>
+ [(match_operand:<VM> 1 "vector_mask_operand" " Wc1,Wc1")
+ (match_operand 6 "vector_length_operand" " rK, rK")
+ (match_operand 7 "const_int_operand" " i, i")
+ (match_operand 8 "const_int_operand" " i, i")
+ (match_operand 9 "const_int_operand" " i, i")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+ (unspec_volatile:VFULLI
+ [(match_operand:SI 3 "const_int_operand" "Ou02,Ou02")
+ (match_operand:SI 4 "const_int_operand" "K,K")
+ (match_operand:<VEL> 5 "register_operand" "r,r")] UNSPECV_SF_CV)
+ (match_operand:VFULLI 2 "vector_merge_operand" "vu,vu")))]
+ "TARGET_VECTOR && TARGET_XSFVCP"
+ "sf.vc.v.x\t%3,%4,%0,%5"
+ [(set_attr "type" "sf_vc_se")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "@sf_vc_v_x<mode>"
+ [(set (match_operand:VFULLI 0 "register_operand" "=vr,vr")
+ (if_then_else:VFULLI
+ (unspec:<VM>
+ [(match_operand:<VM> 1 "vector_mask_operand" " Wc1,Wc1")
+ (match_operand 6 "vector_length_operand" " rK, rK")
+ (match_operand 7 "const_int_operand" " i, i")
+ (match_operand 8 "const_int_operand" " i, i")
+ (match_operand 9 "const_int_operand" " i, i")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+ (unspec:VFULLI
+ [(match_operand:SI 3 "const_int_operand" "Ou02,Ou02")
+ (match_operand:SI 4 "const_int_operand" "K,K")
+ (match_operand:<VEL> 5 "register_operand" "r,r")] UNSPEC_SF_CV)
+ (match_operand:VFULLI 2 "vector_merge_operand" "vu,vu")))]
+ "TARGET_VECTOR && TARGET_XSFVCP"
+ "sf.vc.v.x\t%3,%4,%0,%5"
+ [(set_attr "type" "sf_vc")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "@sf_vc_i_se<mode>"
+ [(unspec:<VM>
+ [(match_operand:<VM> 0 "vector_mask_operand" " Wc1")
+ (match_operand 5 "vector_length_operand" " rK")
+ (match_operand 6 "const_int_operand" " i")
+ (match_operand 7 "const_int_operand" " i")
+ (match_operand 8 "const_int_operand" " i")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+ (unspec_volatile:VFULLI
+ [(match_operand:SI 1 "const_int_operand" "Ou02")
+ (match_operand:SI 2 "const_int_operand" "K")
+ (match_operand:SI 3 "const_int_operand" "K")
+ (match_operand:SI 4 "const_int_operand" "P")] UNSPECV_SF_CV)]
+ "TARGET_VECTOR && TARGET_XSFVCP"
+ "sf.vc.i\t%1,%2,%3,%4"
+ [(set_attr "type" "sf_vc_se")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "@sf_vc_v_i_se<mode>"
+ [(set (match_operand:VFULLI 0 "register_operand" "=vr,vr")
+ (if_then_else:VFULLI
+ (unspec:<VM>
+ [(match_operand:<VM> 1 "vector_mask_operand" " Wc1,Wc1")
+ (match_operand 6 "vector_length_operand" " rK, rK")
+ (match_operand 7 "const_int_operand" " i, i")
+ (match_operand 8 "const_int_operand" " i, i")
+ (match_operand 9 "const_int_operand" " i, i")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+ (unspec_volatile:VFULLI
+ [(match_operand:SI 3 "const_int_operand" "Ou02,Ou02")
+ (match_operand:SI 4 "const_int_operand" "K,K")
+ (match_operand:SI 5 "const_int_operand" "P,P")] UNSPECV_SF_CV)
+ (match_operand:VFULLI 2 "vector_merge_operand" "vu,vu")))]
+ "TARGET_VECTOR && TARGET_XSFVCP"
+ "sf.vc.v.i\t%3,%4,%0,%5"
+ [(set_attr "type" "sf_vc_se")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "@sf_vc_v_i<mode>"
+ [(set (match_operand:VFULLI 0 "register_operand" "=vr,vr")
+ (if_then_else:VFULLI
+ (unspec:<VM>
+ [(match_operand:<VM> 1 "vector_mask_operand" " Wc1,Wc1")
+ (match_operand 6 "vector_length_operand" " rK, rK")
+ (match_operand 7 "const_int_operand" " i, i")
+ (match_operand 8 "const_int_operand" " i, i")
+ (match_operand 9 "const_int_operand" " i, i")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+ (unspec:VFULLI
+ [(match_operand:SI 3 "const_int_operand" "Ou02,Ou02")
+ (match_operand:SI 4 "const_int_operand" "K,K")
+ (match_operand:SI 5 "const_int_operand" "P,P")] UNSPEC_SF_CV)
+ (match_operand:VFULLI 2 "vector_merge_operand" "vu,vu")))]
+ "TARGET_VECTOR && TARGET_XSFVCP"
+ "sf.vc.v.i\t%3,%4,%0,%5"
+ [(set_attr "type" "sf_vc")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "@sf_vc_vv_se<mode>"
+ [(unspec:<VM>
+ [(match_operand:<VM> 0 "vector_mask_operand" " Wc1")
+ (match_operand 5 "vector_length_operand" " rK")
+ (match_operand 6 "const_int_operand" " i")
+ (match_operand 7 "const_int_operand" " i")
+ (match_operand 8 "const_int_operand" " i")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+ (unspec_volatile:VFULLI
+ [(match_operand:SI 1 "const_int_operand" "Ou02")
+ (match_operand:SI 2 "const_int_operand" "K")
+ (match_operand:VFULLI 3 "register_operand" "vr")
+ (match_operand:VFULLI 4 "register_operand" "vr")] UNSPECV_SF_CV)]
+ "TARGET_VECTOR && TARGET_XSFVCP"
+ "sf.vc.vv\t%1,%2,%3,%4"
+ [(set_attr "type" "sf_vc_se")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "@sf_vc_v_vv_se<mode>"
+ [(set (match_operand:VFULLI 0 "register_operand" "=&vr,vr")
+ (if_then_else:VFULLI
+ (unspec:<VM>
+ [(match_operand:<VM> 1 "vector_mask_operand" " Wc1,Wc1")
+ (match_operand 6 "vector_length_operand" " rK, rK")
+ (match_operand 7 "const_int_operand" " i, i")
+ (match_operand 8 "const_int_operand" " i, i")
+ (match_operand 9 "const_int_operand" " i, i")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+ (unspec_volatile:VFULLI
+ [(match_operand:SI 3 "const_int_operand" "Ou02,Ou02")
+ (match_operand:VFULLI 4 "register_operand" "vr,vr")
+ (match_operand:VFULLI 5 "register_operand" "vr,vr")] UNSPECV_SF_CV)
+ (match_operand:VFULLI 2 "vector_merge_operand" "vu,vu")))]
+ "TARGET_VECTOR && TARGET_XSFVCP"
+ "sf.vc.v.vv\t%3,%0,%4,%5"
+ [(set_attr "type" "sf_vc_se")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "@sf_vc_v_vv<mode>"
+ [(set (match_operand:VFULLI 0 "register_operand" "=&vr,vr")
+ (if_then_else:VFULLI
+ (unspec:<VM>
+ [(match_operand:<VM> 1 "vector_mask_operand" " Wc1,Wc1")
+ (match_operand 6 "vector_length_operand" " rK, rK")
+ (match_operand 7 "const_int_operand" " i, i")
+ (match_operand 8 "const_int_operand" " i, i")
+ (match_operand 9 "const_int_operand" " i, i")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+ (unspec:VFULLI
+ [(match_operand:SI 3 "const_int_operand" "Ou02,Ou02")
+ (match_operand:VFULLI 4 "register_operand" "vr,vr")
+ (match_operand:VFULLI 5 "register_operand" "vr,vr")] UNSPEC_SF_CV)
+ (match_operand:VFULLI 2 "vector_merge_operand" "vu,vu")))]
+ "TARGET_VECTOR && TARGET_XSFVCP"
+ "sf.vc.v.vv\t%3,%0,%4,%5"
+ [(set_attr "type" "sf_vc")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "@sf_vc_xv_se<mode>"
+ [(unspec:<VM>
+ [(match_operand:<VM> 0 "vector_mask_operand" " Wc1")
+ (match_operand 5 "vector_length_operand" " rK")
+ (match_operand 6 "const_int_operand" " i")
+ (match_operand 7 "const_int_operand" " i")
+ (match_operand 8 "const_int_operand" " i")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+ (unspec_volatile:VFULLI
+ [(match_operand:SI 1 "const_int_operand" "Ou02")
+ (match_operand:SI 2 "const_int_operand" "K")
+ (match_operand:VFULLI 3 "register_operand" "vr")
+ (match_operand:<VEL> 4 "register_operand" "r")] UNSPECV_SF_CV)]
+ "TARGET_VECTOR && TARGET_XSFVCP"
+ "sf.vc.xv\t%1,%2,%3,%4"
+ [(set_attr "type" "sf_vc_se")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "@sf_vc_v_xv_se<mode>"
+ [(set (match_operand:VFULLI 0 "register_operand" "=&vd,vd")
+ (if_then_else:VFULLI
+ (unspec:<VM>
+ [(match_operand:<VM> 1 "vector_mask_operand" " Wc1,Wc1")
+ (match_operand 6 "vector_length_operand" " rK, rK")
+ (match_operand 7 "const_int_operand" " i, i")
+ (match_operand 8 "const_int_operand" " i, i")
+ (match_operand 9 "const_int_operand" " i, i")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+ (unspec_volatile:VFULLI
+ [(match_operand:SI 3 "const_int_operand" "Ou02,Ou02")
+ (match_operand:VFULLI 4 "register_operand" "vr,vr")
+ (match_operand:<VEL> 5 "register_operand" "r,r")] UNSPECV_SF_CV)
+ (match_operand:VFULLI 2 "vector_merge_operand" "vu,vu")))]
+ "TARGET_VECTOR && TARGET_XSFVCP"
+ "sf.vc.v.xv\t%3,%0,%4,%5"
+ [(set_attr "type" "sf_vc_se")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "@sf_vc_v_xv<mode>"
+ [(set (match_operand:VFULLI 0 "register_operand" "=&vd,vd")
+ (if_then_else:VFULLI
+ (unspec:<VM>
+ [(match_operand:<VM> 1 "vector_mask_operand" " Wc1,Wc1")
+ (match_operand 6 "vector_length_operand" " rK, rK")
+ (match_operand 7 "const_int_operand" " i, i")
+ (match_operand 8 "const_int_operand" " i, i")
+ (match_operand 9 "const_int_operand" " i, i")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+ (unspec:VFULLI
+ [(match_operand:SI 3 "const_int_operand" "Ou02,Ou02")
+ (match_operand:VFULLI 4 "register_operand" "vr,vr")
+ (match_operand:<VEL> 5 "register_operand" "r,r")] UNSPEC_SF_CV)
+ (match_operand:VFULLI 2 "vector_merge_operand" "vu,vu")))]
+ "TARGET_VECTOR && TARGET_XSFVCP"
+ "sf.vc.v.xv\t%3,%0,%4,%5"
+ [(set_attr "type" "sf_vc")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "@sf_vc_iv_se<mode>"
+ [(unspec:<VM>
+ [(match_operand:<VM> 0 "vector_mask_operand" " Wc1")
+ (match_operand 5 "vector_length_operand" " rK")
+ (match_operand 6 "const_int_operand" " i")
+ (match_operand 7 "const_int_operand" " i")
+ (match_operand 8 "const_int_operand" " i")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+ (unspec_volatile:VFULLI
+ [(match_operand:SI 1 "const_int_operand" "Ou02")
+ (match_operand:SI 2 "const_int_operand" "K")
+ (match_operand:VFULLI 3 "register_operand" "vr")
+ (match_operand:SI 4 "const_int_operand" "P")] UNSPECV_SF_CV)]
+ "TARGET_VECTOR && TARGET_XSFVCP"
+ "sf.vc.iv\t%1,%2,%3,%4"
+ [(set_attr "type" "sf_vc_se")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "@sf_vc_v_iv_se<mode>"
+ [(set (match_operand:VFULLI 0 "register_operand" "=&vd,vd")
+ (if_then_else:VFULLI
+ (unspec:<VM>
+ [(match_operand:<VM> 1 "vector_mask_operand" " Wc1,Wc1")
+ (match_operand 6 "vector_length_operand" " rK, rK")
+ (match_operand 7 "const_int_operand" " i, i")
+ (match_operand 8 "const_int_operand" " i, i")
+ (match_operand 9 "const_int_operand" " i, i")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+ (unspec_volatile:VFULLI
+ [(match_operand:SI 3 "const_int_operand" "Ou02,Ou02")
+ (match_operand:VFULLI 4 "register_operand" "vr,vr")
+ (match_operand:SI 5 "const_int_operand" "P,P")] UNSPECV_SF_CV)
+ (match_operand:VFULLI 2 "vector_merge_operand" "vu,vu")))]
+ "TARGET_VECTOR && TARGET_XSFVCP"
+ "sf.vc.v.iv\t%3,%0,%4,%5"
+ [(set_attr "type" "sf_vc_se")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "@sf_vc_v_iv<mode>"
+ [(set (match_operand:VFULLI 0 "register_operand" "=&vd,vd")
+ (if_then_else:VFULLI
+ (unspec:<VM>
+ [(match_operand:<VM> 1 "vector_mask_operand" " Wc1,Wc1")
+ (match_operand 6 "vector_length_operand" " rK, rK")
+ (match_operand 7 "const_int_operand" " i, i")
+ (match_operand 8 "const_int_operand" " i, i")
+ (match_operand 9 "const_int_operand" " i, i")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+ (unspec:VFULLI
+ [(match_operand:SI 3 "const_int_operand" "Ou02,Ou02")
+ (match_operand:VFULLI 4 "register_operand" "vr,vr")
+ (match_operand:SI 5 "const_int_operand" "P,P")] UNSPEC_SF_CV)
+ (match_operand:VFULLI 2 "vector_merge_operand" "vu,vu")))]
+ "TARGET_VECTOR && TARGET_XSFVCP"
+ "sf.vc.v.iv\t%3,%0,%4,%5"
+ [(set_attr "type" "sf_vc")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "@sf_vc_fv_se<mode>"
+ [(unspec:<VM>
+ [(match_operand:<VM> 0 "vector_mask_operand" " Wc1")
+ (match_operand 5 "vector_length_operand" " rK")
+ (match_operand 6 "const_int_operand" " i")
+ (match_operand 7 "const_int_operand" " i")
+ (match_operand 8 "const_int_operand" " i")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+ (unspec_volatile:SF_FV
+ [(match_operand:SI 1 "const_int_operand" "Ou01")
+ (match_operand:SI 2 "const_int_operand" "K")
+ (match_operand:SF_FV 3 "register_operand" "vr")
+ (match_operand:<SF_XF> 4 "register_operand" "f")] UNSPECV_SF_CV)]
+ "TARGET_VECTOR && TARGET_XSFVCP"
+ "sf.vc.fv\t%1,%2,%3,%4"
+ [(set_attr "type" "sf_vc_se")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "@sf_vc_v_fv_se<mode>"
+ [(set (match_operand:SF_FV 0 "register_operand" "=&vd,vd")
+ (if_then_else:SF_FV
+ (unspec:<VM>
+ [(match_operand:<VM> 1 "vector_mask_operand" " Wc1,Wc1")
+ (match_operand 6 "vector_length_operand" " rK, rK")
+ (match_operand 7 "const_int_operand" " i, i")
+ (match_operand 8 "const_int_operand" " i, i")
+ (match_operand 9 "const_int_operand" " i, i")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+ (unspec_volatile:SF_FV
+ [(match_operand:SI 3 "const_int_operand" "Ou01,Ou01")
+ (match_operand:SF_FV 4 "register_operand" "vr,vr")
+ (match_operand:<SF_XF> 5 "register_operand" "f,f")] UNSPECV_SF_CV)
+ (match_operand:SF_FV 2 "vector_merge_operand" "vu,vu")))]
+ "TARGET_VECTOR && TARGET_XSFVCP"
+ "sf.vc.v.fv\t%3,%0,%4,%5"
+ [(set_attr "type" "sf_vc_se")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "@sf_vc_v_fv<mode>"
+ [(set (match_operand:SF_FV 0 "register_operand" "=&vd,vd")
+ (if_then_else:SF_FV
+ (unspec:<VM>
+ [(match_operand:<VM> 1 "vector_mask_operand" " Wc1,Wc1")
+ (match_operand 6 "vector_length_operand" " rK, rK")
+ (match_operand 7 "const_int_operand" " i, i")
+ (match_operand 8 "const_int_operand" " i, i")
+ (match_operand 9 "const_int_operand" " i, i")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+ (unspec:SF_FV
+ [(match_operand:SI 3 "const_int_operand" "Ou01,Ou01")
+ (match_operand:SF_FV 4 "register_operand" "vr,vr")
+ (match_operand:<SF_XF> 5 "register_operand" "f,f")] UNSPEC_SF_CV)
+ (match_operand:SF_FV 2 "vector_merge_operand" "vu,vu")))]
+ "TARGET_VECTOR && TARGET_XSFVCP"
+ "sf.vc.v.fv\t%3,%0,%4,%5"
+ [(set_attr "type" "sf_vc")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "@sf_vc_vvv_se<mode>"
+ [(unspec:<VM>
+ [(match_operand:<VM> 0 "vector_mask_operand" "vmWc1")
+ (match_operand 5 "vector_length_operand" " rK")
+ (match_operand 6 "const_int_operand" " i")
+ (match_operand 7 "const_int_operand" " i")
+ (match_operand 8 "const_int_operand" " i")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+ (unspec_volatile:VFULLI
+ [(match_operand:SI 1 "const_int_operand" "Ou02")
+ (match_operand:VFULLI 2 "register_operand" "vd")
+ (match_operand:VFULLI 3 "register_operand" "vr")
+ (match_operand:VFULLI 4 "register_operand" "vr")] UNSPECV_SF_CV)]
+ "TARGET_VECTOR && TARGET_XSFVCP"
+ "sf.vc.vvv\t%1,%2,%3,%4"
+ [(set_attr "type" "sf_vc_se")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "@sf_vc_v_vvv_se<mode>"
+ [(set (match_operand:VFULLI 0 "register_operand" "=&vr,vr")
+ (if_then_else:VFULLI
+ (unspec:<VM>
+ [(match_operand:<VM> 1 "vector_mask_operand" " Wc1,Wc1")
+ (match_operand 7 "vector_length_operand" " rK, rK")
+ (match_operand 8 "const_int_operand" " i, i")
+ (match_operand 9 "const_int_operand" " i, i")
+ (match_operand 10 "const_int_operand" " i, i")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+ (unspec_volatile:VFULLI
+ [(match_operand:SI 3 "const_int_operand" "Ou02,Ou02")
+ (match_operand:VFULLI 4 "register_operand" "vd,vd")
+ (match_operand:VFULLI 5 "register_operand" "vr,vr")
+ (match_operand:VFULLI 6 "register_operand" "vr,vr")] UNSPECV_SF_CV)
+ (match_operand:VFULLI 2 "vector_merge_operand" "vu,vu")))]
+ "TARGET_VECTOR && TARGET_XSFVCP"
+ "sf.vc.v.vvv\t%3,%4,%6,%5"
+ [(set_attr "type" "sf_vc_se")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "@sf_vc_v_vvv<mode>"
+ [(set (match_operand:VFULLI 0 "register_operand" "=&vr,vr")
+ (if_then_else:VFULLI
+ (unspec:<VM>
+ [(match_operand:<VM> 1 "vector_mask_operand" " Wc1,Wc1")
+ (match_operand 7 "vector_length_operand" " rK, rK")
+ (match_operand 8 "const_int_operand" " i, i")
+ (match_operand 9 "const_int_operand" " i, i")
+ (match_operand 10 "const_int_operand" " i, i")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+ (unspec:VFULLI
+ [(match_operand:SI 3 "const_int_operand" "Ou02,Ou02")
+ (match_operand:VFULLI 4 "register_operand" "vd,vd")
+ (match_operand:VFULLI 5 "register_operand" "vr,vr")
+ (match_operand:VFULLI 6 "register_operand" "vr,vr")] UNSPEC_SF_CV)
+ (match_operand:VFULLI 2 "vector_merge_operand" "vu,vu")))]
+ "TARGET_VECTOR && TARGET_XSFVCP"
+ "sf.vc.v.vvv\t%3,%4,%6,%5"
+ [(set_attr "type" "sf_vc")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "@sf_vc_xvv_se<mode>"
+ [(unspec:<VM>
+ [(match_operand:<VM> 0 "vector_mask_operand" " Wc1")
+ (match_operand 5 "vector_length_operand" " rK")
+ (match_operand 6 "const_int_operand" " i")
+ (match_operand 7 "const_int_operand" " i")
+ (match_operand 8 "const_int_operand" " i")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+ (unspec_volatile:VFULLI
+ [(match_operand:SI 1 "const_int_operand" "Ou02")
+ (match_operand:VFULLI 2 "register_operand" "vd")
+ (match_operand:VFULLI 3 "register_operand" "vr")
+ (match_operand:<VEL> 4 "register_operand" "r")] UNSPECV_SF_CV)]
+ "TARGET_VECTOR && TARGET_XSFVCP"
+ "sf.vc.xvv\t%1,%2,%3,%4"
+ [(set_attr "type" "sf_vc_se")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "@sf_vc_v_xvv_se<mode>"
+ [(set (match_operand:VFULLI 0 "register_operand" "=&vr,vr")
+ (if_then_else:VFULLI
+ (unspec:<VM>
+ [(match_operand:<VM> 1 "vector_mask_operand" " Wc1,Wc1")
+ (match_operand 7 "vector_length_operand" " rK, rK")
+ (match_operand 8 "const_int_operand" " i, i")
+ (match_operand 9 "const_int_operand" " i, i")
+ (match_operand 10 "const_int_operand" " i, i")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+ (unspec_volatile:VFULLI
+ [(match_operand:SI 3 "const_int_operand" "Ou02,Ou02")
+ (match_operand:VFULLI 4 "register_operand" "vd,vd")
+ (match_operand:VFULLI 5 "register_operand" "vr,vr")
+ (match_operand:<VEL> 6 "register_operand" "r,r")] UNSPECV_SF_CV)
+ (match_operand:VFULLI 2 "vector_merge_operand" "vu,vu")))]
+ "TARGET_VECTOR && TARGET_XSFVCP"
+ "sf.vc.v.xvv\t%3,%4,%5,%6"
+ [(set_attr "type" "sf_vc_se")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "@sf_vc_v_xvv<mode>"
+ [(set (match_operand:VFULLI 0 "register_operand" "=&vr,vr")
+ (if_then_else:VFULLI
+ (unspec:<VM>
+ [(match_operand:<VM> 1 "vector_mask_operand" " Wc1,Wc1")
+ (match_operand 7 "vector_length_operand" " rK, rK")
+ (match_operand 8 "const_int_operand" " i, i")
+ (match_operand 9 "const_int_operand" " i, i")
+ (match_operand 10 "const_int_operand" " i, i")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+ (unspec:VFULLI
+ [(match_operand:SI 3 "const_int_operand" "Ou02,Ou02")
+ (match_operand:VFULLI 4 "register_operand" "vd,vd")
+ (match_operand:VFULLI 5 "register_operand" "vr,vr")
+ (match_operand:<VEL> 6 "register_operand" "r,r")] UNSPEC_SF_CV)
+ (match_operand:VFULLI 2 "vector_merge_operand" "vu,vu")))]
+ "TARGET_VECTOR && TARGET_XSFVCP"
+ "sf.vc.v.xvv\t%3,%4,%5,%6"
+ [(set_attr "type" "sf_vc")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "@sf_vc_ivv_se<mode>"
+ [(unspec:<VM>
+ [(match_operand:<VM> 0 "vector_mask_operand" " Wc1")
+ (match_operand 5 "vector_length_operand" " rK")
+ (match_operand 6 "const_int_operand" " i")
+ (match_operand 7 "const_int_operand" " i")
+ (match_operand 8 "const_int_operand" " i")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+ (unspec_volatile:VFULLI
+ [(match_operand:SI 1 "const_int_operand" "Ou02")
+ (match_operand:VFULLI 2 "register_operand" "vd")
+ (match_operand:VFULLI 3 "register_operand" "vr")
+ (match_operand:SI 4 "const_int_operand" "P")] UNSPECV_SF_CV)]
+ "TARGET_VECTOR && TARGET_XSFVCP"
+ "sf.vc.ivv\t%1,%2,%3,%4"
+ [(set_attr "type" "sf_vc_se")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "@sf_vc_v_ivv_se<mode>"
+ [(set (match_operand:VFULLI 0 "register_operand" "=&vr,vr")
+ (if_then_else:VFULLI
+ (unspec:<VM>
+ [(match_operand:<VM> 1 "vector_mask_operand" " Wc1,Wc1")
+ (match_operand 7 "vector_length_operand" " rK, rK")
+ (match_operand 8 "const_int_operand" " i, i")
+ (match_operand 9 "const_int_operand" " i, i")
+ (match_operand 10 "const_int_operand" " i, i")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+ (unspec_volatile:VFULLI
+ [(match_operand:SI 3 "const_int_operand" "Ou02,Ou02")
+ (match_operand:VFULLI 4 "register_operand" "vd,vd")
+ (match_operand:VFULLI 5 "register_operand" "vr,vr")
+ (match_operand:SI 6 "const_int_operand" "P,P")] UNSPECV_SF_CV)
+ (match_operand:VFULLI 2 "vector_merge_operand" "vu,vu")))]
+ "TARGET_VECTOR && TARGET_XSFVCP"
+ "sf.vc.v.ivv\t%3,%4,%5,%6"
+ [(set_attr "type" "sf_vc_se")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "@sf_vc_v_ivv<mode>"
+ [(set (match_operand:VFULLI 0 "register_operand" "=&vr,vr")
+ (if_then_else:VFULLI
+ (unspec:<VM>
+ [(match_operand:<VM> 1 "vector_mask_operand" " Wc1,Wc1")
+ (match_operand 7 "vector_length_operand" " rK, rK")
+ (match_operand 8 "const_int_operand" " i, i")
+ (match_operand 9 "const_int_operand" " i, i")
+ (match_operand 10 "const_int_operand" " i, i")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+ (unspec:VFULLI
+ [(match_operand:SI 3 "const_int_operand" "Ou02,Ou02")
+ (match_operand:VFULLI 4 "register_operand" "vd,vd")
+ (match_operand:VFULLI 5 "register_operand" "vr,vr")
+ (match_operand:SI 6 "const_int_operand" "P,P")] UNSPEC_SF_CV)
+ (match_operand:VFULLI 2 "vector_merge_operand" "vu,vu")))]
+ "TARGET_VECTOR && TARGET_XSFVCP"
+ "sf.vc.v.ivv\t%3,%4,%5,%6"
+ [(set_attr "type" "sf_vc")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "@sf_vc_fvv_se<mode>"
+ [(unspec:<VM>
+ [(match_operand:<VM> 0 "vector_mask_operand" " Wc1")
+ (match_operand 5 "vector_length_operand" " rK")
+ (match_operand 6 "const_int_operand" " i")
+ (match_operand 7 "const_int_operand" " i")
+ (match_operand 8 "const_int_operand" " i")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+ (unspec_volatile:SF_FV
+ [(match_operand:SI 1 "const_int_operand" "Ou01")
+ (match_operand:SF_FV 2 "register_operand" "vd")
+ (match_operand:SF_FV 3 "register_operand" "vr")
+ (match_operand:<SF_XF> 4 "register_operand" "f")] UNSPECV_SF_CV)]
+ "TARGET_VECTOR && TARGET_XSFVCP"
+ "sf.vc.fvv\t%1,%2,%3,%4"
+ [(set_attr "type" "sf_vc_se")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "@sf_vc_v_fvv_se<mode>"
+ [(set (match_operand:SF_FV 0 "register_operand" "=&vr,vr")
+ (if_then_else:SF_FV
+ (unspec:<VM>
+ [(match_operand:<VM> 1 "vector_mask_operand" " Wc1,Wc1")
+ (match_operand 7 "vector_length_operand" " rK, rK")
+ (match_operand 8 "const_int_operand" " i, i")
+ (match_operand 9 "const_int_operand" " i, i")
+ (match_operand 10 "const_int_operand" " i, i")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+ (unspec_volatile:SF_FV
+ [(match_operand:SI 3 "const_int_operand" "Ou01,Ou01")
+ (match_operand:SF_FV 4 "register_operand" "vd,vd")
+ (match_operand:SF_FV 5 "register_operand" "vr,vr")
+ (match_operand:<SF_XF> 6 "register_operand" "f,f")] UNSPECV_SF_CV)
+ (match_operand:SF_FV 2 "vector_merge_operand" "vu,vu")))]
+ "TARGET_VECTOR && TARGET_XSFVCP"
+ "sf.vc.v.fvv\t%3,%4,%5,%6"
+ [(set_attr "type" "sf_vc_se")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "@sf_vc_v_fvv<mode>"
+ [(set (match_operand:SF_FV 0 "register_operand" "=&vr,vr")
+ (if_then_else:SF_FV
+ (unspec:<VM>
+ [(match_operand:<VM> 1 "vector_mask_operand" " Wc1,Wc1")
+ (match_operand 7 "vector_length_operand" " rK, rK")
+ (match_operand 8 "const_int_operand" " i, i")
+ (match_operand 9 "const_int_operand" " i, i")
+ (match_operand 10 "const_int_operand" " i, i")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+ (unspec:SF_FV
+ [(match_operand:SI 3 "const_int_operand" "Ou01,Ou01")
+ (match_operand:SF_FV 4 "register_operand" "vd,vd")
+ (match_operand:SF_FV 5 "register_operand" "vr,vr")
+ (match_operand:<SF_XF> 6 "register_operand" "f,f")] UNSPEC_SF_CV)
+ (match_operand:SF_FV 2 "vector_merge_operand" "vu,vu")))]
+ "TARGET_VECTOR && TARGET_XSFVCP"
+ "sf.vc.v.fvv\t%3,%4,%5,%6"
+ [(set_attr "type" "sf_vc")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "@sf_vc_vvw_se<mode>"
+ [(unspec:<VM>
+ [(match_operand:<VM> 0 "vector_mask_operand" " Wc1")
+ (match_operand 5 "vector_length_operand" " rK")
+ (match_operand 6 "const_int_operand" " i")
+ (match_operand 7 "const_int_operand" " i")
+ (match_operand 8 "const_int_operand" " i")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+ (unspec_volatile:<SF_VW>
+ [(match_operand:SI 1 "const_int_operand" "Ou02")
+ (match_operand:<SF_VW> 2 "register_operand" "vd")
+ (match_operand:SF_VC_W 3 "register_operand" "vr")
+ (match_operand:SF_VC_W 4 "register_operand" "vr")] UNSPECV_SF_CV)]
+ "TARGET_VECTOR && TARGET_XSFVCP"
+ "sf.vc.vvw\t%1,%2,%3,%4"
+ [(set_attr "type" "sf_vc_se")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "@sf_vc_v_vvw_se<mode>"
+ [(set (match_operand:<SF_VW> 0 "register_operand" "=&vr,vr")
+ (if_then_else:<SF_VW>
+ (unspec:<VM>
+ [(match_operand:<VM> 1 "vector_mask_operand" " Wc1,Wc1")
+ (match_operand 7 "vector_length_operand" " rK, rK")
+ (match_operand 8 "const_int_operand" " i, i")
+ (match_operand 9 "const_int_operand" " i, i")
+ (match_operand 10 "const_int_operand" " i, i")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+ (unspec_volatile:<SF_VW>
+ [(match_operand:SI 3 "const_int_operand" "Ou02,Ou02")
+ (match_operand:<SF_VW> 4 "register_operand" "vd,vd")
+ (match_operand:SF_VC_W 5 "register_operand" "vr,vr")
+ (match_operand:SF_VC_W 6 "register_operand" "vr,vr")] UNSPECV_SF_CV)
+ (match_operand:<SF_VW> 2 "vector_merge_operand" "vu,vu")))]
+ "TARGET_VECTOR && TARGET_XSFVCP"
+ "sf.vc.v.vvw\t%3,%4,%5,%6"
+ [(set_attr "type" "sf_vc_se")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "@sf_vc_v_vvw<mode>"
+ [(set (match_operand:<SF_VW> 0 "register_operand" "=&vr,vr")
+ (if_then_else:<SF_VW>
+ (unspec:<VM>
+ [(match_operand:<VM> 1 "vector_mask_operand" " Wc1,Wc1")
+ (match_operand 7 "vector_length_operand" " rK, rK")
+ (match_operand 8 "const_int_operand" " i, i")
+ (match_operand 9 "const_int_operand" " i, i")
+ (match_operand 10 "const_int_operand" " i, i")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+ (unspec:<SF_VW>
+ [(match_operand:SI 3 "const_int_operand" "Ou02,Ou02")
+ (match_operand:<SF_VW> 4 "register_operand" "vd,vd")
+ (match_operand:SF_VC_W 5 "register_operand" "vr,vr")
+ (match_operand:SF_VC_W 6 "register_operand" "vr,vr")] UNSPEC_SF_CV)
+ (match_operand:<SF_VW> 2 "vector_merge_operand" "vu,vu")))]
+ "TARGET_VECTOR && TARGET_XSFVCP"
+ "sf.vc.v.vvw\t%3,%4,%5,%6"
+ [(set_attr "type" "sf_vc")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "@sf_vc_xvw_se<mode>"
+ [(unspec:<VM>
+ [(match_operand:<VM> 0 "vector_mask_operand" " Wc1")
+ (match_operand 5 "vector_length_operand" " rK")
+ (match_operand 6 "const_int_operand" " i")
+ (match_operand 7 "const_int_operand" " i")
+ (match_operand 8 "const_int_operand" " i")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+ (unspec_volatile:<SF_VW>
+ [(match_operand:SI 1 "const_int_operand" "Ou02")
+ (match_operand:<SF_VW> 2 "register_operand" "vd")
+ (match_operand:SF_VC_W 3 "register_operand" "vr")
+ (match_operand:<VEL> 4 "register_operand" "r")] UNSPECV_SF_CV)]
+ "TARGET_VECTOR && TARGET_XSFVCP"
+ "sf.vc.xvw\t%1,%2,%3,%4"
+ [(set_attr "type" "sf_vc_se")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "@sf_vc_v_xvw_se<mode>"
+ [(set (match_operand:<SF_VW> 0 "register_operand" "=&vr,vr")
+ (if_then_else:<SF_VW>
+ (unspec:<VM>
+ [(match_operand:<VM> 1 "vector_mask_operand" " Wc1,Wc1")
+ (match_operand 7 "vector_length_operand" " rK, rK")
+ (match_operand 8 "const_int_operand" " i, i")
+ (match_operand 9 "const_int_operand" " i, i")
+ (match_operand 10 "const_int_operand" " i, i")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+ (unspec_volatile:<SF_VW>
+ [(match_operand:SI 3 "const_int_operand" "Ou02,Ou02")
+ (match_operand:<SF_VW> 4 "register_operand" "vd,vd")
+ (match_operand:SF_VC_W 5 "register_operand" "vr,vr")
+ (match_operand:<VEL> 6 "register_operand" "r,r")] UNSPECV_SF_CV)
+ (match_operand:<SF_VW> 2 "vector_merge_operand" "vu,vu")))]
+ "TARGET_VECTOR && TARGET_XSFVCP"
+ "sf.vc.v.xvw\t%3,%4,%5,%6"
+ [(set_attr "type" "sf_vc_se")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "@sf_vc_v_xvw<mode>"
+ [(set (match_operand:<SF_VW> 0 "register_operand" "=&vr,vr")
+ (if_then_else:<SF_VW>
+ (unspec:<VM>
+ [(match_operand:<VM> 1 "vector_mask_operand" " Wc1,Wc1")
+ (match_operand 7 "vector_length_operand" " rK, rK")
+ (match_operand 8 "const_int_operand" " i, i")
+ (match_operand 9 "const_int_operand" " i, i")
+ (match_operand 10 "const_int_operand" " i, i")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+ (unspec:<SF_VW>
+ [(match_operand:SI 3 "const_int_operand" "Ou02,Ou02")
+ (match_operand:<SF_VW> 4 "register_operand" "vd,vd")
+ (match_operand:SF_VC_W 5 "register_operand" "vr,vr")
+ (match_operand:<VEL> 6 "register_operand" "r,r")] UNSPEC_SF_CV)
+ (match_operand:<SF_VW> 2 "vector_merge_operand" "vu,vu")))]
+ "TARGET_VECTOR && TARGET_XSFVCP"
+ "sf.vc.v.xvw\t%3,%4,%5,%6"
+ [(set_attr "type" "sf_vc")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "@sf_vc_ivw_se<mode>"
+ [(unspec:<VM>
+ [(match_operand:<VM> 0 "vector_mask_operand" " Wc1")
+ (match_operand 5 "vector_length_operand" " rK")
+ (match_operand 6 "const_int_operand" " i")
+ (match_operand 7 "const_int_operand" " i")
+ (match_operand 8 "const_int_operand" " i")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+ (unspec_volatile:<SF_VW>
+ [(match_operand:SI 1 "const_int_operand" "Ou02")
+ (match_operand:<SF_VW> 2 "register_operand" "vd")
+ (match_operand:SF_VC_W 3 "register_operand" "vr")
+ (match_operand:SI 4 "immediate_operand" "P")] UNSPECV_SF_CV)]
+ "TARGET_VECTOR && TARGET_XSFVCP"
+ "sf.vc.ivw\t%1,%2,%3,%4"
+ [(set_attr "type" "sf_vc_se")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "@sf_vc_v_ivw_se<mode>"
+ [(set (match_operand:<SF_VW> 0 "register_operand" "=&vr,vr")
+ (if_then_else:<SF_VW>
+ (unspec:<VM>
+ [(match_operand:<VM> 1 "vector_mask_operand" " Wc1,Wc1")
+ (match_operand 7 "vector_length_operand" " rK, rK")
+ (match_operand 8 "const_int_operand" " i, i")
+ (match_operand 9 "const_int_operand" " i, i")
+ (match_operand 10 "const_int_operand" " i, i")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+ (unspec_volatile:<SF_VW>
+ [(match_operand:SI 3 "const_int_operand" "Ou02,Ou02")
+ (match_operand:<SF_VW> 4 "register_operand" "vd,vd")
+ (match_operand:SF_VC_W 5 "register_operand" "vr,vr")
+ (match_operand:SI 6 "immediate_operand" "P,P")] UNSPECV_SF_CV)
+ (match_operand:<SF_VW> 2 "vector_merge_operand" "vu,vu")))]
+ "TARGET_VECTOR && TARGET_XSFVCP"
+ "sf.vc.v.ivw\t%3,%4,%5,%6"
+ [(set_attr "type" "sf_vc_se")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "@sf_vc_v_ivw<mode>"
+ [(set (match_operand:<SF_VW> 0 "register_operand" "=&vr,vr")
+ (if_then_else:<SF_VW>
+ (unspec:<VM>
+ [(match_operand:<VM> 1 "vector_mask_operand" " Wc1,Wc1")
+ (match_operand 7 "vector_length_operand" " rK, rK")
+ (match_operand 8 "const_int_operand" " i, i")
+ (match_operand 9 "const_int_operand" " i, i")
+ (match_operand 10 "const_int_operand" " i, i")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+ (unspec:<SF_VW>
+ [(match_operand:SI 3 "const_int_operand" "Ou02,Ou02")
+ (match_operand:<SF_VW> 4 "register_operand" "vd,vd")
+ (match_operand:SF_VC_W 5 "register_operand" "vr,vr")
+ (match_operand:SI 6 "immediate_operand" "P,P")] UNSPEC_SF_CV)
+ (match_operand:<SF_VW> 2 "vector_merge_operand" "vu,vu")))]
+ "TARGET_VECTOR && TARGET_XSFVCP"
+ "sf.vc.v.ivw\t%3,%4,%5,%6"
+ [(set_attr "type" "sf_vc")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "@sf_vc_fvw_se<mode>"
+ [(unspec:<VM>
+ [(match_operand:<VM> 0 "vector_mask_operand" " Wc1")
+ (match_operand 5 "vector_length_operand" " rK")
+ (match_operand 6 "const_int_operand" " i")
+ (match_operand 7 "const_int_operand" " i")
+ (match_operand 8 "const_int_operand" " i")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+ (unspec_volatile:<SF_VW>
+ [(match_operand:SI 1 "const_int_operand" "Ou01")
+ (match_operand:<SF_VW> 2 "register_operand" "vd")
+ (match_operand:SF_VC_FW 3 "register_operand" "vr")
+ (match_operand:<SF_XFW> 4 "register_operand" "f")] UNSPECV_SF_CV)]
+ "TARGET_VECTOR && TARGET_XSFVCP"
+ "sf.vc.fvw\t%1,%2,%3,%4"
+ [(set_attr "type" "sf_vc_se")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "@sf_vc_v_fvw_se<mode>"
+ [(set (match_operand:<SF_VW> 0 "register_operand" "=&vr,vr")
+ (if_then_else:<SF_VW>
+ (unspec:<VM>
+ [(match_operand:<VM> 1 "vector_mask_operand" " Wc1,Wc1")
+ (match_operand 7 "vector_length_operand" " rK, rK")
+ (match_operand 8 "const_int_operand" " i, i")
+ (match_operand 9 "const_int_operand" " i, i")
+ (match_operand 10 "const_int_operand" " i, i")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+ (unspec_volatile:<SF_VW>
+ [(match_operand:SI 3 "const_int_operand" "Ou01,Ou01")
+ (match_operand:<SF_VW> 4 "register_operand" "vd,vd")
+ (match_operand:SF_VC_FW 5 "register_operand" "vr,vr")
+ (match_operand:<SF_XFW> 6 "register_operand" "f,f")] UNSPECV_SF_CV)
+ (match_operand:<SF_VW> 2 "vector_merge_operand" "vu,vu")))]
+ "TARGET_VECTOR && TARGET_XSFVCP"
+ "sf.vc.v.fvw\t%3,%4,%5,%6"
+ [(set_attr "type" "sf_vc_se")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "@sf_vc_v_fvw<mode>"
+ [(set (match_operand:<SF_VW> 0 "register_operand" "=&vr,vr")
+ (if_then_else:<SF_VW>
+ (unspec:<VM>
+ [(match_operand:<VM> 1 "vector_mask_operand" " Wc1,Wc1")
+ (match_operand 7 "vector_length_operand" " rK, rK")
+ (match_operand 8 "const_int_operand" " i, i")
+ (match_operand 9 "const_int_operand" " i, i")
+ (match_operand 10 "const_int_operand" " i, i")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+ (unspec:<SF_VW>
+ [(match_operand:SI 3 "const_int_operand" "Ou01,Ou01")
+ (match_operand:<SF_VW> 4 "register_operand" "vd,vd")
+ (match_operand:SF_VC_FW 5 "register_operand" "vr,vr")
+ (match_operand:<SF_XFW> 6 "register_operand" "f,f")] UNSPEC_SF_CV)
+ (match_operand:<SF_VW> 2 "vector_merge_operand" "vu,vu")))]
+ "TARGET_VECTOR && TARGET_XSFVCP"
+ "sf.vc.v.fvw\t%3,%4,%5,%6"
+ [(set_attr "type" "sf_vc")
+ (set_attr "mode" "<MODE>")])
diff --git a/gcc/config/riscv/sync.md b/gcc/config/riscv/sync.md
index 726800a..e47bb41 100644
--- a/gcc/config/riscv/sync.md
+++ b/gcc/config/riscv/sync.md
@@ -386,13 +386,13 @@
})
(define_insn "amo_atomic_exchange<mode>"
- [(set (match_operand:GPR 0 "register_operand" "=&r")
+ [(set (match_operand:GPR 0 "register_operand" "=r")
(unspec_volatile:GPR
[(match_operand:GPR 1 "memory_operand" "+A")
(match_operand:SI 3 "const_int_operand")] ;; model
UNSPEC_SYNC_EXCHANGE))
(set (match_dup 1)
- (match_operand:GPR 2 "register_operand" "0"))]
+ (match_operand:GPR 2 "reg_or_0_operand" "rJ"))]
"TARGET_ZAAMO"
"amoswap.<amo>%A3\t%0,%z2,%1"
[(set_attr "type" "atomic")
@@ -405,18 +405,17 @@
(match_operand:SI 3 "const_int_operand")] ;; model
UNSPEC_SYNC_EXCHANGE))
(set (match_dup 1)
- (match_operand:GPR 2 "register_operand" "0"))
+ (match_operand:GPR 2 "reg_or_0_operand" "rJ"))
(clobber (match_scratch:GPR 4 "=&r"))] ;; tmp_1
"!TARGET_ZAAMO && TARGET_ZALRSC"
{
return "1:\;"
- "lr.<amo>%I3\t%4, %1\;"
- "sc.<amo>%J3\t%0, %0, %1\;"
- "bnez\t%0, 1b\;"
- "mv\t%0, %4";
+ "lr.<amo>%I3\t%0, %1\;"
+ "sc.<amo>%J3\t%4, %z2, %1\;"
+ "bnez\t%4, 1b\";
}
[(set_attr "type" "atomic")
- (set (attr "length") (const_int 16))])
+ (set (attr "length") (const_int 12))])
(define_expand "atomic_exchange<mode>"
[(match_operand:SHORT 0 "register_operand") ;; old value at mem
@@ -435,13 +434,13 @@
})
(define_insn "zabha_atomic_exchange<mode>"
- [(set (match_operand:SHORT 0 "register_operand" "=&r")
+ [(set (match_operand:SHORT 0 "register_operand" "=r")
(unspec_volatile:SHORT
[(match_operand:SHORT 1 "memory_operand" "+A")
(match_operand:SI 3 "const_int_operand")] ;; model
UNSPEC_SYNC_EXCHANGE_ZABHA))
(set (match_dup 1)
- (match_operand:SHORT 2 "register_operand" "0"))]
+ (match_operand:SHORT 2 "reg_or_0_operand" "rJ"))]
"TARGET_ZABHA"
"amoswap.<amobh>%A3\t%0,%z2,%1"
[(set_attr "type" "atomic")
@@ -628,7 +627,7 @@
(match_operand:SHORT 1 "memory_operand" "+A")) ;; memory
(set (match_dup 1)
(unspec_volatile:SHORT [(match_operand:SHORT 2 "register_operand" "0") ;; expected_val
- (match_operand:SHORT 3 "register_operand" "rJ") ;; desired_val
+ (match_operand:SHORT 3 "reg_or_0_operand" "rJ") ;; desired_val
(match_operand:SI 4 "const_int_operand") ;; mod_s
(match_operand:SI 5 "const_int_operand")] ;; mod_f
UNSPEC_COMPARE_AND_SWAP))]
diff --git a/gcc/config/riscv/t-riscv b/gcc/config/riscv/t-riscv
index 6493087..a7eaa8b 100644
--- a/gcc/config/riscv/t-riscv
+++ b/gcc/config/riscv/t-riscv
@@ -104,6 +104,13 @@ riscv-avlprop.o: $(srcdir)/config/riscv/riscv-avlprop.cc \
$(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \
$(srcdir)/config/riscv/riscv-avlprop.cc
+riscv-vect-permconst.o: $(srcdir)/config/riscv/riscv-vect-permconst.cc \
+ $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(RTL_H) $(REGS_H) \
+ $(TARGET_H) tree-pass.h df.h rtl-ssa.h cfgcleanup.h insn-attr.h \
+ tm-constrs.h insn-opinit.h cfgrtl.h
+ $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \
+ $(srcdir)/config/riscv/riscv-vect-permconst.cc
+
riscv-d.o: $(srcdir)/config/riscv/riscv-d.cc \
$(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H)
$(COMPILE) $<
@@ -180,3 +187,83 @@ s-riscv-vector-type-indexer.gen.defs: build/genrvv-type-indexer$(build_exeext)
$(STAMP) s-riscv-vector-type-indexer.gen.defs
genprog+=rvv-type-indexer
+
+RISCV_EXT_DEFS = \
+ $(srcdir)/config/riscv/riscv-ext.def \
+ $(srcdir)/config/riscv/riscv-ext-corev.def \
+ $(srcdir)/config/riscv/riscv-ext-sifive.def \
+ $(srcdir)/config/riscv/riscv-ext-thead.def \
+ $(srcdir)/config/riscv/riscv-ext-ventana.def \
+ $(srcdir)/config/riscv/riscv-ext-mips.def
+
+$(srcdir)/config/riscv/riscv-ext.opt: $(RISCV_EXT_DEFS)
+
+build/gen-riscv-ext-opt.o: $(srcdir)/config/riscv/gen-riscv-ext-opt.cc \
+ $(RISCV_EXT_DEFS)
+ $(CXX_FOR_BUILD) $(CXXFLAGS_FOR_BUILD) -c $< -o $@
+
+build/gen-riscv-ext-opt$(build_exeext): build/gen-riscv-ext-opt.o
+ $(LINKER_FOR_BUILD) $(BUILD_LINKERFLAGS) $(BUILD_LDFLAGS) -o $@ $<
+
+s-riscv-ext.opt: build/gen-riscv-ext-opt$(build_exeext)
+ $(RUN_GEN) build/gen-riscv-ext-opt$(build_exeext) > tmp-riscv-ext.opt
+ $(SHELL) $(srcdir)/../move-if-change tmp-riscv-ext.opt $(srcdir)/config/riscv/riscv-ext.opt
+ $(STAMP) s-riscv-ext.opt
+
+build/gen-riscv-ext-texi.o: $(srcdir)/config/riscv/gen-riscv-ext-texi.cc \
+ $(RISCV_EXT_DEFS)
+ $(CXX_FOR_BUILD) $(CXXFLAGS_FOR_BUILD) -c $< -o $@
+
+build/gen-riscv-ext-texi$(build_exeext): build/gen-riscv-ext-texi.o
+ $(LINKER_FOR_BUILD) $(BUILD_LINKERFLAGS) $(BUILD_LDFLAGS) -o $@ $<
+
+$(srcdir)/doc/riscv-ext.texi: $(RISCV_EXT_DEFS)
+$(srcdir)/doc/riscv-ext.texi: s-riscv-ext.texi ; @true
+
+# Generate the doc when generating the option file.
+$(srcdir)/config/riscv/riscv-ext.opt: s-riscv-ext.texi ; @true
+
+s-riscv-ext.texi: build/gen-riscv-ext-texi$(build_exeext)
+ $(RUN_GEN) build/gen-riscv-ext-texi$(build_exeext) > tmp-riscv-ext.texi
+ $(SHELL) $(srcdir)/../move-if-change tmp-riscv-ext.texi $(srcdir)/doc/riscv-ext.texi
+ $(STAMP) s-riscv-ext.texi
+
+RISCV_CORES_DEFS = \
+ $(srcdir)/config/riscv/riscv-cores.def
+
+build/gen-riscv-mtune-texi.o: $(srcdir)/config/riscv/gen-riscv-mtune-texi.cc \
+ $(RISCV_CORES_DEFS)
+ $(CXX_FOR_BUILD) $(CXXFLAGS_FOR_BUILD) -c $< -o $@
+
+build/gen-riscv-mcpu-texi.o: $(srcdir)/config/riscv/gen-riscv-mcpu-texi.cc \
+ $(RISCV_CORES_DEFS)
+ $(CXX_FOR_BUILD) $(CXXFLAGS_FOR_BUILD) -c $< -o $@
+
+build/gen-riscv-mtune-texi$(build_exeext): build/gen-riscv-mtune-texi.o
+ $(LINKER_FOR_BUILD) $(BUILD_LINKERFLAGS) $(BUILD_LDFLAGS) -o $@ $<
+
+build/gen-riscv-mcpu-texi$(build_exeext): build/gen-riscv-mcpu-texi.o
+ $(LINKER_FOR_BUILD) $(BUILD_LINKERFLAGS) $(BUILD_LDFLAGS) -o $@ $<
+
+$(srcdir)/doc/riscv-mtune.texi: $(RISCV_CORES_DEFS)
+$(srcdir)/doc/riscv-mtune.texi: s-riscv-mtune.texi ; @true
+
+$(srcdir)/doc/riscv-mcpu.texi: $(RISCV_CORES_DEFS)
+$(srcdir)/doc/riscv-mcpu.texi: s-riscv-mcpu.texi ; @true
+
+s-riscv-mtune.texi: build/gen-riscv-mtune-texi$(build_exeext)
+ $(RUN_GEN) build/gen-riscv-mtune-texi$(build_exeext) > tmp-riscv-mtune.texi
+ $(SHELL) $(srcdir)/../move-if-change tmp-riscv-mtune.texi $(srcdir)/doc/riscv-mtune.texi
+ $(STAMP) s-riscv-mtune.texi
+
+s-riscv-mcpu.texi: build/gen-riscv-mcpu-texi$(build_exeext)
+ $(RUN_GEN) build/gen-riscv-mcpu-texi$(build_exeext) > tmp-riscv-mcpu.texi
+ $(SHELL) $(srcdir)/../move-if-change tmp-riscv-mcpu.texi $(srcdir)/doc/riscv-mcpu.texi
+ $(STAMP) s-riscv-mcpu.texi
+
+# Run `make riscv-regen' after changing or adding anything in riscv-ext*.def or riscv-cores*.def.
+
+.PHONY: riscv-regen
+
+riscv-regen: s-riscv-ext.texi s-riscv-ext.opt s-riscv-mtune.texi s-riscv-mcpu.texi
diff --git a/gcc/config/riscv/t-rtems b/gcc/config/riscv/t-rtems
index f596e76..a4d2d03 100644
--- a/gcc/config/riscv/t-rtems
+++ b/gcc/config/riscv/t-rtems
@@ -1,8 +1,8 @@
MULTILIB_OPTIONS =
MULTILIB_DIRNAMES =
-MULTILIB_OPTIONS += march=rv32i/march=rv32iac/march=rv32im/march=rv32imf/march=rv32ima/march=rv32imac/march=rv32imaf/march=rv32imafc/march=rv32imafd/march=rv32imafdc/march=rv64ima/march=rv64imac/march=rv64imafd/march=rv64imafdc
-MULTILIB_DIRNAMES += rv32i rv32iac rv32im rv32imf rv32ima rv32imac rv32imaf rv32imafc rv32imafd rv32imafdc rv64ima rv64imac rv64imafd rv64imafdc
+MULTILIB_OPTIONS += march=rv32i/march=rv32iac/march=rv32im/march=rv32imf/march=rv32ima/march=rv32imac/march=rv32imaf/march=rv32imafc/march=rv32imafd/march=rv32imafdc/march=rv64ima/march=rv64imac/march=rv64imafd/march=rv64imafdc/march=rv64imc
+MULTILIB_DIRNAMES += rv32i rv32iac rv32im rv32imf rv32ima rv32imac rv32imaf rv32imafc rv32imafd rv32imafdc rv64ima rv64imac rv64imafd rv64imafdc rv64imc
MULTILIB_OPTIONS += mabi=ilp32/mabi=ilp32f/mabi=ilp32d/mabi=lp64/mabi=lp64d
MULTILIB_DIRNAMES += ilp32 ilp32f ilp32d lp64 lp64d
@@ -10,6 +10,9 @@ MULTILIB_DIRNAMES += ilp32 ilp32f ilp32d lp64 lp64d
MULTILIB_OPTIONS += mcmodel=medany
MULTILIB_DIRNAMES += medany
+MULTILIB_OPTIONS += mstrict-align
+MULTILIB_DIRNAMES += strict-align
+
MULTILIB_REQUIRED =
MULTILIB_REQUIRED += march=rv32i/mabi=ilp32
MULTILIB_REQUIRED += march=rv32iac/mabi=ilp32
@@ -25,3 +28,5 @@ MULTILIB_REQUIRED += march=rv64ima/mabi=lp64/mcmodel=medany
MULTILIB_REQUIRED += march=rv64imac/mabi=lp64/mcmodel=medany
MULTILIB_REQUIRED += march=rv64imafd/mabi=lp64d/mcmodel=medany
MULTILIB_REQUIRED += march=rv64imafdc/mabi=lp64d/mcmodel=medany
+MULTILIB_REQUIRED += march=rv64imafdc/mabi=lp64d/mcmodel=medany/mstrict-align
+MULTILIB_REQUIRED += march=rv64imc/mabi=lp64/mcmodel=medany/mstrict-align
diff --git a/gcc/config/riscv/vector-iterators.md b/gcc/config/riscv/vector-iterators.md
index f8da71b..aa3b6fb 100644
--- a/gcc/config/riscv/vector-iterators.md
+++ b/gcc/config/riscv/vector-iterators.md
@@ -120,10 +120,11 @@
UNSPEC_SF_VFNRCLIP
UNSPEC_SF_VFNRCLIPU
+ UNSPEC_SF_CV
])
(define_c_enum "unspecv" [
- UNSPECV_FRM_RESTORE_EXIT
+ UNSPECV_SF_CV
])
;; Subset of VI with fractional LMUL types
@@ -2417,6 +2418,47 @@
(RVVM1x2DF "rvvm1df")
])
+(define_mode_attr vsubel [
+ (RVVM8HI "qi") (RVVM4HI "qi") (RVVM2HI "qi") (RVVM1HI "qi") (RVVMF2HI "qi") (RVVMF4HI "qi")
+
+ (RVVM8SI "hi") (RVVM4SI "hi") (RVVM2SI "hi") (RVVM1SI "hi") (RVVMF2SI "hi")
+
+ (RVVM8SF "hf") (RVVM4SF "hf") (RVVM2SF "hf") (RVVM1SF "hf") (RVVMF2SF "hf")
+
+ (RVVM8DI "si") (RVVM4DI "si") (RVVM2DI "si") (RVVM1DI "si")
+
+ (RVVM8DF "sf") (RVVM4DF "sf") (RVVM2DF "sf") (RVVM1DF "sf")
+
+ ;; VLS modes.
+ (V1HI "qi") (V2HI "qi") (V4HI "qi") (V8HI "qi") (V16HI "qi") (V32HI "qi") (V64HI "qi") (V128HI "qi") (V256HI "qi")
+ (V512HI "qi") (V1024HI "qi") (V2048HI "qi")
+ (V1SI "hi") (V2SI "hi") (V4SI "hi") (V8SI "hi") (V16SI "hi") (V32SI "hi") (V64SI "hi") (V128SI "hi") (V256SI "hi")
+ (V512SI "hi") (V1024SI "hi")
+ (V1DI "si") (V2DI "si") (V4DI "si") (V8DI "si") (V16DI "si") (V32DI "si") (V64DI "si") (V128DI "si") (V256DI "si") (V512DI "si")
+
+ (V1SF "hf")
+ (V2SF "hf")
+ (V4SF "hf")
+ (V8SF "hf")
+ (V16SF "hf")
+ (V32SF "hf")
+ (V64SF "hf")
+ (V128SF "hf")
+ (V256SF "hf")
+ (V512SF "hf")
+ (V1024SF "hf")
+ (V1DF "sf")
+ (V2DF "sf")
+ (V4DF "sf")
+ (V8DF "sf")
+ (V16DF "sf")
+ (V32DF "sf")
+ (V64DF "sf")
+ (V128DF "sf")
+ (V256DF "sf")
+ (V512DF "sf")
+])
+
(define_mode_attr VSUBEL [
(RVVM8HI "QI") (RVVM4HI "QI") (RVVM2HI "QI") (RVVM1HI "QI") (RVVMF2HI "QI") (RVVMF4HI "QI")
@@ -3971,6 +4013,14 @@
UNSPEC_VASUBU UNSPEC_VASUB UNSPEC_VSMUL
UNSPEC_VSSRL UNSPEC_VSSRA])
+(define_int_iterator VSAT_VX_OP_V_VDUP [
+ UNSPEC_VAADDU UNSPEC_VAADD
+])
+
+(define_int_iterator VSAT_VX_OP_VDUP_V [
+ UNSPEC_VAADDU UNSPEC_VAADD
+])
+
(define_int_iterator VSAT_ARITH_OP [UNSPEC_VAADDU UNSPEC_VAADD
UNSPEC_VASUBU UNSPEC_VASUB UNSPEC_VSMUL])
(define_int_iterator VSAT_SHIFT_OP [UNSPEC_VSSRL UNSPEC_VSSRA])
@@ -4005,6 +4055,14 @@
(UNSPEC_VSSRA "vsshift") (UNSPEC_VNCLIP "vnclip")
(UNSPEC_VNCLIPU "vnclip")])
+(define_int_attr sat_op_v_vdup [
+ (UNSPEC_VAADDU "aaddu") (UNSPEC_VAADD "aadd")
+])
+
+(define_int_attr sat_op_vdup_v [
+ (UNSPEC_VAADDU "aaddu") (UNSPEC_VAADD "aadd")
+])
+
(define_int_attr misc_op [(UNSPEC_VMSBF "sbf") (UNSPEC_VMSIF "sif") (UNSPEC_VMSOF "sof")
(UNSPEC_VFRSQRT7 "rsqrt7")])
@@ -4040,6 +4098,15 @@
smax umax smin umin mult div udiv mod umod
])
+(define_code_iterator any_int_binop_no_shift_v_vdup [
+ plus minus and ior xor mult div udiv mod umod smax umax smin umin us_plus
+ us_minus ss_plus ss_minus
+])
+
+(define_code_iterator any_int_binop_no_shift_vdup_v [
+ plus minus and ior xor mult smax umax smin umin us_plus ss_plus
+])
+
(define_code_iterator any_int_unop [neg not])
(define_code_iterator any_commutative_binop [plus and ior xor
@@ -4873,3 +4940,50 @@
(RVVM1QI "rvvm4sf")
(RVVM2QI "rvvm8sf")
])
+
+
+(define_mode_iterator SF_VC_W [
+ RVVM4QI RVVM2QI RVVM1QI RVVMF2QI RVVMF4QI (RVVMF8QI "TARGET_MIN_VLEN > 32")
+ RVVM4HI RVVM2HI RVVM1HI RVVMF2HI (RVVMF4HI "TARGET_MIN_VLEN > 32")
+ RVVM4SI RVVM2SI RVVM1SI (RVVMF2SI "TARGET_MIN_VLEN > 32")
+])
+
+(define_mode_attr SF_VW [
+ (RVVM4QI "RVVM8HI") (RVVM2QI "RVVM4HI") (RVVM1QI "RVVM2HI") (RVVMF2QI "RVVM1HI")
+ (RVVMF4QI "RVVMF2HI") (RVVMF8QI "RVVMF4HI")
+ (RVVM4HI "RVVM8SI") (RVVM2HI "RVVM4SI") (RVVM1HI "RVVM2SI") (RVVMF2HI "RVVM1SI")
+ (RVVMF4HI "RVVMF2SI")
+ (RVVM4SI "RVVM8DI") (RVVM2SI "RVVM4DI") (RVVM1SI "RVVM2DI") (RVVMF2SI "RVVM1DI")
+])
+
+(define_mode_attr sf_vw [
+ (RVVM4QI "rvvm8hi") (RVVM2QI "rvvm4hi") (RVVM1QI "rvvm2hi") (RVVMF2QI "rvvm1hi")
+ (RVVMF4QI "rvvmf2hi") (RVVMF8QI "rvvmf4hi")
+ (RVVM4HI "rvvm8si") (RVVM2HI "rvvm4si") (RVVM1HI "rvvm2si") (RVVMF2HI "rvvm1si")
+ (RVVMF4HI "rvvmf2si")
+ (RVVM4SI "rvvm8di") (RVVM2SI "rvvm4di") (RVVM1SI "rvvm2di") (RVVMF2SI "rvvm1di")
+])
+
+(define_mode_iterator SF_FV [
+ RVVM8HI RVVM4HI RVVM2HI RVVM1HI RVVMF2HI (RVVMF4HI "TARGET_MIN_VLEN > 32")
+ RVVM8SI RVVM4SI RVVM2SI RVVM1SI (RVVMF2SI "TARGET_MIN_VLEN > 32")
+ (RVVM8DI "TARGET_VECTOR_ELEN_64") (RVVM4DI "TARGET_VECTOR_ELEN_64")
+ (RVVM2DI "TARGET_VECTOR_ELEN_64") (RVVM1DI "TARGET_VECTOR_ELEN_64")
+])
+
+
+(define_mode_iterator SF_VC_FW [
+ RVVM4HI RVVM2HI RVVM1HI RVVMF2HI (RVVMF4HI "TARGET_MIN_VLEN > 32")
+ RVVM4SI RVVM2SI RVVM1SI (RVVMF2SI "TARGET_MIN_VLEN > 32")
+])
+
+(define_mode_attr SF_XF [
+ (RVVM8HI "HF") (RVVM4HI "HF") (RVVM2HI "HF") (RVVM1HI "HF") (RVVMF2HI "HF") (RVVMF4HI "HF")
+ (RVVM8SI "SF") (RVVM4SI "SF") (RVVM2SI "SF") (RVVM1SI "SF") (RVVMF2SI "SF")
+ (RVVM8DI "DF") (RVVM4DI "DF") (RVVM2DI "DF") (RVVM1DI "DF")
+])
+
+(define_mode_attr SF_XFW [
+ (RVVM4HI "HF") (RVVM2HI "HF") (RVVM1HI "HF") (RVVMF2HI "HF") (RVVMF4HI "HF")
+ (RVVM4SI "SF") (RVVM2SI "SF") (RVVM1SI "SF") (RVVMF2SI "SF")
+])
diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md
index 51eb64f..2b35d66 100644
--- a/gcc/config/riscv/vector.md
+++ b/gcc/config/riscv/vector.md
@@ -56,8 +56,7 @@
vssegtux,vssegtox,vlsegdff,vandn,vbrev,vbrev8,vrev8,vcpop,vclz,vctz,vrol,\
vror,vwsll,vclmul,vclmulh,vghsh,vgmul,vaesef,vaesem,vaesdf,vaesdm,\
vaeskf1,vaeskf2,vaesz,vsha2ms,vsha2ch,vsha2cl,vsm4k,vsm4r,vsm3me,vsm3c,\
- vfncvtbf16,vfwcvtbf16,vfwmaccbf16,\
- sf_vqmacc,sf_vfnrclip")
+ vfncvtbf16,vfwcvtbf16,vfwmaccbf16,sf_vqmacc,sf_vfnrclip,sf_vc,sf_vc_se")
(const_string "true")]
(const_string "false")))
@@ -1116,19 +1115,6 @@
(set_attr "mode" "SI")]
)
-;; The volatile fsrmsi restore is used for the exit point for the
-;; dynamic mode switching. It will generate one volatile fsrm a5
-;; which won't be eliminated.
-(define_insn "fsrmsi_restore_volatile"
- [(set (reg:SI FRM_REGNUM)
- (unspec_volatile:SI [(match_operand:SI 0 "register_operand" "r")]
- UNSPECV_FRM_RESTORE_EXIT))]
- "TARGET_VECTOR"
- "fsrm\t%0"
- [(set_attr "type" "wrfrm")
- (set_attr "mode" "SI")]
-)
-
;; Read FRM
(define_insn "frrmsi"
[(set (match_operand:SI 0 "register_operand" "=r")
@@ -1412,6 +1398,7 @@
}
[(set_attr "type" "vmov,vlde,vste")
(set_attr "mode" "<VT:MODE>")
+ (set (attr "merge_op_idx") (const_int INVALID_ATTRIBUTE))
(set (attr "avl_type_idx") (const_int INVALID_ATTRIBUTE))
(set (attr "mode_idx") (const_int INVALID_ATTRIBUTE))])
@@ -1449,6 +1436,7 @@
}
[(set_attr "type" "vlde,vste,vmov")
(set_attr "mode" "<MODE>")
+ (set (attr "merge_op_idx") (const_int INVALID_ATTRIBUTE))
(set (attr "avl_type_idx") (const_int INVALID_ATTRIBUTE))
(set (attr "mode_idx") (const_int INVALID_ATTRIBUTE))]
)
@@ -1499,6 +1487,7 @@
}
[(set_attr "type" "vlde,vste,vmov")
(set_attr "mode" "<VLS_AVL_REG:MODE>")
+ (set (attr "merge_op_idx") (const_int INVALID_ATTRIBUTE))
(set (attr "avl_type_idx") (const_int INVALID_ATTRIBUTE))
(set (attr "mode_idx") (const_int INVALID_ATTRIBUTE))]
)
@@ -1565,20 +1554,44 @@
(define_expand "vec_duplicate<mode>"
[(set (match_operand:V_VLS 0 "register_operand")
(vec_duplicate:V_VLS
- (match_operand:<VEL> 1 "direct_broadcast_operand")))]
+ (match_operand:<VEL> 1 "any_broadcast_operand")))]
"TARGET_VECTOR"
{
- /* Early expand DImode broadcast in RV32 system to avoid RA reload
- generate (set (reg) (vec_duplicate:DI)). */
+ /* Don't keep a DImode broadcast for RV32 in the vec_duplicate form.
+ Otherwise combine or late combine could end up doing
+ "64-bit broadcast" (!= vmv.v.x)
+ + vadd.vv
+ = vadd.vx
+ which would be invalid. */
bool gt_p = maybe_gt (GET_MODE_SIZE (<VEL>mode), GET_MODE_SIZE (Pmode));
if (!FLOAT_MODE_P (<VEL>mode) && gt_p)
{
- riscv_vector::emit_vlmax_insn (code_for_pred_broadcast (<MODE>mode),
- riscv_vector::UNARY_OP, operands);
- DONE;
+ riscv_vector::emit_vlmax_insn
+ (code_for_pred_strided_broadcast
+ (<MODE>mode), riscv_vector::UNARY_OP, operands);
+ DONE;
}
- /* Otherwise, allow it fall into general vec_duplicate pattern
- which allow us to have vv->vx combine optimization in later pass. */
+
+  /* Even though we can eventually broadcast any permissible
+     constant by moving it into a register, we need to force
+     any non-immediate one into a register here.
+ If we didn't do that we couldn't fwprop/late-combine
+ vec_duplicate 123.45f
+ + vfadd.vv
+ = vfadd.vf
+ because the constant is valid for vec_duplicate but not
+ for vfadd.vf. Therefore we need to do
+ fa0 = 123.45f
+ vec_duplicate fa0
+ + vfadd.vv
+ = vfadd.vf */
+ if (!satisfies_constraint_P (operands[1])
+ && !satisfies_constraint_J (operands[1])
+ && !rtx_equal_p (operands[1], CONST0_RTX (<VEL>mode))
+ && !memory_operand (operands[1], <VEL>mode))
+ operands[1] = force_reg (<VEL>mode, operands[1]);
+
+ /* Otherwise keep the vec_duplicate pattern until split. */
})
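
A C-level sketch of the motivating case described in the comment above (the function name and constant are illustrative, not part of the patch): with the FP constant forced into a scalar register, late combine can fold the broadcast into the arithmetic instruction.

    /* Compiled with RVV enabled (e.g. -march=rv64gcv -O3), the loop body
       should become a single vfadd.vf once 123.45f lives in an FPR,
       instead of vfmv.v.f followed by vfadd.vv.  Sketch only.  */
    void add_const (float *a, int n)
    {
      for (int i = 0; i < n; i++)
        a[i] += 123.45f;
    }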
;; According to GCC internal:
@@ -1588,14 +1601,20 @@
(define_insn_and_split "*vec_duplicate<mode>"
[(set (match_operand:V_VLS 0 "register_operand")
(vec_duplicate:V_VLS
- (match_operand:<VEL> 1 "direct_broadcast_operand")))]
+ (match_operand:<VEL> 1 "any_broadcast_operand")))]
"TARGET_VECTOR && can_create_pseudo_p ()"
"#"
"&& 1"
[(const_int 0)]
{
- riscv_vector::emit_vlmax_insn (code_for_pred_broadcast (<MODE>mode),
- riscv_vector::UNARY_OP, operands);
+ if (riscv_vector::can_be_broadcast_p (operands[1]))
+ riscv_vector::emit_vlmax_insn (code_for_pred_broadcast (<MODE>mode),
+ riscv_vector::UNARY_OP, operands);
+ else
+ riscv_vector::emit_vlmax_insn (code_for_pred_strided_broadcast
+ (<MODE>mode), riscv_vector::UNARY_OP,
+ operands);
+
DONE;
}
[(set_attr "type" "vector")]
@@ -1797,7 +1816,7 @@
[(set_attr "type" "vsetvl")
(set_attr "mode" "SI")])
-;; This pattern use to combine bellow two insns and then further remove
+;; This pattern is used to combine the two insns below and then further remove
;; unnecessary sign_extend operations:
;; (set (reg:DI 134 [ _1 ])
;; (unspec:DI [
@@ -2136,170 +2155,119 @@
(match_operand 7 "const_int_operand")
(reg:SI VL_REGNUM)
(reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
- (vec_duplicate:V_VLS
- (match_operand:<VEL> 3 "direct_broadcast_operand"))
+ ;; (vec_duplicate:V_VLS ;; wrapper activated by wrap_vec_dup below.
+ (match_operand:<VEL> 3 "direct_broadcast_operand") ;; )
(match_operand:V_VLS 2 "vector_merge_operand")))]
"TARGET_VECTOR"
{
- /* Transform vmv.v.x/vfmv.v.f (avl = 1) into vmv.s.x since vmv.s.x/vfmv.s.f
- has better chances to do vsetvl fusion in vsetvl pass. */
- if (riscv_vector::splat_to_scalar_move_p (operands))
+ bool wrap_vec_dup = true;
+ rtx vec_cst = NULL_RTX;
+ if (immediate_operand (operands[3], <VEL>mode)
+ && (vec_cst = gen_const_vec_duplicate (<MODE>mode, operands[3]))
+ && (/* -> pred_broadcast<mode>_zero */
+ (vector_least_significant_set_mask_operand (operands[1],
+ <VM>mode)
+ && vector_const_0_operand (vec_cst, <MODE>mode))
+ || (/* pred_broadcast<mode>_imm */
+ vector_all_trues_mask_operand (operands[1], <VM>mode)
+ && vector_const_int_or_double_0_operand (vec_cst,
+ <MODE>mode))))
{
- operands[1] = riscv_vector::gen_scalar_move_mask (<VM>mode);
- operands[3] = force_reg (<VEL>mode, operands[3]);
+ operands[3] = vec_cst;
+ wrap_vec_dup = false;
}
- /* Handle vmv.s.x instruction (Wb1 mask) which has memory scalar. */
- else if (satisfies_constraint_Wdm (operands[3]))
- {
- if (satisfies_constraint_Wb1 (operands[1]))
- {
- /* Case 1: vmv.s.x (TA, x == memory) ==> vlse.v (TA) */
- if (satisfies_constraint_vu (operands[2]))
- operands[1] = CONSTM1_RTX (<VM>mode);
- else if (GET_MODE_BITSIZE (<VEL>mode) > GET_MODE_BITSIZE (Pmode))
- {
- /* Case 2: vmv.s.x (TU, x == memory) ==>
- vl = 0 or 1; + vlse.v (TU) in RV32 system */
- operands[4] = riscv_vector::gen_avl_for_scalar_move (operands[4]);
- operands[1] = CONSTM1_RTX (<VM>mode);
- }
- else
- /* Case 3: load x (memory) to register. */
- operands[3] = force_reg (<VEL>mode, operands[3]);
- }
- }
- else if (GET_MODE_BITSIZE (<VEL>mode) > GET_MODE_BITSIZE (Pmode)
- && (immediate_operand (operands[3], Pmode)
+ else if (GET_MODE_SIZE (<VEL>mode) > UNITS_PER_WORD
+ && satisfies_constraint_Wb1 (operands[1])
+ && (immediate_operand (operands[3], Xmode)
|| (CONST_POLY_INT_P (operands[3])
&& known_ge (rtx_to_poly_int64 (operands[3]), 0U)
- && known_le (rtx_to_poly_int64 (operands[3]), GET_MODE_SIZE (<MODE>mode)))))
+ && known_le (rtx_to_poly_int64 (operands[3]),
+ GET_MODE_SIZE (<MODE>mode)))))
{
rtx tmp = gen_reg_rtx (Pmode);
poly_int64 value = rtx_to_poly_int64 (operands[3]);
- emit_move_insn (tmp, gen_int_mode (value, Pmode));
+ emit_move_insn (tmp, gen_int_mode (value, Xmode));
operands[3] = gen_rtx_SIGN_EXTEND (<VEL>mode, tmp);
}
- /* Never load (const_int 0) into a register, that's silly. */
- else if (operands[3] == CONST0_RTX (<VEL>mode))
+
+  /* For a vmv.v.x, never load (const_int 0) or a valid immediate operand
+     into a register, because we can use vmv.v.i.  */
+ else if (satisfies_constraint_Wc1 (operands[1])
+ && (satisfies_constraint_P (operands[3])
+ || operands[3] == CONST0_RTX (<VEL>mode)))
;
- /* If we're broadcasting [-16..15] across more than just
- element 0, then we can use vmv.v.i directly, thus avoiding
- the load of the constant into a GPR. */
- else if (CONST_INT_P (operands[3])
- && IN_RANGE (INTVAL (operands[3]), -16, 15)
- && !satisfies_constraint_Wb1 (operands[1]))
+ /* For vmv.s.x we have vmv.s.x v1, zero. */
+ else if (satisfies_constraint_Wb1 (operands[1])
+ && operands[3] == CONST0_RTX (<VEL>mode))
;
else
operands[3] = force_reg (<VEL>mode, operands[3]);
+ if (wrap_vec_dup)
+ operands[3] = gen_rtx_VEC_DUPLICATE (<MODE>mode, operands[3]);
})
-(define_insn_and_split "*pred_broadcast<mode>"
- [(set (match_operand:V_VLSI 0 "register_operand" "=vr, vr, vd, vd, vr, vr, vr, vr")
+(define_insn_and_rewrite "*pred_broadcast<mode>"
+ [(set (match_operand:V_VLSI 0 "register_operand" "=vr, vr, vr, vr")
(if_then_else:V_VLSI
(unspec:<VM>
- [(match_operand:<VM> 1 "vector_broadcast_mask_operand" "Wc1,Wc1, vm, vm,Wc1,Wc1,Wb1,Wb1")
- (match_operand 4 "vector_length_operand" "rvl,rvl,rvl,rvl,rvl,rvl,rvl,rvl")
- (match_operand 5 "const_int_operand" " i, i, i, i, i, i, i, i")
- (match_operand 6 "const_int_operand" " i, i, i, i, i, i, i, i")
- (match_operand 7 "const_int_operand" " i, i, i, i, i, i, i, i")
+ [(match_operand:<VM> 1 "vector_broadcast_mask_operand" "Wc1,Wc1,Wb1,Wb1")
+ (match_operand 4 "vector_length_operand" "rvl,rvl,rvl,rvl")
+ (match_operand 5 "const_int_operand" " i, i, i, i")
+ (match_operand 6 "const_int_operand" " i, i, i, i")
+ (match_operand 7 "const_int_operand" " i, i, i, i")
(reg:SI VL_REGNUM)
(reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
(vec_duplicate:V_VLSI
- (match_operand:<VEL> 3 "direct_broadcast_operand" "rP,rP,Wdm,Wdm,Wdm,Wdm, rJ, rJ"))
- (match_operand:V_VLSI 2 "vector_merge_operand" "vu, 0, vu, 0, vu, 0, vu, 0")))]
+ (match_operand:<VEL> 3 "direct_broadcast_operand" " rP, rP, rJ, rJ"))
+ (match_operand:V_VLSI 2 "vector_merge_operand" " vu, 0, vu, 0")))]
"TARGET_VECTOR"
"@
vmv.v.%o3\t%0,%3
vmv.v.%o3\t%0,%3
- vlse<sew>.v\t%0,%3,zero,%1.t
- vlse<sew>.v\t%0,%3,zero,%1.t
- vlse<sew>.v\t%0,%3,zero
- vlse<sew>.v\t%0,%3,zero
vmv.s.x\t%0,%z3
vmv.s.x\t%0,%z3"
- "(register_operand (operands[3], <VEL>mode)
- || CONST_POLY_INT_P (operands[3]))
- && GET_MODE_BITSIZE (<VEL>mode) > GET_MODE_BITSIZE (Pmode)"
- [(set (match_dup 0)
- (if_then_else:V_VLSI (unspec:<VM> [(match_dup 1) (match_dup 4)
- (match_dup 5) (match_dup 6) (match_dup 7)
- (reg:SI VL_REGNUM) (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
- (vec_duplicate:V_VLSI (match_dup 3))
- (match_dup 2)))]
- {
- gcc_assert (can_create_pseudo_p ());
- if (CONST_POLY_INT_P (operands[3]))
- {
- rtx tmp = gen_reg_rtx (<VEL>mode);
- emit_move_insn (tmp, operands[3]);
- operands[3] = tmp;
- }
- rtx m = assign_stack_local (<VEL>mode, GET_MODE_SIZE (<VEL>mode),
- GET_MODE_ALIGNMENT (<VEL>mode));
- m = validize_mem (m);
- emit_move_insn (m, operands[3]);
- m = gen_rtx_MEM (<VEL>mode, force_reg (Pmode, XEXP (m, 0)));
- operands[3] = m;
-
- /* For SEW = 64 in RV32 system, we expand vmv.s.x:
- andi a2,a2,1
- vsetvl zero,a2,e64
- vlse64.v */
- if (satisfies_constraint_Wb1 (operands[1]))
- {
- operands[4] = riscv_vector::gen_avl_for_scalar_move (operands[4]);
- operands[1] = CONSTM1_RTX (<VM>mode);
- }
- }
- [(set_attr "type" "vimov,vimov,vlds,vlds,vlds,vlds,vimovxv,vimovxv")
+ "&& (operands[1] == CONSTM1_RTX (<VM>mode)
+ && operands[4] == CONST1_RTX (Pmode)
+ && (register_operand (operands[3], <VEL>mode)
+ || satisfies_constraint_J (operands[3])))"
+{
+ /* A broadcast of a single element is just a vmv.s.x. */
+ operands[1] = riscv_vector::gen_scalar_move_mask (<VM>mode);
+}
+ [(set_attr "type" "vimov,vimov,vimovxv,vimovxv")
(set_attr "mode" "<MODE>")])
-(define_insn "*pred_broadcast<mode>_zvfh"
- [(set (match_operand:V_VLSF 0 "register_operand" "=vr, vr, vr, vr")
+(define_insn_and_rewrite "pred_broadcast<mode>_zvfh"
+ [(set (match_operand:V_VLSF 0 "register_operand" "=vr, vr, vr, vr")
(if_then_else:V_VLSF
(unspec:<VM>
- [(match_operand:<VM> 1 "vector_broadcast_mask_operand" "Wc1, Wc1, Wb1, Wb1")
- (match_operand 4 "vector_length_operand" "rvl, rvl, rvl, rvl")
- (match_operand 5 "const_int_operand" " i, i, i, i")
- (match_operand 6 "const_int_operand" " i, i, i, i")
- (match_operand 7 "const_int_operand" " i, i, i, i")
+ [(match_operand:<VM> 1 "vector_broadcast_mask_operand" "Wc1,Wc1,Wb1,Wb1")
+ (match_operand 4 "vector_length_operand" "rvl,rvl,rvl,rvl")
+ (match_operand 5 "const_int_operand" " i, i, i, i")
+ (match_operand 6 "const_int_operand" " i, i, i, i")
+ (match_operand 7 "const_int_operand" " i, i, i, i")
(reg:SI VL_REGNUM)
(reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
(vec_duplicate:V_VLSF
- (match_operand:<VEL> 3 "direct_broadcast_operand" " f, f, f, f"))
- (match_operand:V_VLSF 2 "vector_merge_operand" " vu, 0, vu, 0")))]
+ (match_operand:<VEL> 3 "direct_broadcast_operand" " f, f, f, f"))
+ (match_operand:V_VLSF 2 "vector_merge_operand" " vu, 0, vu, 0")))]
"TARGET_VECTOR"
"@
vfmv.v.f\t%0,%3
vfmv.v.f\t%0,%3
vfmv.s.f\t%0,%3
vfmv.s.f\t%0,%3"
+ "&& (operands[1] == CONSTM1_RTX (<VM>mode)
+ && operands[4] == CONST1_RTX (Pmode)
+ && (register_operand (operands[3], <VEL>mode)
+ || satisfies_constraint_J (operands[3])))"
+{
+ /* A broadcast of a single element is just a vfmv.s.f. */
+ operands[1] = riscv_vector::gen_scalar_move_mask (<VM>mode);
+}
[(set_attr "type" "vfmov,vfmov,vfmovfv,vfmovfv")
(set_attr "mode" "<MODE>")])
-(define_insn "*pred_broadcast<mode>_zvfhmin"
- [(set (match_operand:V_VLSF_ZVFHMIN 0 "register_operand" "=vr, vr, vr, vr")
- (if_then_else:V_VLSF_ZVFHMIN
- (unspec:<VM>
- [(match_operand:<VM> 1 "vector_broadcast_mask_operand" " vm, vm, Wc1, Wc1")
- (match_operand 4 "vector_length_operand" "rvl, rvl, rvl, rvl")
- (match_operand 5 "const_int_operand" " i, i, i, i")
- (match_operand 6 "const_int_operand" " i, i, i, i")
- (match_operand 7 "const_int_operand" " i, i, i, i")
- (reg:SI VL_REGNUM)
- (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
- (vec_duplicate:V_VLSF_ZVFHMIN
- (match_operand:<VEL> 3 "direct_broadcast_operand" "Wdm, Wdm, Wdm, Wdm"))
- (match_operand:V_VLSF_ZVFHMIN 2 "vector_merge_operand" " vu, 0, vu, 0")))]
- "TARGET_VECTOR"
- "@
- vlse<sew>.v\t%0,%3,zero,%1.t
- vlse<sew>.v\t%0,%3,zero,%1.t
- vlse<sew>.v\t%0,%3,zero
- vlse<sew>.v\t%0,%3,zero"
- [(set_attr "type" "vlds,vlds,vlds,vlds")
- (set_attr "mode" "<MODE>")])
-
(define_insn "*pred_broadcast<mode>_extended_scalar"
[(set (match_operand:V_VLSI_D 0 "register_operand" "=vr, vr, vr, vr")
(if_then_else:V_VLSI_D
@@ -2362,6 +2330,117 @@
[(set_attr "type" "vimov,vimov")
(set_attr "mode" "<MODE>")])
+(define_expand "@pred_strided_broadcast<mode>"
+ [(set (match_operand:V_VLS 0 "register_operand")
+ (if_then_else:V_VLS
+ (unspec:<VM>
+ [(match_operand:<VM> 1 "strided_broadcast_mask_operand")
+ (match_operand 4 "vector_length_operand")
+ (match_operand 5 "const_int_operand")
+ (match_operand 6 "const_int_operand")
+ (match_operand 7 "const_int_operand")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+ (vec_duplicate:V_VLS
+ (match_operand:<VEL> 3 "strided_broadcast_operand"))
+ (match_operand:V_VLS 2 "vector_merge_operand")))]
+ "TARGET_VECTOR"
+{
+ if (satisfies_constraint_Wb1 (operands[1]))
+ {
+      /* If we're asked to set a single element (like vmv.s.x, but we
+	 need to go via memory here) and the tail policy is agnostic,
+	 we can overwrite all elements.
+	 Thus, set the mask to broadcast.  */
+ operands[1] = CONSTM1_RTX (<VM>mode);
+ if (!satisfies_constraint_vu (operands[2])
+ && GET_MODE_SIZE (<VEL>mode) > UNITS_PER_WORD)
+ {
+ /* Case 2: vmv.s.x (TU, x == memory) ==>
+ vl = 0 or 1; + vlse.v (TU) in RV32 system */
+ /* In this case we must not overwrite the residual elements,
+ so set the vector length to 0/1. */
+ operands[4] = riscv_vector::gen_avl_for_scalar_move (operands[4]);
+ }
+ }
+})
+
+(define_insn_and_split "*pred_strided_broadcast<mode>"
+ [(set (match_operand:V_VLSI 0 "register_operand" "=vd, vd, vr, vr")
+ (if_then_else:V_VLSI
+ (unspec:<VM>
+ [(match_operand:<VM> 1 "strided_broadcast_mask_operand" " vm, vm,Wc1,Wc1")
+ (match_operand 4 "vector_length_operand" "rvl,rvl,rvl,rvl")
+ (match_operand 5 "const_int_operand" " i, i, i, i")
+ (match_operand 6 "const_int_operand" " i, i, i, i")
+ (match_operand 7 "const_int_operand" " i, i, i, i")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+ (vec_duplicate:V_VLSI
+ (match_operand:<VEL> 3 "strided_broadcast_operand" " A, A, A, A"))
+ (match_operand:V_VLSI 2 "vector_merge_operand" " vu, 0, vu, 0")))]
+ "TARGET_VECTOR"
+ "@
+ vlse<sew>.v\t%0,%3,zero,%1.t
+ vlse<sew>.v\t%0,%3,zero,%1.t
+ vlse<sew>.v\t%0,%3,zero
+ vlse<sew>.v\t%0,%3,zero"
+ "&& !strided_load_broadcast_p () && can_create_pseudo_p ()"
+ [(const_int 0)]
+ {
+ rtx tmp = gen_reg_rtx (V1DImode);
+ emit_move_insn (tmp, gen_lowpart (V1DImode, operands[3]));
+ tmp = lowpart_subreg (<MODE>mode, tmp, V1DImode);
+
+ emit_insn
+ (gen_pred_gather<mode>_scalar
+ (operands[0], operands[1], operands[2], tmp, CONST0_RTX (Pmode),
+ operands[4], operands[5], operands[6], operands[7]));
+ DONE;
+ }
+ [(set_attr "type" "vlds,vlds,vlds,vlds")
+ (set_attr "mode" "<MODE>")])
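+
+;; When strided_load_broadcast_p () is false, the split above replaces the
+;; vlse with a one-element load plus a gather from element 0.  A rough
+;; intrinsics-level equivalent in C (assuming the current riscv_vector.h
+;; API; the function name is illustrative):
+;;
+;;   #include <riscv_vector.h>
+;;   #include <stdint.h>
+;;
+;;   vint64m1_t broadcast_from_mem (const int64_t *p, size_t vl)
+;;   {
+;;     vint64m1_t t = __riscv_vle64_v_i64m1 (p, 1);  /* one-element load  */
+;;     return __riscv_vrgather_vx_i64m1 (t, 0, vl);  /* splat element 0   */
+;;   }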
+
+(define_insn_and_split "*pred_strided_broadcast<mode>_zvfhmin"
+ [(set (match_operand:V_VLSF_ZVFHMIN 0 "register_operand" "=vr, vr, vr, vr")
+ (if_then_else:V_VLSF_ZVFHMIN
+ (unspec:<VM>
+ [(match_operand:<VM> 1 "strided_broadcast_mask_operand" " vm, vm, Wc1, Wc1")
+ (match_operand 4 "vector_length_operand" "rvl, rvl, rvl, rvl")
+ (match_operand 5 "const_int_operand" " i, i, i, i")
+ (match_operand 6 "const_int_operand" " i, i, i, i")
+ (match_operand 7 "const_int_operand" " i, i, i, i")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+ (vec_duplicate:V_VLSF_ZVFHMIN
+ (match_operand:<VEL> 3 "strided_broadcast_operand" " A, A, A, A"))
+ (match_operand:V_VLSF_ZVFHMIN 2 "vector_merge_operand" " vu, 0, vu, 0")))]
+ "TARGET_VECTOR"
+ "@
+ vlse<sew>.v\t%0,%3,zero,%1.t
+ vlse<sew>.v\t%0,%3,zero,%1.t
+ vlse<sew>.v\t%0,%3,zero
+ vlse<sew>.v\t%0,%3,zero"
+ "&& !strided_load_broadcast_p ()
+ && <VEL>mode == HFmode
+ && can_create_pseudo_p ()"
+ [(const_int 0)]
+ {
+ poly_uint64 nunits = GET_MODE_NUNITS (<MODE>mode);
+ machine_mode vmodehi
+ = riscv_vector::get_vector_mode (HImode, nunits).require ();
+ rtx ops[] = {gen_lowpart (vmodehi, operands[0]),
+ gen_lowpart (HImode, operands[3])};
+ riscv_vector::emit_avltype_insn (code_for_pred_broadcast (vmodehi),
+ riscv_vector::UNARY_OP, ops,
+ (riscv_vector::avl_type) INTVAL (operands[7]),
+ operands[4]);
+ DONE;
+ }
+ [(set_attr "type" "vlds,vlds,vlds,vlds")
+ (set_attr "mode" "<MODE>")])
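+
+;; Without Zvfh there is no scalar FP16 move, so the split above routes
+;; the broadcast through the equally-sized integer mode.  The bit-cast at
+;; its core, sketched in C (_Float16 availability is target-dependent;
+;; illustrative only):
+;;
+;;   #include <stdint.h>
+;;   #include <string.h>
+;;
+;;   uint16_t half_bits (_Float16 f)
+;;   {
+;;     uint16_t u;
+;;     memcpy (&u, &f, sizeof u);  /* same bits, now an HImode value    */
+;;     return u;                   /* an integer vmv.v.x can splat this */
+;;   }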
+
+
;; -------------------------------------------------------------------------------
;; ---- Predicated Strided loads/stores
;; -------------------------------------------------------------------------------
@@ -4603,8 +4682,8 @@
;; Handle GET_MODE_INNER (mode) = DImode. We need to split them since
;; we need to deal with SEW = 64 in RV32 system.
(define_expand "@pred_<sat_op><mode>_scalar"
- [(set (match_operand:VI_D 0 "register_operand")
- (if_then_else:VI_D
+ [(set (match_operand:V_VLSI_D 0 "register_operand")
+ (if_then_else:V_VLSI_D
(unspec:<VM>
[(match_operand:<VM> 1 "vector_mask_operand")
(match_operand 5 "vector_length_operand")
@@ -4615,10 +4694,10 @@
(reg:SI VL_REGNUM)
(reg:SI VTYPE_REGNUM)
(reg:SI VXRM_REGNUM)] UNSPEC_VPREDICATE)
- (unspec:VI_D
- [(match_operand:VI_D 3 "register_operand")
+ (unspec:V_VLSI_D
+ [(match_operand:V_VLSI_D 3 "register_operand")
(match_operand:<VEL> 4 "reg_or_int_operand")] VSAT_ARITH_OP)
- (match_operand:VI_D 2 "vector_merge_operand")))]
+ (match_operand:V_VLSI_D 2 "vector_merge_operand")))]
"TARGET_VECTOR"
{
if (riscv_vector::sew64_scalar_helper (
@@ -4637,8 +4716,8 @@
})
(define_insn "*pred_<sat_op><mode>_scalar"
- [(set (match_operand:VI_D 0 "register_operand" "=vd, vr, vd, vr")
- (if_then_else:VI_D
+ [(set (match_operand:V_VLSI_D 0 "register_operand" "=vd, vr, vd, vr")
+ (if_then_else:V_VLSI_D
(unspec:<VM>
[(match_operand:<VM> 1 "vector_mask_operand" " vm, vm,Wc1,Wc1")
(match_operand 5 "vector_length_operand" "rvl,rvl,rvl,rvl")
@@ -4649,18 +4728,18 @@
(reg:SI VL_REGNUM)
(reg:SI VTYPE_REGNUM)
(reg:SI VXRM_REGNUM)] UNSPEC_VPREDICATE)
- (unspec:VI_D
- [(match_operand:VI_D 3 "register_operand" " vr, vr, vr, vr")
+ (unspec:V_VLSI_D
+ [(match_operand:V_VLSI_D 3 "register_operand" " vr, vr, vr, vr")
(match_operand:<VEL> 4 "reg_or_0_operand" " rJ, rJ, rJ, rJ")] VSAT_ARITH_OP)
- (match_operand:VI_D 2 "vector_merge_operand" " vu, 0, vu, 0")))]
+ (match_operand:V_VLSI_D 2 "vector_merge_operand" " vu, 0, vu, 0")))]
"TARGET_VECTOR"
"v<sat_op>.vx\t%0,%3,%z4%p1"
[(set_attr "type" "<sat_insn_type>")
(set_attr "mode" "<MODE>")])
(define_insn "*pred_<sat_op><mode>_extended_scalar"
- [(set (match_operand:VI_D 0 "register_operand" "=vd, vr, vd, vr")
- (if_then_else:VI_D
+ [(set (match_operand:V_VLSI_D 0 "register_operand" "=vd, vr, vd, vr")
+ (if_then_else:V_VLSI_D
(unspec:<VM>
[(match_operand:<VM> 1 "vector_mask_operand" " vm, vm,Wc1,Wc1")
(match_operand 5 "vector_length_operand" "rvl,rvl,rvl,rvl")
@@ -4671,11 +4750,11 @@
(reg:SI VL_REGNUM)
(reg:SI VTYPE_REGNUM)
(reg:SI VXRM_REGNUM)] UNSPEC_VPREDICATE)
- (unspec:VI_D
- [(match_operand:VI_D 3 "register_operand" " vr, vr, vr, vr")
+ (unspec:V_VLSI_D
+ [(match_operand:V_VLSI_D 3 "register_operand" " vr, vr, vr, vr")
(sign_extend:<VEL>
(match_operand:<VSUBEL> 4 "reg_or_0_operand" " rJ, rJ, rJ, rJ"))] VSAT_ARITH_OP)
- (match_operand:VI_D 2 "vector_merge_operand" " vu, 0, vu, 0")))]
+ (match_operand:V_VLSI_D 2 "vector_merge_operand" " vu, 0, vu, 0")))]
"TARGET_VECTOR && !TARGET_64BIT"
"v<sat_op>.vx\t%0,%3,%z4%p1"
[(set_attr "type" "<sat_insn_type>")
@@ -5414,6 +5493,98 @@
"TARGET_VECTOR"
{})
+(define_expand "@pred_mul_plus_vx_<mode>"
+ [(set (match_operand:V_VLSI_QHS 0 "register_operand")
+ (if_then_else:V_VLSI_QHS
+ (unspec:<VM>
+ [(match_operand:<VM> 1 "vector_mask_operand")
+ (match_operand 6 "vector_length_operand")
+ (match_operand 7 "const_int_operand")
+ (match_operand 8 "const_int_operand")
+ (match_operand 9 "const_int_operand")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+ (plus:V_VLSI_QHS
+ (mult:V_VLSI_QHS
+ (vec_duplicate:V_VLSI_QHS
+ (match_operand:<VEL> 2 "register_operand"))
+ (match_operand:V_VLSI_QHS 3 "register_operand"))
+ (match_operand:V_VLSI_QHS 4 "register_operand"))
+ (match_operand:V_VLSI_QHS 5 "vector_merge_operand")))]
+ "TARGET_VECTOR"
+{
+ riscv_vector::prepare_ternary_operands (operands);
+})
+
+(define_expand "@pred_mul_plus_vx_<mode>"
+ [(set (match_operand:V_VLSI_D 0 "register_operand")
+ (if_then_else:V_VLSI_D
+ (unspec:<VM>
+ [(match_operand:<VM> 1 "vector_mask_operand")
+ (match_operand 6 "vector_length_operand")
+ (match_operand 7 "const_int_operand")
+ (match_operand 8 "const_int_operand")
+ (match_operand 9 "const_int_operand")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+ (plus:V_VLSI_D
+ (mult:V_VLSI_D
+ (vec_duplicate:V_VLSI_D
+ (match_operand:<VEL> 2 "register_operand"))
+ (match_operand:V_VLSI_D 3 "register_operand"))
+ (match_operand:V_VLSI_D 4 "register_operand"))
+ (match_operand:V_VLSI_D 5 "vector_merge_operand")))]
+ "TARGET_VECTOR && TARGET_64BIT"
+{
+ riscv_vector::prepare_ternary_operands (operands);
+})
+
+(define_expand "@pred_vnmsac_vx_<mode>"
+ [(set (match_operand:V_VLSI_QHS 0 "register_operand")
+ (if_then_else:V_VLSI_QHS
+ (unspec:<VM>
+ [(match_operand:<VM> 1 "vector_mask_operand")
+ (match_operand 6 "vector_length_operand")
+ (match_operand 7 "const_int_operand")
+ (match_operand 8 "const_int_operand")
+ (match_operand 9 "const_int_operand")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+ (minus:V_VLSI_QHS
+ (match_operand:V_VLSI_QHS 4 "register_operand")
+ (mult:V_VLSI_QHS
+ (vec_duplicate:V_VLSI_QHS
+ (match_operand:<VEL> 2 "register_operand"))
+ (match_operand:V_VLSI_QHS 3 "register_operand")))
+ (match_operand:V_VLSI_QHS 5 "vector_merge_operand")))]
+ "TARGET_VECTOR"
+{
+ riscv_vector::prepare_ternary_operands (operands);
+})
+
+(define_expand "@pred_vnmsac_vx_<mode>"
+ [(set (match_operand:V_VLSI_D 0 "register_operand")
+ (if_then_else:V_VLSI_D
+ (unspec:<VM>
+ [(match_operand:<VM> 1 "vector_mask_operand")
+ (match_operand 6 "vector_length_operand")
+ (match_operand 7 "const_int_operand")
+ (match_operand 8 "const_int_operand")
+ (match_operand 9 "const_int_operand")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+ (minus:V_VLSI_D
+ (match_operand:V_VLSI_D 4 "register_operand")
+ (mult:V_VLSI_D
+ (vec_duplicate:V_VLSI_D
+ (match_operand:<VEL> 2 "register_operand"))
+ (match_operand:V_VLSI_D 3 "register_operand")))
+ (match_operand:V_VLSI_D 5 "vector_merge_operand")))]
+ "TARGET_VECTOR && TARGET_64BIT"
+{
+ riscv_vector::prepare_ternary_operands (operands);
+})
+
(define_insn "*pred_madd<mode>_scalar"
[(set (match_operand:V_VLSI 0 "register_operand" "=vd, vr")
(if_then_else:V_VLSI
@@ -6248,8 +6419,8 @@
(set_attr "mode" "<MODE>")])
(define_insn "@pred_<optab><mode>_scalar"
- [(set (match_operand:VF 0 "register_operand" "=vd, vd, vr, vr")
- (if_then_else:VF
+ [(set (match_operand:V_VLSF 0 "register_operand" "=vd, vd, vr, vr")
+ (if_then_else:V_VLSF
(unspec:<VM>
[(match_operand:<VM> 1 "vector_mask_operand" " vm, vm,Wc1,Wc1")
(match_operand 5 "vector_length_operand" "rvl,rvl,rvl,rvl")
@@ -6260,11 +6431,11 @@
(reg:SI VL_REGNUM)
(reg:SI VTYPE_REGNUM)
(reg:SI FRM_REGNUM)] UNSPEC_VPREDICATE)
- (commutative_float_binop:VF
- (vec_duplicate:VF
+ (commutative_float_binop:V_VLSF
+ (vec_duplicate:V_VLSF
(match_operand:<VEL> 4 "register_operand" " f, f, f, f"))
- (match_operand:VF 3 "register_operand" " vr, vr, vr, vr"))
- (match_operand:VF 2 "vector_merge_operand" " vu, 0, vu, 0")))]
+ (match_operand:V_VLSF 3 "register_operand" " vr, vr, vr, vr"))
+ (match_operand:V_VLSF 2 "vector_merge_operand" " vu, 0, vu, 0")))]
"TARGET_VECTOR"
"vf<insn>.vf\t%0,%3,%4%p1"
[(set_attr "type" "<float_insn_type>")
@@ -6273,43 +6444,43 @@
(symbol_ref "riscv_vector::get_frm_mode (operands[9])"))])
(define_insn "@pred_<optab><mode>_scalar"
- [(set (match_operand:VF 0 "register_operand" "=vd, vd, vr, vr")
- (if_then_else:VF
+ [(set (match_operand:V_VLSF 0 "register_operand" "=vd, vd, vr, vr")
+ (if_then_else:V_VLSF
(unspec:<VM>
- [(match_operand:<VM> 1 "vector_mask_operand" " vm, vm,Wc1,Wc1")
- (match_operand 5 "vector_length_operand" "rvl,rvl,rvl,rvl")
- (match_operand 6 "const_int_operand" " i, i, i, i")
- (match_operand 7 "const_int_operand" " i, i, i, i")
- (match_operand 8 "const_int_operand" " i, i, i, i")
+ [(match_operand:<VM> 1 "vector_mask_operand" " vm, vm,Wc1,Wc1")
+ (match_operand 5 "vector_length_operand" "rvl,rvl,rvl,rvl")
+ (match_operand 6 "const_int_operand" " i, i, i, i")
+ (match_operand 7 "const_int_operand" " i, i, i, i")
+ (match_operand 8 "const_int_operand" " i, i, i, i")
(reg:SI VL_REGNUM)
(reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
- (commutative_float_binop_nofrm:VF
- (vec_duplicate:VF
- (match_operand:<VEL> 4 "register_operand" " f, f, f, f"))
- (match_operand:VF 3 "register_operand" " vr, vr, vr, vr"))
- (match_operand:VF 2 "vector_merge_operand" " vu, 0, vu, 0")))]
+ (commutative_float_binop_nofrm:V_VLSF
+ (vec_duplicate:V_VLSF
+ (match_operand:<VEL> 4 "register_operand" " f, f, f, f"))
+ (match_operand:V_VLSF 3 "register_operand" " vr, vr, vr, vr"))
+ (match_operand:V_VLSF 2 "vector_merge_operand" " vu, 0, vu, 0")))]
"TARGET_VECTOR"
"vf<insn>.vf\t%0,%3,%4%p1"
[(set_attr "type" "<float_insn_type>")
(set_attr "mode" "<MODE>")])
(define_insn "@pred_<ieee_fmaxmin_op><mode>_scalar"
- [(set (match_operand:VF 0 "register_operand" "=vd, vd, vr, vr")
- (if_then_else:VF
+ [(set (match_operand:V_VLSF 0 "register_operand" "=vd, vd, vr, vr")
+ (if_then_else:V_VLSF
(unspec:<VM>
- [(match_operand:<VM> 1 "vector_mask_operand" " vm, vm,Wc1,Wc1")
- (match_operand 5 "vector_length_operand" "rvl,rvl,rvl,rvl")
- (match_operand 6 "const_int_operand" " i, i, i, i")
- (match_operand 7 "const_int_operand" " i, i, i, i")
- (match_operand 8 "const_int_operand" " i, i, i, i")
+ [(match_operand:<VM> 1 "vector_mask_operand" " vm, vm,Wc1,Wc1")
+ (match_operand 5 "vector_length_operand" "rvl,rvl,rvl,rvl")
+ (match_operand 6 "const_int_operand" " i, i, i, i")
+ (match_operand 7 "const_int_operand" " i, i, i, i")
+ (match_operand 8 "const_int_operand" " i, i, i, i")
(reg:SI VL_REGNUM)
(reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
- (unspec:VF
- [(match_operand:VF 3 "register_operand" " vr, vr, vr, vr")
- (vec_duplicate:VF
+ (unspec:V_VLSF
+ [(match_operand:V_VLSF 3 "register_operand" " vr, vr, vr, vr")
+ (vec_duplicate:V_VLSF
(match_operand:<VEL> 4 "register_operand" " f, f, f, f"))]
UNSPEC_VFMAXMIN)
- (match_operand:VF 2 "vector_merge_operand" " vu, 0, vu, 0")))]
+ (match_operand:V_VLSF 2 "vector_merge_operand" " vu, 0, vu, 0")))]
"TARGET_VECTOR"
"v<ieee_fmaxmin_op>.vf\t%0,%3,%4%p1"
[(set_attr "type" "vfminmax")
@@ -6341,8 +6512,8 @@
(symbol_ref "riscv_vector::get_frm_mode (operands[9])"))])
(define_insn "@pred_<optab><mode>_reverse_scalar"
- [(set (match_operand:VF 0 "register_operand" "=vd, vd, vr, vr")
- (if_then_else:VF
+ [(set (match_operand:V_VLSF 0 "register_operand" "=vd, vd, vr, vr")
+ (if_then_else:V_VLSF
(unspec:<VM>
[(match_operand:<VM> 1 "vector_mask_operand" " vm, vm,Wc1,Wc1")
(match_operand 5 "vector_length_operand" "rvl,rvl,rvl,rvl")
@@ -6353,11 +6524,11 @@
(reg:SI VL_REGNUM)
(reg:SI VTYPE_REGNUM)
(reg:SI FRM_REGNUM)] UNSPEC_VPREDICATE)
- (non_commutative_float_binop:VF
- (vec_duplicate:VF
+ (non_commutative_float_binop:V_VLSF
+ (vec_duplicate:V_VLSF
(match_operand:<VEL> 4 "register_operand" " f, f, f, f"))
- (match_operand:VF 3 "register_operand" " vr, vr, vr, vr"))
- (match_operand:VF 2 "vector_merge_operand" " vu, 0, vu, 0")))]
+ (match_operand:V_VLSF 3 "register_operand" " vr, vr, vr, vr"))
+ (match_operand:V_VLSF 2 "vector_merge_operand" " vu, 0, vu, 0")))]
"TARGET_VECTOR"
"vfr<insn>.vf\t%0,%3,%4%p1"
[(set_attr "type" "<float_insn_type>")
@@ -6595,9 +6766,42 @@
(match_operand:<VEL> 2 "register_operand"))
(match_operand:V_VLSF 3 "register_operand"))
(match_operand:V_VLSF 4 "register_operand"))
- (match_operand:V_VLSF 5 "register_operand")))]
+ (match_operand:V_VLSF 5 "vector_merge_operand")))]
"TARGET_VECTOR"
-{})
+{
+ riscv_vector::prepare_ternary_operands (operands);
+})
+
+(define_insn "*pred_mul_<optab><mode>_scalar_undef"
+ [(set (match_operand:V_VLSF 0 "register_operand" "=vd,vd, vr, vr")
+ (if_then_else:V_VLSF
+ (unspec:<VM>
+ [(match_operand:<VM> 1 "vector_mask_operand" " vm, vm,Wc1,Wc1")
+ (match_operand 6 "vector_length_operand" "rvl,rvl,rvl,rvl")
+ (match_operand 7 "const_int_operand" " i, i, i, i")
+ (match_operand 8 "const_int_operand" " i, i, i, i")
+ (match_operand 9 "const_int_operand" " i, i, i, i")
+ (match_operand 10 "const_int_operand" " i, i, i, i")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)
+ (reg:SI FRM_REGNUM)] UNSPEC_VPREDICATE)
+ (plus_minus:V_VLSF
+ (mult:V_VLSF
+ (vec_duplicate:V_VLSF
+ (match_operand:<VEL> 3 "register_operand" " f, f, f, f"))
+ (match_operand:V_VLSF 4 "register_operand" " 0, vr, 0, vr"))
+ (match_operand:V_VLSF 5 "register_operand" " vr, 0, vr, 0"))
+ (match_operand:V_VLSF 2 "vector_undef_operand")))]
+ "TARGET_VECTOR"
+ "@
+ vf<madd_msub>.vf\t%0,%3,%5%p1
+ vf<macc_msac>.vf\t%0,%3,%4%p1
+ vf<madd_msub>.vf\t%0,%3,%5%p1
+ vf<macc_msac>.vf\t%0,%3,%4%p1"
+ [(set_attr "type" "vfmuladd")
+ (set_attr "mode" "<MODE>")
+ (set (attr "frm_mode")
+ (symbol_ref "riscv_vector::get_frm_mode (operands[10])"))])
(define_insn "*pred_<madd_msub><mode>_scalar"
[(set (match_operand:V_VLSF 0 "register_operand" "=vd, vr")
@@ -6811,9 +7015,43 @@
(match_operand:<VEL> 2 "register_operand"))
(match_operand:V_VLSF 3 "register_operand")))
(match_operand:V_VLSF 4 "register_operand"))
- (match_operand:V_VLSF 5 "register_operand")))]
+ (match_operand:V_VLSF 5 "vector_merge_operand")))]
"TARGET_VECTOR"
-{})
+{
+ riscv_vector::prepare_ternary_operands (operands);
+})
+
+(define_insn "*pred_mul_neg_<optab><mode>_scalar_undef"
+ [(set (match_operand:V_VLSF 0 "register_operand" "=vd,vd, vr, vr")
+ (if_then_else:V_VLSF
+ (unspec:<VM>
+ [(match_operand:<VM> 1 "vector_mask_operand" " vm, vm,Wc1,Wc1")
+ (match_operand 6 "vector_length_operand" "rvl,rvl,rvl,rvl")
+ (match_operand 7 "const_int_operand" " i, i, i, i")
+ (match_operand 8 "const_int_operand" " i, i, i, i")
+ (match_operand 9 "const_int_operand" " i, i, i, i")
+ (match_operand 10 "const_int_operand" " i, i, i, i")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)
+ (reg:SI FRM_REGNUM)] UNSPEC_VPREDICATE)
+ (plus_minus:V_VLSF
+ (neg:V_VLSF
+ (mult:V_VLSF
+ (vec_duplicate:V_VLSF
+ (match_operand:<VEL> 3 "register_operand" " f, f, f, f"))
+ (match_operand:V_VLSF 4 "register_operand" " 0, vr, 0, vr")))
+ (match_operand:V_VLSF 5 "register_operand" " vr, 0, vr, 0"))
+ (match_operand:V_VLSF 2 "vector_undef_operand")))]
+ "TARGET_VECTOR"
+ "@
+ vf<nmsub_nmadd>.vf\t%0,%3,%5%p1
+ vf<nmsac_nmacc>.vf\t%0,%3,%4%p1
+ vf<nmsub_nmadd>.vf\t%0,%3,%5%p1
+ vf<nmsac_nmacc>.vf\t%0,%3,%4%p1"
+ [(set_attr "type" "vfmuladd")
+ (set_attr "mode" "<MODE>")
+ (set (attr "frm_mode")
+ (symbol_ref "riscv_vector::get_frm_mode (operands[10])"))])
(define_insn "*pred_<nmsub_nmadd><mode>_scalar"
[(set (match_operand:V_VLSF 0 "register_operand" "=vd, vr")
@@ -7196,10 +7434,10 @@
(plus_minus:VWEXTF
(mult:VWEXTF
(float_extend:VWEXTF
- (vec_duplicate:<V_DOUBLE_TRUNC>
- (match_operand:<VSUBEL> 3 "register_operand" " f")))
- (float_extend:VWEXTF
- (match_operand:<V_DOUBLE_TRUNC> 4 "register_operand" " vr")))
+ (match_operand:<V_DOUBLE_TRUNC> 4 "register_operand" " vr"))
+ (vec_duplicate:VWEXTF
+ (float_extend:<VEL>
+ (match_operand:<VSUBEL> 3 "register_operand" " f"))))
(match_operand:VWEXTF 2 "register_operand" " 0"))
(match_dup 2)))]
"TARGET_VECTOR"
@@ -8696,6 +8934,106 @@
[(set_attr "type" "vssegt<order>x")
(set_attr "mode" "<V32T:MODE>")])
+(define_insn "*pred_macc_<mode>_scalar_undef"
+ [(set (match_operand:V_VLSI_QHS 0 "register_operand" "=vd, vr")
+ (if_then_else:V_VLSI_QHS
+ (unspec:<VM>
+ [(match_operand:<VM> 1 "vector_mask_operand" " vm, Wc1")
+ (match_operand 6 "vector_length_operand" "rvl, rvl")
+ (match_operand 7 "const_int_operand" " i, i")
+ (match_operand 8 "const_int_operand" " i, i")
+ (match_operand 9 "const_int_operand" " i, i")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+ (plus:V_VLSI_QHS
+ (mult:V_VLSI_QHS
+ (vec_duplicate:V_VLSI_QHS
+ (match_operand:<VEL> 3 "reg_or_0_operand" " rJ, rJ"))
+ (match_operand:V_VLSI_QHS 4 "register_operand" " vr, vr"))
+ (match_operand:V_VLSI_QHS 5 "register_operand" " 0, 0"))
+ (match_operand:V_VLSI_QHS 2 "vector_undef_operand")))]
+ "TARGET_VECTOR"
+ "@
+ vmacc.vx\t%0,%z3,%4%p1
+ vmacc.vx\t%0,%z3,%4%p1"
+ [(set_attr "type" "vimuladd")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*pred_macc_<mode>_scalar_undef"
+ [(set (match_operand:V_VLSI_D 0 "register_operand" "=vd, vr")
+ (if_then_else:V_VLSI_D
+ (unspec:<VM>
+ [(match_operand:<VM> 1 "vector_mask_operand" " vm, Wc1")
+ (match_operand 6 "vector_length_operand" "rvl, rvl")
+ (match_operand 7 "const_int_operand" " i, i")
+ (match_operand 8 "const_int_operand" " i, i")
+ (match_operand 9 "const_int_operand" " i, i")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+ (plus:V_VLSI_D
+ (mult:V_VLSI_D
+ (vec_duplicate:V_VLSI_D
+ (match_operand:<VEL> 3 "reg_or_0_operand" " rJ, rJ"))
+ (match_operand:V_VLSI_D 4 "register_operand" " vr, vr"))
+ (match_operand:V_VLSI_D 5 "register_operand" " 0, 0"))
+ (match_operand:V_VLSI_D 2 "vector_undef_operand")))]
+ "TARGET_VECTOR && TARGET_64BIT"
+ "@
+ vmacc.vx\t%0,%z3,%4%p1
+ vmacc.vx\t%0,%z3,%4%p1"
+ [(set_attr "type" "vimuladd")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*pred_nmsac_<mode>_scalar_undef"
+ [(set (match_operand:V_VLSI_QHS 0 "register_operand" "=vd, vr")
+ (if_then_else:V_VLSI_QHS
+ (unspec:<VM>
+ [(match_operand:<VM> 1 "vector_mask_operand" " vm, Wc1")
+ (match_operand 6 "vector_length_operand" "rvl, rvl")
+ (match_operand 7 "const_int_operand" " i, i")
+ (match_operand 8 "const_int_operand" " i, i")
+ (match_operand 9 "const_int_operand" " i, i")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+ (minus:V_VLSI_QHS
+ (match_operand:V_VLSI_QHS 5 "register_operand" " 0, 0")
+ (mult:V_VLSI_QHS
+ (vec_duplicate:V_VLSI_QHS
+ (match_operand:<VEL> 3 "reg_or_0_operand" " rJ, rJ"))
+ (match_operand:V_VLSI_QHS 4 "register_operand" " vr, vr")))
+ (match_operand:V_VLSI_QHS 2 "vector_undef_operand")))]
+ "TARGET_VECTOR"
+ "@
+ vnmsac.vx\t%0,%z3,%4%p1
+ vnmsac.vx\t%0,%z3,%4%p1"
+ [(set_attr "type" "vimuladd")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*pred_nmsac_<mode>_scalar_undef"
+ [(set (match_operand:V_VLSI_D 0 "register_operand" "=vd, vr")
+ (if_then_else:V_VLSI_D
+ (unspec:<VM>
+ [(match_operand:<VM> 1 "vector_mask_operand" " vm, Wc1")
+ (match_operand 6 "vector_length_operand" "rvl, rvl")
+ (match_operand 7 "const_int_operand" " i, i")
+ (match_operand 8 "const_int_operand" " i, i")
+ (match_operand 9 "const_int_operand" " i, i")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+ (minus:V_VLSI_D
+ (match_operand:V_VLSI_D 5 "register_operand" " 0, 0")
+ (mult:V_VLSI_D
+ (vec_duplicate:V_VLSI_D
+ (match_operand:<VEL> 3 "reg_or_0_operand" " rJ, rJ"))
+ (match_operand:V_VLSI_D 4 "register_operand" " vr, vr")))
+ (match_operand:V_VLSI_D 2 "vector_undef_operand")))]
+ "TARGET_VECTOR && TARGET_64BIT"
+ "@
+ vnmsac.vx\t%0,%z3,%4%p1
+ vnmsac.vx\t%0,%z3,%4%p1"
+ [(set_attr "type" "vimuladd")
+ (set_attr "mode" "<MODE>")])
+
(include "autovec.md")
(include "autovec-opt.md")
(include "sifive-vector.md")
diff --git a/gcc/config/riscv/xiangshan.md b/gcc/config/riscv/xiangshan.md
index 5ed6bac..6179140 100644
--- a/gcc/config/riscv/xiangshan.md
+++ b/gcc/config/riscv/xiangshan.md
@@ -107,7 +107,8 @@
;; they are just dummies like this one.
(define_insn_reservation "xiangshan_alu_unknown" 1
(and (eq_attr "tune" "xiangshan")
- (eq_attr "type" "zicond,min,max,minu,maxu,clz,ctz,cpop,ghost,rotate,clmul,condmove,crypto,mvpair,rdvlenb,rdvl,wrvxrm,wrfrm,rdfrm,vsetvl,vsetvl_pre,vlde,vste,vldm,vstm,vlds,vsts,vldux,vldox,vstux,vstox,vldff,vldr,vstr,vlsegde,vssegte,vlsegds,vssegts,vlsegdux,vlsegdox,vssegtux,vssegtox,vlsegdff,vialu,viwalu,vext,vicalu,vshift,vnshift,vicmp,viminmax,vimul,vidiv,viwmul,vimuladd,sf_vqmacc,viwmuladd,vimerge,vimov,vsalu,vaalu,vsmul,vsshift,vnclip,sf_vfnrclip,vfalu,vfwalu,vfmul,vfdiv,vfwmul,vfmuladd,vfwmuladd,vfsqrt,vfrecp,vfcmp,vfminmax,vfsgnj,vfclass,vfmerge,vfmov,vfcvtitof,vfcvtftoi,vfwcvtitof,vfwcvtftoi,vfwcvtftof,vfncvtitof,vfncvtftoi,vfncvtftof,vired,viwred,vfredu,vfredo,vfwredu,vfwredo,vmalu,vmpop,vmffs,vmsfs,vmiota,vmidx,vimovvx,vimovxv,vfmovvf,vfmovfv,vslideup,vslidedown,vislide1up,vislide1down,vfslide1up,vfslide1down,vgather,vcompress,vmov,vector,vandn,vbrev,vbrev8,vrev8,vclz,vctz,vcpop,vrol,vror,vwsll,vclmul,vclmulh,vghsh,vgmul,vaesef,vaesem,vaesdf,vaesdm,vaeskf1,vaeskf2,vaesz,vsha2ms,vsha2ch,vsha2cl,vsm4k,vsm4r,vsm3me,vsm3c,vfncvtbf16,vfwcvtbf16,vfwmaccbf16"))
+ (eq_attr "type" "zicond,min,max,minu,maxu,clz,ctz,cpop,ghost,rotate,clmul,condmove,crypto,mvpair,rdvlenb,rdvl,wrvxrm,wrfrm,rdfrm,vsetvl,vsetvl_pre,vlde,vste,vldm,vstm,vlds,vsts,vldux,vldox,vstux,vstox,vldff,vldr,vstr,vlsegde,vssegte,vlsegds,vssegts,vlsegdux,vlsegdox,vssegtux,vssegtox,vlsegdff,vialu,viwalu,vext,vicalu,vshift,vnshift,vicmp,viminmax,vimul,vidiv,viwmul,vimuladd,sf_vqmacc,viwmuladd,vimerge,vimov,vsalu,vaalu,vsmul,vsshift,vnclip,sf_vfnrclip,vfalu,vfwalu,vfmul,vfdiv,vfwmul,vfmuladd,vfwmuladd,vfsqrt,vfrecp,vfcmp,vfminmax,vfsgnj,vfclass,vfmerge,vfmov,vfcvtitof,vfcvtftoi,vfwcvtitof,vfwcvtftoi,vfwcvtftof,vfncvtitof,vfncvtftoi,vfncvtftof,vired,viwred,vfredu,vfredo,vfwredu,vfwredo,vmalu,vmpop,vmffs,vmsfs,vmiota,vmidx,vimovvx,vimovxv,vfmovvf,vfmovfv,vslideup,vslidedown,vislide1up,vislide1down,vfslide1up,vfslide1down,vgather,vcompress,vmov,vector,vandn,vbrev,vbrev8,vrev8,vclz,vctz,vcpop,vrol,vror,vwsll,vclmul,vclmulh,vghsh,vgmul,vaesef,vaesem,vaesdf,vaesdm,vaeskf1,vaeskf2,vaesz,vsha2ms,vsha2ch,vsha2cl,vsm4k,vsm4r,vsm3me,vsm3c,vfncvtbf16,vfwcvtbf16,vfwmaccbf16,sf_vc,sf_vc_se"))
+
"xs_alu_rs")
;; ----------------------------------------------------
@@ -143,13 +144,13 @@
(define_insn_reservation "xiangshan_sfdiv" 11
(and (eq_attr "tune" "xiangshan")
(eq_attr "type" "fdiv")
- (eq_attr "mode" "SF"))
+ (eq_attr "mode" "HF,SF"))
"xs_fmisc_rs")
(define_insn_reservation "xiangshan_sfsqrt" 17
(and (eq_attr "tune" "xiangshan")
(eq_attr "type" "fsqrt")
- (eq_attr "mode" "SF"))
+ (eq_attr "mode" "HF,SF"))
"xs_fmisc_rs")
(define_insn_reservation "xiangshan_dfdiv" 21
diff --git a/gcc/config/riscv/zicond.md b/gcc/config/riscv/zicond.md
index f87b4f2..d170f6a 100644
--- a/gcc/config/riscv/zicond.md
+++ b/gcc/config/riscv/zicond.md
@@ -234,5 +234,39 @@
(const_int 0)
(match_dup 4)))])
+;; We can splat the sign bit across a GPR with an arithmetic right shift,
+;; which gives us a 0/-1 result.  We then turn on bit #0 unconditionally,
+;; which results in 1/-1.  There are probably other cases that could be
+;; handled; this one seems particularly important, though.
+(define_split
+ [(set (match_operand:X 0 "register_operand")
+ (plus:X (if_then_else:X (ge:X (match_operand:X 1 "register_operand")
+ (const_int 0))
+ (match_operand 2 "const_int_operand")
+ (match_operand 3 "const_int_operand"))
+ (match_operand 4 "const_int_operand")))]
+ "((TARGET_ZICOND_LIKE || TARGET_XTHEADCONDMOV)
+ && INTVAL (operands[2]) + INTVAL (operands[4]) == 1
+ && INTVAL (operands[3]) + INTVAL (operands[4]) == -1)"
+ [(set (match_dup 0) (ashiftrt:X (match_dup 1) (match_dup 2)))
+ (set (match_dup 0) (ior:X (match_dup 0) (const_int 1)))]
+ { operands[2] = GEN_INT (GET_MODE_BITSIZE (word_mode) - 1); })
-
+;; Similarly, but with the condition and the true/false values reversed.
+;;
+;; Note that the case where only the condition is reversed, or only the
+;; true/false values, is not handled because we don't support 4->3
+;; splits.
+(define_split
+ [(set (match_operand:X 0 "register_operand")
+ (plus:X (if_then_else:X (lt:X (match_operand:X 1 "register_operand")
+ (const_int 0))
+ (match_operand 2 "const_int_operand")
+ (match_operand 3 "const_int_operand"))
+ (match_operand 4 "const_int_operand")))]
+ "((TARGET_ZICOND_LIKE || TARGET_XTHEADCONDMOV)
+ && INTVAL (operands[2]) + INTVAL (operands[4]) == -1
+ && INTVAL (operands[3]) + INTVAL (operands[4]) == 1)"
+ [(set (match_dup 0) (ashiftrt:X (match_dup 1) (match_dup 2)))
+ (set (match_dup 0) (ior:X (match_dup 0) (const_int 1)))]
+ { operands[2] = GEN_INT (GET_MODE_BITSIZE (word_mode) - 1); })
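
What both splits compute, written as plain C (assuming a 64-bit word; sketch only):

    /* The arithmetic shift yields 0 for x >= 0 and -1 for x < 0; OR-ing
       in bit #0 turns that into 1 or -1, matching the split conditions
       op2 + op4 == 1 / op3 + op4 == -1 (and the reverse).  */
    long signum_pm1 (long x)
    {
      return (x >> 63) | 1;  /* assumes 64-bit long */
    }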
diff --git a/gcc/config/rl78/rl78.cc b/gcc/config/rl78/rl78.cc
index 09753b6..28728aa 100644
--- a/gcc/config/rl78/rl78.cc
+++ b/gcc/config/rl78/rl78.cc
@@ -4953,8 +4953,7 @@ rl78_emit_libcall (const char *name, enum rtx_code code,
gcc_unreachable ();
}
- insns = get_insns ();
- end_sequence ();
+ insns = end_sequence ();
emit_libcall_block (insns, operands[0], ret, equiv);
return ret;
}
diff --git a/gcc/config/rl78/rl78.opt.urls b/gcc/config/rl78/rl78.opt.urls
index 96eff5f..66e874b 100644
--- a/gcc/config/rl78/rl78.opt.urls
+++ b/gcc/config/rl78/rl78.opt.urls
@@ -4,7 +4,7 @@ msim
UrlSuffix(gcc/RL78-Options.html#index-msim-6)
mmul=
-UrlSuffix(gcc/RL78-Options.html#index-mmul)
+UrlSuffix(gcc/RL78-Options.html#index-mmul-1)
mallregs
UrlSuffix(gcc/RL78-Options.html#index-mallregs)
diff --git a/gcc/config/rs6000/mma.md b/gcc/config/rs6000/mma.md
index 50e577a..85f3a92 100644
--- a/gcc/config/rs6000/mma.md
+++ b/gcc/config/rs6000/mma.md
@@ -30,7 +30,6 @@
(define_c_enum "unspec"
[UNSPEC_VSX_ASSEMBLE
- UNSPEC_MMA_EXTRACT
UNSPEC_MMA_PMXVBF16GER2
UNSPEC_MMA_PMXVBF16GER2NN
UNSPEC_MMA_PMXVBF16GER2NP
@@ -398,29 +397,8 @@
(match_operand 2 "const_0_to_1_operand")]
"TARGET_MMA"
{
- rtx src;
- int regoff = INTVAL (operands[2]);
- src = gen_rtx_UNSPEC (V16QImode,
- gen_rtvec (2, operands[1], GEN_INT (regoff)),
- UNSPEC_MMA_EXTRACT);
- emit_move_insn (operands[0], src);
- DONE;
-})
-
-(define_insn_and_split "*vsx_disassemble_pair"
- [(set (match_operand:V16QI 0 "mma_disassemble_output_operand" "=mwa")
- (unspec:V16QI [(match_operand:OO 1 "vsx_register_operand" "wa")
- (match_operand 2 "const_0_to_1_operand")]
- UNSPEC_MMA_EXTRACT))]
- "TARGET_MMA
- && vsx_register_operand (operands[1], OOmode)"
- "#"
- "&& reload_completed"
- [(const_int 0)]
-{
- int reg = REGNO (operands[1]);
- int regoff = INTVAL (operands[2]);
- rtx src = gen_rtx_REG (V16QImode, reg + regoff);
+ int regoff = INTVAL (operands[2]) * 16;
+ rtx src = simplify_gen_subreg (V16QImode, operands[1], OOmode, regoff);
emit_move_insn (operands[0], src);
DONE;
})
@@ -472,29 +450,8 @@
(match_operand 2 "const_0_to_3_operand")]
"TARGET_MMA"
{
- rtx src;
- int regoff = INTVAL (operands[2]);
- src = gen_rtx_UNSPEC (V16QImode,
- gen_rtvec (2, operands[1], GEN_INT (regoff)),
- UNSPEC_MMA_EXTRACT);
- emit_move_insn (operands[0], src);
- DONE;
-})
-
-(define_insn_and_split "*mma_disassemble_acc"
- [(set (match_operand:V16QI 0 "mma_disassemble_output_operand" "=mwa")
- (unspec:V16QI [(match_operand:XO 1 "fpr_reg_operand" "d")
- (match_operand 2 "const_0_to_3_operand")]
- UNSPEC_MMA_EXTRACT))]
- "TARGET_MMA
- && fpr_reg_operand (operands[1], XOmode)"
- "#"
- "&& reload_completed"
- [(const_int 0)]
-{
- int reg = REGNO (operands[1]);
- int regoff = INTVAL (operands[2]);
- rtx src = gen_rtx_REG (V16QImode, reg + regoff);
+ int regoff = INTVAL (operands[2]) * 16;
+ rtx src = simplify_gen_subreg (V16QImode, operands[1], XOmode, regoff);
emit_move_insn (operands[0], src);
DONE;
})
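
An OOmode pair (and likewise an XOmode accumulator) lives in adjacent 16-byte registers, so extracting one lane is plain offset arithmetic rather than an unspec. The layout, sketched with a byte-array stand-in (types here are illustrative):

    #include <stdint.h>
    #include <string.h>

    typedef struct { uint8_t b[32]; } pair256;  /* stand-in for OOmode */

    void extract_half (uint8_t out[16], const pair256 *p, int idx /* 0 or 1 */)
    {
      memcpy (out, p->b + idx * 16, 16);  /* regoff = INTVAL (operands[2]) * 16 */
    }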
diff --git a/gcc/config/rs6000/rs6000-builtin.cc b/gcc/config/rs6000/rs6000-builtin.cc
index 1118023..bc1580f 100644
--- a/gcc/config/rs6000/rs6000-builtin.cc
+++ b/gcc/config/rs6000/rs6000-builtin.cc
@@ -915,7 +915,7 @@ fold_build_vec_cmp (tree_code code, tree type, tree arg0, tree arg1,
tree cmp_type = truth_type_for (type);
tree zero_vec = build_zero_cst (type);
tree minus_one_vec = build_minus_one_cst (type);
- tree temp = create_tmp_reg_or_ssa_name (cmp_type);
+ tree temp = make_ssa_name (cmp_type);
gimple *g = gimple_build_assign (temp, code, arg0, arg1);
gsi_insert_before (gsi, g, GSI_SAME_STMT);
return fold_build3 (VEC_COND_EXPR, type, temp, minus_one_vec, zero_vec);
@@ -1106,7 +1106,7 @@ rs6000_gimple_fold_mma_builtin (gimple_stmt_iterator *gsi,
if (TREE_TYPE (src_ptr) != src_type)
src_ptr = build1 (NOP_EXPR, src_type, src_ptr);
- tree src = create_tmp_reg_or_ssa_name (TREE_TYPE (src_type));
+ tree src = make_ssa_name (TREE_TYPE (src_type));
gimplify_assign (src, build_simple_mem_ref (src_ptr), &new_seq);
/* If we are not disassembling an accumulator/pair or our destination is
@@ -1130,7 +1130,7 @@ rs6000_gimple_fold_mma_builtin (gimple_stmt_iterator *gsi,
{
new_decl = rs6000_builtin_decls[RS6000_BIF_XXMFACC_INTERNAL];
new_call = gimple_build_call (new_decl, 1, src);
- src = create_tmp_reg_or_ssa_name (vector_quad_type_node);
+ src = make_ssa_name (vector_quad_type_node);
gimple_call_set_lhs (new_call, src);
gimple_seq_add_stmt (&new_seq, new_call);
}
@@ -1146,7 +1146,7 @@ rs6000_gimple_fold_mma_builtin (gimple_stmt_iterator *gsi,
unsigned index = WORDS_BIG_ENDIAN ? i : nvec - 1 - i;
tree dst = build2 (MEM_REF, unsigned_V16QI_type_node, dst_base,
build_int_cst (dst_type, index * 16));
- tree dstssa = create_tmp_reg_or_ssa_name (unsigned_V16QI_type_node);
+ tree dstssa = make_ssa_name (unsigned_V16QI_type_node);
new_call = gimple_build_call (new_decl, 2, src,
build_int_cstu (uint16_type_node, i));
gimple_call_set_lhs (new_call, dstssa);
@@ -1204,7 +1204,7 @@ rs6000_gimple_fold_mma_builtin (gimple_stmt_iterator *gsi,
{
/* This built-in has a pass-by-reference accumulator input, so load it
into a temporary accumulator for use as a pass-by-value input. */
- op[0] = create_tmp_reg_or_ssa_name (vector_quad_type_node);
+ op[0] = make_ssa_name (vector_quad_type_node);
for (unsigned i = 1; i < nopnds; i++)
op[i] = gimple_call_arg (stmt, i);
gimplify_assign (op[0], build_simple_mem_ref (acc), &new_seq);
@@ -1252,9 +1252,9 @@ rs6000_gimple_fold_mma_builtin (gimple_stmt_iterator *gsi,
}
if (fncode == RS6000_BIF_BUILD_PAIR || fncode == RS6000_BIF_ASSEMBLE_PAIR_V)
- lhs = create_tmp_reg_or_ssa_name (vector_pair_type_node);
+ lhs = make_ssa_name (vector_pair_type_node);
else
- lhs = create_tmp_reg_or_ssa_name (vector_quad_type_node);
+ lhs = make_ssa_name (vector_quad_type_node);
gimple_call_set_lhs (new_call, lhs);
gimple_seq_add_stmt (&new_seq, new_call);
gimplify_assign (build_simple_mem_ref (acc), lhs, &new_seq);
@@ -1450,7 +1450,7 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi)
arg0 = gimple_call_arg (stmt, 0);
arg1 = gimple_call_arg (stmt, 1);
lhs = gimple_call_lhs (stmt);
- temp = create_tmp_reg_or_ssa_name (TREE_TYPE (arg1));
+ temp = make_ssa_name (TREE_TYPE (arg1));
g = gimple_build_assign (temp, BIT_NOT_EXPR, arg1);
gimple_set_location (g, gimple_location (stmt));
gsi_insert_before (gsi, g, GSI_SAME_STMT);
@@ -1472,7 +1472,7 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi)
arg0 = gimple_call_arg (stmt, 0);
arg1 = gimple_call_arg (stmt, 1);
lhs = gimple_call_lhs (stmt);
- temp = create_tmp_reg_or_ssa_name (TREE_TYPE (arg1));
+ temp = make_ssa_name (TREE_TYPE (arg1));
g = gimple_build_assign (temp, BIT_AND_EXPR, arg0, arg1);
gimple_set_location (g, gimple_location (stmt));
gsi_insert_before (gsi, g, GSI_SAME_STMT);
@@ -1512,7 +1512,7 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi)
arg0 = gimple_call_arg (stmt, 0);
arg1 = gimple_call_arg (stmt, 1);
lhs = gimple_call_lhs (stmt);
- temp = create_tmp_reg_or_ssa_name (TREE_TYPE (arg1));
+ temp = make_ssa_name (TREE_TYPE (arg1));
g = gimple_build_assign (temp, BIT_NOT_EXPR, arg1);
gimple_set_location (g, gimple_location (stmt));
gsi_insert_before (gsi, g, GSI_SAME_STMT);
@@ -1552,7 +1552,7 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi)
arg0 = gimple_call_arg (stmt, 0);
arg1 = gimple_call_arg (stmt, 1);
lhs = gimple_call_lhs (stmt);
- temp = create_tmp_reg_or_ssa_name (TREE_TYPE (arg1));
+ temp = make_ssa_name (TREE_TYPE (arg1));
g = gimple_build_assign (temp, BIT_IOR_EXPR, arg0, arg1);
gimple_set_location (g, gimple_location (stmt));
gsi_insert_before (gsi, g, GSI_SAME_STMT);
@@ -1643,7 +1643,7 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi)
arg0 = gimple_call_arg (stmt, 0);
arg1 = gimple_call_arg (stmt, 1);
lhs = gimple_call_lhs (stmt);
- temp = create_tmp_reg_or_ssa_name (TREE_TYPE (arg1));
+ temp = make_ssa_name (TREE_TYPE (arg1));
g = gimple_build_assign (temp, BIT_XOR_EXPR, arg0, arg1);
gimple_set_location (g, gimple_location (stmt));
gsi_insert_before (gsi, g, GSI_SAME_STMT);
diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
index 737c3d6..8dd23f8 100644
--- a/gcc/config/rs6000/rs6000.cc
+++ b/gcc/config/rs6000/rs6000.cc
@@ -55,7 +55,6 @@
#include "output.h"
#include "common/common-target.h"
#include "langhooks.h"
-#include "reload.h"
#include "sched-int.h"
#include "gimplify.h"
#include "gimple-iterator.h"
@@ -4952,10 +4951,19 @@ static bool
rs6000_builtin_support_vector_misalignment (machine_mode mode,
const_tree type,
int misalignment,
- bool is_packed)
+ bool is_packed,
+ bool is_gather_scatter)
{
if (TARGET_VSX)
{
+ if (is_gather_scatter)
+ {
+ if (TARGET_ALTIVEC && is_packed)
+ return false;
+ else
+ return true;
+ }
+
if (TARGET_EFFICIENT_UNALIGNED_VSX)
return true;
@@ -5166,6 +5174,7 @@ public:
protected:
void update_target_cost_per_stmt (vect_cost_for_stmt, stmt_vec_info,
+ slp_tree node,
vect_cost_model_location, unsigned int);
void density_test (loop_vec_info);
void adjust_vect_cost_per_loop (loop_vec_info);
@@ -5313,6 +5322,7 @@ rs6000_adjust_vect_cost_per_stmt (enum vect_cost_for_stmt kind,
void
rs6000_cost_data::update_target_cost_per_stmt (vect_cost_for_stmt kind,
stmt_vec_info stmt_info,
+ slp_tree node,
vect_cost_model_location where,
unsigned int orig_count)
{
@@ -5373,12 +5383,12 @@ rs6000_cost_data::update_target_cost_per_stmt (vect_cost_for_stmt kind,
or may not need to apply. When finalizing the cost of the loop,
the extra penalty is applied when the load density heuristics
are satisfied. */
- if (kind == vec_construct && stmt_info
- && STMT_VINFO_TYPE (stmt_info) == load_vec_info_type
- && (STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) == VMAT_ELEMENTWISE
- || STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) == VMAT_STRIDED_SLP))
+ if (kind == vec_construct && node
+ && SLP_TREE_TYPE (node) == load_vec_info_type
+ && (SLP_TREE_MEMORY_ACCESS_TYPE (node) == VMAT_ELEMENTWISE
+ || SLP_TREE_MEMORY_ACCESS_TYPE (node) == VMAT_STRIDED_SLP))
{
- tree vectype = STMT_VINFO_VECTYPE (stmt_info);
+ tree vectype = SLP_TREE_VECTYPE (node);
unsigned int nunits = vect_nunits_for_cost (vectype);
/* As PR103702 shows, it's possible that vectorizer wants to do
costings for only one unit here, it's no need to do any
@@ -5407,7 +5417,7 @@ rs6000_cost_data::update_target_cost_per_stmt (vect_cost_for_stmt kind,
unsigned
rs6000_cost_data::add_stmt_cost (int count, vect_cost_for_stmt kind,
- stmt_vec_info stmt_info, slp_tree,
+ stmt_vec_info stmt_info, slp_tree node,
tree vectype, int misalign,
vect_cost_model_location where)
{
@@ -5425,7 +5435,7 @@ rs6000_cost_data::add_stmt_cost (int count, vect_cost_for_stmt kind,
retval = adjust_cost_for_freq (stmt_info, where, count * stmt_cost);
m_costs[where] += retval;
- update_target_cost_per_stmt (kind, stmt_info, where, orig_count);
+ update_target_cost_per_stmt (kind, stmt_info, node, where, orig_count);
}
return retval;
@@ -9259,8 +9269,7 @@ rs6000_debug_legitimize_address (rtx x, rtx oldx, machine_mode mode)
start_sequence ();
ret = rs6000_legitimize_address (x, oldx, mode);
- insns = get_insns ();
- end_sequence ();
+ insns = end_sequence ();
if (ret != x)
{
@@ -10311,15 +10320,18 @@ can_be_rotated_to_negative_lis (HOST_WIDE_INT c, int *rot)
/* case b. xx0..01..1xx: some of 15 x's (and some of 16 0's) are
rotated over the highest bit. */
- int pos_one = clz_hwi ((c << 16) >> 16);
- middle_zeros = ctz_hwi (c >> (HOST_BITS_PER_WIDE_INT - pos_one));
- int middle_ones = clz_hwi (~(c << pos_one));
- if (middle_zeros >= 16 && middle_ones >= 33)
+ unsigned HOST_WIDE_INT uc = c;
+ int pos_one = clz_hwi ((HOST_WIDE_INT) (uc << 16) >> 16);
+ if (pos_one > 0 && pos_one < HOST_BITS_PER_WIDE_INT)
{
- *rot = pos_one;
- return true;
+ middle_zeros = ctz_hwi (c >> (HOST_BITS_PER_WIDE_INT - pos_one));
+ int middle_ones = clz_hwi (~(uc << pos_one));
+ if (middle_zeros >= 16 && middle_ones >= 33)
+ {
+ *rot = pos_one;
+ return true;
+ }
}
-
return false;
}
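
The unsigned copy and the new bounds check both guard against C undefined behavior: left-shifting a negative signed value, and shifting by zero or by the full type width. A standalone sketch of the safe pattern:

    #include <stdint.h>

    uint64_t shifted_complement (int64_t c, int n)
    {
      uint64_t uc = (uint64_t) c;                  /* conversion is well defined */
      return (n > 0 && n < 64) ? ~(uc << n) : ~uc; /* guard the shift count      */
    }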
@@ -10436,7 +10448,8 @@ can_be_built_by_li_and_rldic (HOST_WIDE_INT c, int *shift, HOST_WIDE_INT *mask)
if (lz >= HOST_BITS_PER_WIDE_INT)
return false;
- int middle_ones = clz_hwi (~(c << lz));
+ unsigned HOST_WIDE_INT uc = c;
+ int middle_ones = clz_hwi (~(uc << lz));
if (tz + lz + middle_ones >= ones
&& (tz - lz) < HOST_BITS_PER_WIDE_INT
&& tz < HOST_BITS_PER_WIDE_INT)
@@ -10470,7 +10483,7 @@ can_be_built_by_li_and_rldic (HOST_WIDE_INT c, int *shift, HOST_WIDE_INT *mask)
if (!IN_RANGE (pos_first_1, 1, HOST_BITS_PER_WIDE_INT-1))
return false;
- middle_ones = clz_hwi (~c << pos_first_1);
+ middle_ones = clz_hwi ((~(unsigned HOST_WIDE_INT) c) << pos_first_1);
middle_zeros = ctz_hwi (c >> (HOST_BITS_PER_WIDE_INT - pos_first_1));
if (pos_first_1 < HOST_BITS_PER_WIDE_INT
&& middle_ones + middle_zeros < HOST_BITS_PER_WIDE_INT
@@ -10572,7 +10585,8 @@ rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c, int *num_insns)
{
/* li/lis; rldicX */
unsigned HOST_WIDE_INT imm = (c | ~mask);
- imm = (imm >> shift) | (imm << (HOST_BITS_PER_WIDE_INT - shift));
+ if (shift > 0 && shift < HOST_BITS_PER_WIDE_INT)
+ imm = (imm >> shift) | (imm << (HOST_BITS_PER_WIDE_INT - shift));
count_or_emit_insn (temp, GEN_INT (imm));
if (shift != 0)
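
The guarded expression above is a right-rotate; the unguarded form would shift left by HOST_BITS_PER_WIDE_INT when shift == 0, which is undefined. A conventional safe formulation, for comparison:

    /* Right-rotate a 64-bit value without ever shifting by 0 or 64.  */
    static inline unsigned long long
    rotr64 (unsigned long long x, unsigned s)
    {
      s &= 63;
      return s ? (x >> s) | (x << (64 - s)) : x;
    }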
@@ -25314,7 +25328,6 @@ rs6000_get_function_versions_dispatcher (void *decl)
struct cgraph_node *node = NULL;
struct cgraph_node *default_node = NULL;
struct cgraph_function_version_info *node_v = NULL;
- struct cgraph_function_version_info *first_v = NULL;
tree dispatch_decl = NULL;
@@ -25334,38 +25347,16 @@ rs6000_get_function_versions_dispatcher (void *decl)
if (node_v->dispatcher_resolver != NULL)
return node_v->dispatcher_resolver;
- /* Find the default version and make it the first node. */
- first_v = node_v;
- /* Go to the beginning of the chain. */
- while (first_v->prev != NULL)
- first_v = first_v->prev;
-
- default_version_info = first_v;
- while (default_version_info != NULL)
- {
- const tree decl2 = default_version_info->this_node->decl;
- if (is_function_default_version (decl2))
- break;
- default_version_info = default_version_info->next;
- }
+ /* The default node is always the beginning of the chain. */
+ default_version_info = node_v;
+ while (default_version_info->prev)
+ default_version_info = default_version_info->prev;
+ default_node = default_version_info->this_node;
/* If there is no default node, just return NULL. */
- if (default_version_info == NULL)
+ if (!is_function_default_version (default_node->decl))
return NULL;
- /* Make default info the first node. */
- if (first_v != default_version_info)
- {
- default_version_info->prev->next = default_version_info->next;
- if (default_version_info->next)
- default_version_info->next->prev = default_version_info->prev;
- first_v->prev = default_version_info;
- default_version_info->next = first_v;
- default_version_info->prev = NULL;
- }
-
- default_node = default_version_info->this_node;
-
#ifndef TARGET_LIBC_PROVIDES_HWCAP_IN_TCB
error_at (DECL_SOURCE_LOCATION (default_node->decl),
"%<target_clones%> attribute needs GLIBC (2.23 and newer) that "
@@ -25765,10 +25756,13 @@ rs6000_can_inline_p (tree caller, tree callee)
}
}
- /* Ignore -mpower8-fusion and -mpower10-fusion options for inlining
- purposes. */
- callee_isa &= ~(OPTION_MASK_P8_FUSION | OPTION_MASK_P10_FUSION);
- explicit_isa &= ~(OPTION_MASK_P8_FUSION | OPTION_MASK_P10_FUSION);
+ /* Ignore -mpower8-fusion, -mpower10-fusion and -msave-toc-indirect options
+ for inlining purposes. */
+ HOST_WIDE_INT ignored_isas = (OPTION_MASK_P8_FUSION
+ | OPTION_MASK_P10_FUSION
+ | OPTION_MASK_SAVE_TOC_INDIRECT);
+ callee_isa &= ~ignored_isas;
+ explicit_isa &= ~ignored_isas;
/* The callee's options must be a subset of the caller's options, i.e.
a vsx function may inline an altivec function, but a no-vsx function
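
A hedged illustration of the effect (assuming this spelling of the option is
accepted by the target attribute):

    __attribute__ ((target ("save-toc-indirect")))
    static inline int callee (int x) { return x + 1; }

    int
    caller (int x)
    {
      /* Previously rejected because the callee's explicit
         -msave-toc-indirect was not set in the caller; now inlinable.  */
      return callee (x);
    }
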
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index 9c718ca..04a6c0f 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -1969,7 +1969,7 @@
[(set (match_dup 0) (plus:GPR (match_dup 1) (match_dup 3)))
(set (match_dup 0) (plus:GPR (match_dup 0) (match_dup 4)))]
{
- HOST_WIDE_INT val = INTVAL (operands[2]);
+ unsigned HOST_WIDE_INT val = UINTVAL (operands[2]);
HOST_WIDE_INT low = sext_hwi (val, 16);
HOST_WIDE_INT rest = trunc_int_for_mode (val - low, <MODE>mode);
@@ -15665,10 +15665,10 @@
(if_then_else:SI (lt (match_dup 3)
(const_int 0))
(const_int -1)
- (if_then_else (gt (match_dup 3)
- (const_int 0))
- (const_int 1)
- (const_int 0))))]
+ (if_then_else:SI (gt (match_dup 3)
+ (const_int 0))
+ (const_int 1)
+ (const_int 0))))]
"TARGET_P9_MISC"
{
operands[3] = gen_reg_rtx (CCmode);
@@ -15703,10 +15703,10 @@
(if_then_else:SI (lt (match_operand:CC 1 "cc_reg_operand" "y")
(const_int 0))
(const_int -1)
- (if_then_else (gt (match_dup 1)
- (const_int 0))
- (const_int 1)
- (const_int 0))))]
+ (if_then_else:SI (gt (match_dup 1)
+ (const_int 0))
+ (const_int 1)
+ (const_int 0))))]
"TARGET_P9_MISC"
"setb %0,%1"
[(set_attr "type" "logical")])
@@ -15716,10 +15716,10 @@
(if_then_else:SI (ltu (match_operand:CCUNS 1 "cc_reg_operand" "y")
(const_int 0))
(const_int -1)
- (if_then_else (gtu (match_dup 1)
- (const_int 0))
- (const_int 1)
- (const_int 0))))]
+ (if_then_else:SI (gtu (match_dup 1)
+ (const_int 0))
+ (const_int 1)
+ (const_int 0))))]
"TARGET_P9_MISC"
"setb %0,%1"
[(set_attr "type" "logical")])
@@ -15751,10 +15751,10 @@
(if_then_else:SI (lt (match_dup 3)
(const_int 0))
(const_int -1)
- (if_then_else (gt (match_dup 3)
- (const_int 0))
- (const_int 1)
- (const_int 0))))]
+ (if_then_else:SI (gt (match_dup 3)
+ (const_int 0))
+ (const_int 1)
+ (const_int 0))))]
"TARGET_P9_MISC"
{
operands[3] = gen_reg_rtx (CCmode);
@@ -15807,10 +15807,10 @@
(if_then_else:SI (lt (match_dup 3)
(const_int 0))
(const_int -1)
- (if_then_else (gt (match_dup 3)
- (const_int 0))
- (const_int 1)
- (const_int 0))))]
+ (if_then_else:SI (gt (match_dup 3)
+ (const_int 0))
+ (const_int 1)
+ (const_int 0))))]
"TARGET_P9_MISC && TARGET_64BIT"
{
operands[3] = gen_reg_rtx (CCmode);
diff --git a/gcc/config/rs6000/vxworks.h b/gcc/config/rs6000/vxworks.h
index fa2c837b..e77247b 100644
--- a/gcc/config/rs6000/vxworks.h
+++ b/gcc/config/rs6000/vxworks.h
@@ -34,6 +34,21 @@ along with GCC; see the file COPYING3. If not see
/* Common definitions first. */
/*-------------------------------------------------------------*/
+/* Default to 64 bits when the target is powerpc64*-wrs-vxworks*,
+ and to 32 bits otherwise. */
+#undef SUBTARGET_DRIVER_SELF_SPECS
+#if TARGET_VXWORKS64
+#define SUBTARGET_DRIVER_SELF_SPECS "%{!m64:%{!m32:-m64}}"
+#else
+#define SUBTARGET_DRIVER_SELF_SPECS "%{!m32:%{!m64:-m32}}"
+#endif
+
+/* Having used the build-time TARGET_VXWORKS64 to choose the default ABI above,
+ redefine it so that it matches whichever ABI is selected for each
+ compilation. */
+#undef TARGET_VXWORKS64
+#define TARGET_VXWORKS64 TARGET_64BIT
+
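For reference, the spec %{!m64:%{!m32:-m64}} injects -m64 only when neither
-m64 nor -m32 appears on the command line, so an explicit -m32 always wins;
the 32-bit default spec works symmetrically:

    powerpc64-wrs-vxworks-gcc foo.c        # compiled as if -m64 were given
    powerpc64-wrs-vxworks-gcc -m32 foo.c   # stays 32-bit, nothing injected
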
/* CPP predefined macros. */
#undef TARGET_OS_CPP_BUILTINS
diff --git a/gcc/config/s390/s390-protos.h b/gcc/config/s390/s390-protos.h
index e8c7f83..6becad1 100644
--- a/gcc/config/s390/s390-protos.h
+++ b/gcc/config/s390/s390-protos.h
@@ -114,6 +114,7 @@ extern bool s390_expand_cmpmem (rtx, rtx, rtx, rtx);
extern void s390_expand_vec_strlen (rtx, rtx, rtx);
extern void s390_expand_vec_movstr (rtx, rtx, rtx);
extern bool s390_expand_addcc (enum rtx_code, rtx, rtx, rtx, rtx, rtx);
+extern void s390_expand_cstoreti4 (rtx, rtx, rtx, rtx);
extern bool s390_expand_insv (rtx, rtx, rtx, rtx);
extern void s390_expand_cs (machine_mode, rtx, rtx, rtx, rtx, rtx, bool);
extern void s390_expand_atomic_exchange_tdsi (rtx, rtx, rtx);
@@ -127,6 +128,8 @@ extern void s390_expand_vcond (rtx, rtx, rtx, enum rtx_code, rtx, rtx);
extern void s390_expand_vec_init (rtx, rtx);
extern rtx s390_expand_merge_perm_const (machine_mode, bool);
extern void s390_expand_merge (rtx, rtx, rtx, bool);
+extern void s390_expand_int_spaceship (rtx, rtx, rtx, rtx);
+extern void s390_expand_fp_spaceship (rtx, rtx, rtx, rtx);
extern rtx s390_build_signbit_mask (machine_mode);
extern rtx s390_return_addr_rtx (int, rtx);
extern rtx s390_back_chain_rtx (void);
diff --git a/gcc/config/s390/s390.cc b/gcc/config/s390/s390.cc
index d82b16e..1a47f47 100644
--- a/gcc/config/s390/s390.cc
+++ b/gcc/config/s390/s390.cc
@@ -3862,7 +3862,21 @@ s390_register_move_cost (machine_mode mode,
{
/* On s390, copy between fprs and gprs is expensive. */
- /* It becomes somewhat faster having ldgr/lgdr. */
+ /* With vector extensions any GPR<->VR load up to 8 bytes is supported. */
+ if (TARGET_VX && GET_MODE_SIZE (mode) <= 8)
+ {
+ /* ldgr/vlvgg take one cycle and vlvg[bhf] take two cycles. */
+ if (reg_classes_intersect_p (from, GENERAL_REGS)
+ && reg_classes_intersect_p (to, VEC_REGS))
+ return GET_MODE_SIZE (mode) == 8 ? 1 : 2;
+ /* lgdr/vlgv[fg] take three cycles and vlgv[bh] take five cycles. */
+ if (reg_classes_intersect_p (to, GENERAL_REGS)
+ && reg_classes_intersect_p (from, VEC_REGS))
+ return GET_MODE_SIZE (mode) >= 4 ? 3 : 4;
+ }
+
+ /* Without vector extensions it still becomes somewhat faster having
+ ldgr/lgdr. */
if (TARGET_Z10 && GET_MODE_SIZE (mode) == 8)
{
/* ldgr is single cycle. */
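
The returned costs above encode approximate transfer latencies.  Summarized
(TARGET_VX, scalar modes of at most 8 bytes):

    direction    8-byte mode       4-byte    1-/2-byte
    GPR -> VR    1 (ldgr/vlvgg)    2         2 (vlvg[bh])
    VR -> GPR    3 (lgdr/vlgvg)    3         4 (vlgv[bh])
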
@@ -5589,8 +5603,7 @@ legitimize_tls_address (rtx addr, rtx reg)
new_rtx = force_const_mem (Pmode, new_rtx);
emit_move_insn (r2, new_rtx);
s390_emit_tls_call_insn (r2, tls_call);
- insn = get_insns ();
- end_sequence ();
+ insn = end_sequence ();
new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_NTPOFF);
temp = gen_reg_rtx (Pmode);
@@ -5612,8 +5625,7 @@ legitimize_tls_address (rtx addr, rtx reg)
new_rtx = force_const_mem (Pmode, new_rtx);
emit_move_insn (r2, new_rtx);
s390_emit_tls_call_insn (r2, tls_call);
- insn = get_insns ();
- end_sequence ();
+ insn = end_sequence ();
new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TLSLDM_NTPOFF);
temp = gen_reg_rtx (Pmode);
@@ -7210,6 +7222,82 @@ s390_expand_mask_and_shift (rtx val, machine_mode mode, rtx count)
NULL_RTX, 1, OPTAB_DIRECT);
}
+/* Expand optab cstoreti4. */
+
+void
+s390_expand_cstoreti4 (rtx dst, rtx cmp, rtx op1, rtx op2)
+{
+ rtx_code code = GET_CODE (cmp);
+
+ if (TARGET_VXE3)
+ {
+ rtx cond = s390_emit_compare (GET_MODE (cmp), code, op1, op2);
+ emit_insn (gen_movsicc (dst, cond, const1_rtx, const0_rtx));
+ return;
+ }
+
+  /* Prior to VXE3, emulate the comparison.  For an (in)equality test exploit
+     VECTOR COMPARE EQUAL.  For a relational test, first compare the high parts
+     via VECTOR ELEMENT COMPARE (LOGICAL).  If the high parts are not equal,
+     then consume the CC immediately by a subsequent LOAD ON CONDITION.
+     Otherwise, if the high parts are equal, then perform a subsequent VECTOR
+     COMPARE HIGH LOGICAL followed by a LOAD ON CONDITION.  */
+
+ op1 = force_reg (V2DImode, simplify_gen_subreg (V2DImode, op1, TImode, 0));
+ op2 = force_reg (V2DImode, simplify_gen_subreg (V2DImode, op2, TImode, 0));
+
+ if (code == EQ || code == NE)
+ {
+ s390_expand_vec_compare_cc (dst, code, op1, op2, code == EQ);
+ return;
+ }
+
+ /* Normalize code into either GE(U) or GT(U). */
+ if (code == LT || code == LE || code == LTU || code == LEU)
+ {
+ std::swap (op1, op2);
+ code = swap_condition (code);
+ }
+
+ /* For (un)signed comparisons
+ - high(op1) >= high(op2) instruction VECG op1, op2 sets CC1
+ if the relation does _not_ hold.
+ - high(op1) > high(op2) instruction VECG op2, op1 sets CC1
+ if the relation holds. */
+ if (code == GT || code == GTU)
+ std::swap (op1, op2);
+ machine_mode cc_mode = (code == GEU || code == GTU) ? CCUmode : CCSmode;
+ rtx lane0 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
+ emit_insn (
+ gen_rtx_SET (gen_rtx_REG (cc_mode, CC_REGNUM),
+ gen_rtx_COMPARE (cc_mode,
+ gen_rtx_VEC_SELECT (DImode, op1, lane0),
+ gen_rtx_VEC_SELECT (DImode, op2, lane0))));
+ rtx ccs_reg = gen_rtx_REG (CCSmode, CC_REGNUM);
+ rtx lab = gen_label_rtx ();
+ s390_emit_jump (lab, gen_rtx_NE (VOIDmode, ccs_reg, const0_rtx));
+  /* At this point we have that high(op1) == high(op2).  Thus, test the low
+     part now.  For unsigned comparisons
+ - low(op1) >= low(op2) instruction VCHLGS op2, op1 sets CC1
+ if the relation does _not_ hold.
+ - low(op1) > low(op2) instruction VCHLGS op1, op2 sets CC1
+ if the relation holds. */
+ std::swap (op1, op2);
+ emit_insn (gen_rtx_PARALLEL (
+ VOIDmode,
+ gen_rtvec (2,
+ gen_rtx_SET (gen_rtx_REG (CCVIHUmode, CC_REGNUM),
+ gen_rtx_COMPARE (CCVIHUmode, op1, op2)),
+ gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (V2DImode)))));
+ emit_label (lab);
+  /* For (un)signed comparison >=, any CC except CC1 means that the relation
+     holds.  For (un)signed comparison >, only CC1 means that the relation
+     holds.  */
+ rtx_code cmp_code = (code == GE || code == GEU) ? UNGE : LT;
+ rtx cond = gen_rtx_fmt_ee (cmp_code, CCSmode, ccs_reg, const0_rtx);
+ emit_insn (gen_movsicc (dst, cond, const1_rtx, const0_rtx));
+}
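
What the expander implements, as a plain C sketch; any ordered 128-bit
comparison whose result is needed as a value goes through cstoreti4:

    int
    lt128 (__int128 a, __int128 b)
    {
      /* Roughly vec[l]q plus a conditional load immediate on VXE3;
         the emulated three-step sequence described above otherwise.  */
      return a < b;
    }
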
+
/* Generate a vector comparison COND of CMP_OP1 and CMP_OP2 and store
the result in TARGET. */
@@ -7310,9 +7398,9 @@ s390_expand_vec_compare (rtx target, enum rtx_code cond,
/* Expand the comparison CODE of CMP1 and CMP2 and copy 1 or 0 into
TARGET if either all (ALL_P is true) or any (ALL_P is false) of the
elements in CMP1 and CMP2 fulfill the comparison.
- This function is only used to emit patterns for the vx builtins and
- therefore only handles comparison codes required by the
- builtins. */
+   This function is only used in s390_expand_cstoreti4 and to emit patterns for
+   the vx builtins, and therefore only handles comparison codes required by
+   those.  */
void
s390_expand_vec_compare_cc (rtx target, enum rtx_code code,
rtx cmp1, rtx cmp2, bool all_p)
@@ -7793,8 +7881,7 @@ s390_two_part_insv (struct alignment_context *ac, rtx *seq1, rtx *seq2,
const0_rtx, ins))
{
*seq1 = NULL;
- *seq2 = get_insns ();
- end_sequence ();
+ *seq2 = end_sequence ();
return tmp;
}
end_sequence ();
@@ -7803,13 +7890,11 @@ s390_two_part_insv (struct alignment_context *ac, rtx *seq1, rtx *seq2,
/* Failed to use insv. Generate a two part shift and mask. */
start_sequence ();
tmp = s390_expand_mask_and_shift (ins, mode, ac->shift);
- *seq1 = get_insns ();
- end_sequence ();
+ *seq1 = end_sequence ();
start_sequence ();
tmp = expand_simple_binop (SImode, IOR, tmp, val, NULL_RTX, 1, OPTAB_DIRECT);
- *seq2 = get_insns ();
- end_sequence ();
+ *seq2 = end_sequence ();
return tmp;
}
@@ -8128,6 +8213,167 @@ s390_expand_atomic (machine_mode mode, enum rtx_code code,
NULL_RTX, 1, OPTAB_DIRECT), 1);
}
+/* Expand integer op0 = op1 <=> op2, i.e.,
+ op0 = op1 == op2 ? 0 : op1 < op2 ? -1 : 1.
+
+ Signedness is specified by op3. If op3 equals 1, then perform an unsigned
+ comparison, and if op3 equals -1, then perform a signed comparison.
+
+ For integer comparisons we strive for a sequence like
+ CR[L] ; LHI ; LOCHIL ; LOCHIH
+ where the first three instructions fit into a group. */
+
+void
+s390_expand_int_spaceship (rtx op0, rtx op1, rtx op2, rtx op3)
+{
+ gcc_assert (op3 == const1_rtx || op3 == constm1_rtx);
+
+ rtx cc, cond_lt, cond_gt;
+ machine_mode cc_mode;
+ machine_mode mode = GET_MODE (op1);
+
+  /* Prior to VXE3, emulate a 128-bit comparison by breaking it up into three
+     comparisons.  First test the high halves.  If they are equal, then test
+     the low halves.  Finally, test for equality.  Depending on the results,
+     make use of LOCs.  */
+ if (mode == TImode && !TARGET_VXE3)
+ {
+ gcc_assert (TARGET_VX);
+ op1
+ = force_reg (V2DImode, simplify_gen_subreg (V2DImode, op1, TImode, 0));
+ op2
+ = force_reg (V2DImode, simplify_gen_subreg (V2DImode, op2, TImode, 0));
+ rtx lab = gen_label_rtx ();
+ rtx ccz = gen_rtx_REG (CCZmode, CC_REGNUM);
+      /* Compare high halves for equality.
+ VEC[L]G op1, op2 sets
+ CC1 if high(op1) < high(op2)
+ and
+ CC2 if high(op1) > high(op2). */
+ machine_mode cc_mode = op3 == const1_rtx ? CCUmode : CCSmode;
+ rtx lane0 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
+ emit_insn (gen_rtx_SET (
+ gen_rtx_REG (cc_mode, CC_REGNUM),
+ gen_rtx_COMPARE (cc_mode,
+ gen_rtx_VEC_SELECT (DImode, op1, lane0),
+ gen_rtx_VEC_SELECT (DImode, op2, lane0))));
+ s390_emit_jump (lab, gen_rtx_NE (CCZmode, ccz, const0_rtx));
+      /* At this point we know that the high halves are equal.
+	 VCHLGS op2, op1 sets CC1 if low(op1) < low(op2).  */
+ emit_insn (gen_rtx_PARALLEL (
+ VOIDmode,
+ gen_rtvec (2,
+ gen_rtx_SET (gen_rtx_REG (CCVIHUmode, CC_REGNUM),
+ gen_rtx_COMPARE (CCVIHUmode, op2, op1)),
+ gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (V2DImode)))));
+ emit_label (lab);
+ emit_insn (gen_rtx_SET (op0, const1_rtx));
+ emit_insn (
+ gen_movsicc (op0,
+ gen_rtx_LTU (CCUmode, gen_rtx_REG (CCUmode, CC_REGNUM),
+ const0_rtx),
+ constm1_rtx, op0));
+      /* Deal with the case where both halves are equal.  */
+ emit_insn (gen_rtx_PARALLEL (
+ VOIDmode,
+ gen_rtvec (2,
+ gen_rtx_SET (gen_rtx_REG (CCVEQmode, CC_REGNUM),
+ gen_rtx_COMPARE (CCVEQmode, op1, op2)),
+ gen_rtx_SET (gen_reg_rtx (V2DImode),
+ gen_rtx_EQ (V2DImode, op1, op2)))));
+ emit_insn (gen_movsicc (op0, gen_rtx_EQ (CCZmode, ccz, const0_rtx),
+ const0_rtx, op0));
+ return;
+ }
+
+ if (mode == QImode || mode == HImode)
+ {
+ rtx_code extend = op3 == const1_rtx ? ZERO_EXTEND : SIGN_EXTEND;
+ op1 = simplify_gen_unary (extend, SImode, op1, mode);
+ op1 = force_reg (SImode, op1);
+ op2 = simplify_gen_unary (extend, SImode, op2, mode);
+ op2 = force_reg (SImode, op2);
+ mode = SImode;
+ }
+
+ if (op3 == const1_rtx)
+ {
+ cc_mode = CCUmode;
+ cc = gen_rtx_REG (cc_mode, CC_REGNUM);
+ cond_lt = gen_rtx_LTU (mode, cc, const0_rtx);
+ cond_gt = gen_rtx_GTU (mode, cc, const0_rtx);
+ }
+ else
+ {
+ cc_mode = CCSmode;
+ cc = gen_rtx_REG (cc_mode, CC_REGNUM);
+ cond_lt = gen_rtx_LT (mode, cc, const0_rtx);
+ cond_gt = gen_rtx_GT (mode, cc, const0_rtx);
+ }
+
+ emit_insn (gen_rtx_SET (cc, gen_rtx_COMPARE (cc_mode, op1, op2)));
+ emit_move_insn (op0, const0_rtx);
+ emit_insn (gen_movsicc (op0, cond_lt, constm1_rtx, op0));
+ emit_insn (gen_movsicc (op0, cond_gt, const1_rtx, op0));
+}
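
A plain C sketch of the semantics the expander implements; the middle end can
recognize this three-way pattern (and C++ operator<=>) and route it through
the spaceship optab:

    int
    spaceship_di (long a, long b)
    {
      /* Ideally CGR ; LHI ; LOCHIL ; LOCHIH, per the comment above.  */
      return a == b ? 0 : a < b ? -1 : 1;
    }
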
+
+/* Expand floating-point op0 = op1 <=> op2, i.e.,
+ op0 = op1 == op2 ? 0 : op1 < op2 ? -1 : op1 > op2 ? 1 : -128.
+
+ If op3 equals const0_rtx, then we are interested in the compare only (see
+   test spaceship-fp-4.c).  Otherwise, op3 is a CONST_INT different from
+   const1_rtx and constm1_rtx, which is used to set op0 in the unordered case.
+
+ Emit a branch-only solution, i.e., let if-convert fold the branches into
+ LOCs if applicable. This has the benefit that the solution is also
+ applicable if we are only interested in the compare, i.e., if op3 equals
+ const0_rtx.
+ */
+
+void
+s390_expand_fp_spaceship (rtx op0, rtx op1, rtx op2, rtx op3)
+{
+ gcc_assert (op3 != const1_rtx && op3 != constm1_rtx);
+
+ machine_mode mode = GET_MODE (op1);
+ machine_mode cc_mode = s390_select_ccmode (LTGT, op1, op2);
+ rtx cc_reg = gen_rtx_REG (cc_mode, CC_REGNUM);
+ rtx cond_unordered = gen_rtx_UNORDERED (mode, cc_reg, const0_rtx);
+ rtx cond_eq = gen_rtx_EQ (mode, cc_reg, const0_rtx);
+ rtx cond_gt = gen_rtx_GT (mode, cc_reg, const0_rtx);
+ rtx_insn *insn;
+ rtx l_unordered = gen_label_rtx ();
+ rtx l_eq = gen_label_rtx ();
+ rtx l_gt = gen_label_rtx ();
+ rtx l_end = gen_label_rtx ();
+
+ s390_emit_compare (VOIDmode, LTGT, op1, op2);
+ if (!flag_finite_math_only)
+ {
+ insn = s390_emit_jump (l_unordered, cond_unordered);
+ add_reg_br_prob_note (insn, profile_probability::very_unlikely ());
+ }
+ insn = s390_emit_jump (l_eq, cond_eq);
+ add_reg_br_prob_note (insn, profile_probability::unlikely ());
+ insn = s390_emit_jump (l_gt, cond_gt);
+ add_reg_br_prob_note (insn, profile_probability::even ());
+ emit_move_insn (op0, constm1_rtx);
+ emit_jump (l_end);
+ emit_label (l_eq);
+ emit_move_insn (op0, const0_rtx);
+ emit_jump (l_end);
+ emit_label (l_gt);
+ emit_move_insn (op0, const1_rtx);
+ if (!flag_finite_math_only)
+ {
+ emit_jump (l_end);
+ emit_label (l_unordered);
+ rtx unord_val = op3 == const0_rtx ? GEN_INT (-128) : op3;
+ emit_move_insn (op0, unord_val);
+ }
+ emit_label (l_end);
+}
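
The corresponding C-level semantics, per the comment above (a hedged sketch;
-128 stands in for "unordered" when only the compare matters):

    int
    spaceship_df (double a, double b)
    {
      if (a == b) return 0;
      if (a < b)  return -1;
      if (a > b)  return 1;
      return -128;    /* NaN operand: the unordered branch above.  */
    }
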
+
/* This is called from dwarf2out.cc via TARGET_ASM_OUTPUT_DWARF_DTPREL.
We need to emit DTP-relative relocations. */
@@ -8993,15 +9239,12 @@ print_operand (FILE *file, rtx x, int code)
else if (code == 'h')
fprintf (file, HOST_WIDE_INT_PRINT_DEC,
((CONST_WIDE_INT_ELT (x, 0) & 0xffff) ^ 0x8000) - 0x8000);
+ /* Support arbitrary _BitInt constants in asm statements. */
+ else if (code == 0)
+ output_addr_const (file, x);
else
- {
- if (code == 0)
- output_operand_lossage ("invalid constant - try using "
- "an output modifier");
- else
- output_operand_lossage ("invalid constant for output modifier '%c'",
- code);
- }
+ output_operand_lossage ("invalid constant for output modifier '%c'",
+ code);
break;
case CONST_VECTOR:
switch (code)
@@ -11735,8 +11978,7 @@ s390_load_got (void)
emit_move_insn (got_rtx, s390_got_symbol ());
- insns = get_insns ();
- end_sequence ();
+ insns = end_sequence ();
return insns;
}
@@ -13503,8 +13745,7 @@ s390_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
start_sequence ();
emit_move_insn (reg, gen_rtx_REG (Pmode, 1));
- seq = get_insns ();
- end_sequence ();
+ seq = end_sequence ();
push_topmost_sequence ();
emit_insn_after (seq, entry_of_function ());
@@ -14496,7 +14737,21 @@ s390_call_saved_register_used (tree call_expr)
for (reg = 0; reg < nregs; reg++)
if (!call_used_or_fixed_reg_p (reg + REGNO (parm_rtx)))
- return true;
+ {
+ rtx parm;
+ /* Allow passing through unmodified value from caller,
+ see PR119873. */
+ if (TREE_CODE (parameter) == SSA_NAME
+ && SSA_NAME_IS_DEFAULT_DEF (parameter)
+ && SSA_NAME_VAR (parameter)
+ && TREE_CODE (SSA_NAME_VAR (parameter)) == PARM_DECL
+ && (parm = DECL_INCOMING_RTL (SSA_NAME_VAR (parameter)))
+ && REG_P (parm)
+ && REGNO (parm) == REGNO (parm_rtx)
+ && REG_NREGS (parm) == REG_NREGS (parm_rtx))
+ break;
+ return true;
+ }
}
else if (GET_CODE (parm_rtx) == PARALLEL)
{
@@ -14510,7 +14765,17 @@ s390_call_saved_register_used (tree call_expr)
gcc_assert (REG_NREGS (r) == 1);
if (!call_used_or_fixed_reg_p (REGNO (r)))
- return true;
+ {
+ rtx parm;
+ if (TREE_CODE (parameter) == SSA_NAME
+ && SSA_NAME_IS_DEFAULT_DEF (parameter)
+ && SSA_NAME_VAR (parameter)
+ && TREE_CODE (SSA_NAME_VAR (parameter)) == PARM_DECL
+ && (parm = DECL_INCOMING_RTL (SSA_NAME_VAR (parameter)))
+ && rtx_equal_p (parm_rtx, parm))
+ break;
+ return true;
+ }
}
}
}
@@ -14543,8 +14808,9 @@ s390_function_ok_for_sibcall (tree decl, tree exp)
return false;
/* Register 6 on s390 is available as an argument register but unfortunately
- "caller saved". This makes functions needing this register for arguments
- not suitable for sibcalls. */
+ "caller saved". This makes functions needing this register for arguments
+ not suitable for sibcalls, unless the same value is passed from the
+ caller. */
return !s390_call_saved_register_used (exp);
}
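
A hedged sketch of the case PR119873 enables: on s390 the fifth integer
argument is passed in r6, which the callee must preserve, so forwarding it
unmodified leaves r6 already correct for the tail call:

    extern int callee5 (int, int, int, int, int);

    int
    caller5 (int a, int b, int c, int d, int e)
    {
      return callee5 (a, b, c, d, e);   /* now allowed as a sibcall */
    }
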
@@ -16472,9 +16738,6 @@ s390_option_override_internal (struct gcc_options *opts,
else
SET_OPTION_IF_UNSET (opts, opts_set, param_vect_partial_vector_usage, 0);
- /* Do not vectorize loops with a low trip count for now. */
- SET_OPTION_IF_UNSET (opts, opts_set, param_min_vect_loop_bound, 2);
-
/* Set the default alignment. */
s390_default_align (opts);
@@ -16769,8 +17032,9 @@ s390_valid_target_attribute_inner_p (tree args,
generate_option (opt, NULL, value, CL_TARGET, &decoded);
s390_handle_option (opts, new_opts_set, &decoded, input_location);
set_option (opts, new_opts_set, opt, value,
- p + opt_len, DK_UNSPECIFIED, input_location,
- global_dc);
+ p + opt_len,
+ static_cast<int> (diagnostics::kind::unspecified),
+ input_location, global_dc);
}
else
{
@@ -16787,8 +17051,9 @@ s390_valid_target_attribute_inner_p (tree args,
arg_ok = opt_enum_arg_to_value (opt, p + opt_len, &value, CL_TARGET);
if (arg_ok)
set_option (opts, new_opts_set, opt, value,
- p + opt_len, DK_UNSPECIFIED, input_location,
- global_dc);
+ p + opt_len,
+ static_cast<int> (diagnostics::kind::unspecified),
+ input_location, global_dc);
else
{
error ("attribute %<target%> argument %qs is unknown", orig_p);
@@ -17240,13 +17505,15 @@ static bool
s390_support_vector_misalignment (machine_mode mode ATTRIBUTE_UNUSED,
const_tree type ATTRIBUTE_UNUSED,
int misalignment ATTRIBUTE_UNUSED,
- bool is_packed ATTRIBUTE_UNUSED)
+ bool is_packed ATTRIBUTE_UNUSED,
+ bool is_gather_scatter ATTRIBUTE_UNUSED)
{
if (TARGET_VX)
return true;
return default_builtin_support_vector_misalignment (mode, type, misalignment,
- is_packed);
+ is_packed,
+ is_gather_scatter);
}
/* The vector ABI requires vector types to be aligned on an 8 byte
@@ -17738,9 +18005,11 @@ f_constraint_p (const char *constraint)
for (size_t i = 0, c_len = strlen (constraint); i < c_len;
i += CONSTRAINT_LEN (constraint[i], constraint + i))
{
- if (constraint[i] == 'f')
+ if (constraint[i] == 'f'
+ || (constraint[i] == '{' && constraint[i + 1] == 'f'))
seen_f_p = true;
- if (constraint[i] == 'v')
+ if (constraint[i] == 'v'
+ || (constraint[i] == '{' && constraint[i + 1] == 'v'))
seen_v_p = true;
}
@@ -17830,7 +18099,8 @@ s390_md_asm_adjust (vec<rtx> &outputs, vec<rtx> &inputs,
continue;
bool allows_mem, allows_reg, is_inout;
bool ok = parse_output_constraint (&constraint, i, ninputs, noutputs,
- &allows_mem, &allows_reg, &is_inout);
+ &allows_mem, &allows_reg, &is_inout,
+ nullptr);
gcc_assert (ok);
if (!f_constraint_p (constraint))
/* Long double with a constraint other than "=f" - nothing to do. */
@@ -17875,7 +18145,7 @@ s390_md_asm_adjust (vec<rtx> &outputs, vec<rtx> &inputs,
bool allows_mem, allows_reg;
bool ok = parse_input_constraint (&constraint, i, ninputs, noutputs, 0,
constraints.address (), &allows_mem,
- &allows_reg);
+ &allows_reg, nullptr);
gcc_assert (ok);
if (!f_constraint_p (constraint))
/* Long double with a constraint other than "f" (or "=f" for inout
@@ -17947,9 +18217,34 @@ expand_perm_with_merge (const struct expand_vec_perm_d &d)
static const unsigned char lo_perm_qi_swap[16]
= {17, 1, 19, 3, 21, 5, 23, 7, 25, 9, 27, 11, 29, 13, 31, 15};
+ static const unsigned char hi_perm_qi_di[16]
+ = {0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23};
+ static const unsigned char hi_perm_qi_si[16]
+ = {0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23};
+ static const unsigned char hi_perm_qi_hi[16]
+ = {0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23};
+
+ static const unsigned char lo_perm_qi_di[16]
+ = {8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31};
+ static const unsigned char lo_perm_qi_si[16]
+ = {8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31};
+ static const unsigned char lo_perm_qi_hi[16]
+ = {8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31};
+
+ static const unsigned char hi_perm_hi_si[8] = {0, 1, 8, 9, 2, 3, 10, 11};
+ static const unsigned char hi_perm_hi_di[8] = {0, 1, 2, 3, 8, 9, 10, 11};
+
+ static const unsigned char lo_perm_hi_si[8] = {4, 5, 12, 13, 6, 7, 14, 15};
+ static const unsigned char lo_perm_hi_di[8] = {4, 5, 6, 7, 12, 13, 14, 15};
+
+ static const unsigned char hi_perm_si_di[4] = {0, 1, 4, 5};
+
+ static const unsigned char lo_perm_si_di[4] = {2, 3, 6, 7};
+
bool merge_lo_p = false;
bool merge_hi_p = false;
bool swap_operands_p = false;
+ machine_mode mergemode = d.vmode;
if ((d.nelt == 2 && memcmp (d.perm, hi_perm_di, 2) == 0)
|| (d.nelt == 4 && memcmp (d.perm, hi_perm_si, 4) == 0)
@@ -17981,6 +18276,75 @@ expand_perm_with_merge (const struct expand_vec_perm_d &d)
merge_lo_p = true;
swap_operands_p = true;
}
+ else if (d.nelt == 16)
+ {
+ if (memcmp (d.perm, hi_perm_qi_di, 16) == 0)
+ {
+ merge_hi_p = true;
+ mergemode = E_V2DImode;
+ }
+ else if (memcmp (d.perm, hi_perm_qi_si, 16) == 0)
+ {
+ merge_hi_p = true;
+ mergemode = E_V4SImode;
+ }
+ else if (memcmp (d.perm, hi_perm_qi_hi, 16) == 0)
+ {
+ merge_hi_p = true;
+ mergemode = E_V8HImode;
+ }
+ else if (memcmp (d.perm, lo_perm_qi_di, 16) == 0)
+ {
+ merge_lo_p = true;
+ mergemode = E_V2DImode;
+ }
+ else if (memcmp (d.perm, lo_perm_qi_si, 16) == 0)
+ {
+ merge_lo_p = true;
+ mergemode = E_V4SImode;
+ }
+ else if (memcmp (d.perm, lo_perm_qi_hi, 16) == 0)
+ {
+ merge_lo_p = true;
+ mergemode = E_V8HImode;
+ }
+ }
+ else if (d.nelt == 8)
+ {
+ if (memcmp (d.perm, hi_perm_hi_di, 8) == 0)
+ {
+ merge_hi_p = true;
+ mergemode = E_V2DImode;
+ }
+ else if (memcmp (d.perm, hi_perm_hi_si, 8) == 0)
+ {
+ merge_hi_p = true;
+ mergemode = E_V4SImode;
+ }
+ else if (memcmp (d.perm, lo_perm_hi_di, 8) == 0)
+ {
+ merge_lo_p = true;
+ mergemode = E_V2DImode;
+ }
+ else if (memcmp (d.perm, lo_perm_hi_si, 8) == 0)
+ {
+ merge_lo_p = true;
+ mergemode = E_V4SImode;
+ }
+ }
+ else if (d.nelt == 4)
+ {
+ if (memcmp (d.perm, hi_perm_si_di, 4) == 0)
+ {
+ merge_hi_p = true;
+ mergemode = E_V2DImode;
+ }
+ else if (memcmp (d.perm, lo_perm_si_di, 4) == 0)
+ {
+ merge_lo_p = true;
+ mergemode = E_V2DImode;
+ }
+ }
if (!merge_lo_p && !merge_hi_p)
return false;
@@ -17988,7 +18352,7 @@ expand_perm_with_merge (const struct expand_vec_perm_d &d)
if (d.testing_p)
return merge_lo_p || merge_hi_p;
- rtx op0, op1;
+ rtx op0, op1, target = d.target;
if (swap_operands_p)
{
op0 = d.op1;
@@ -17999,12 +18363,80 @@ expand_perm_with_merge (const struct expand_vec_perm_d &d)
op0 = d.op0;
op1 = d.op1;
}
+ if (mergemode != d.vmode)
+ {
+ target = simplify_gen_subreg (mergemode, target, d.vmode, 0);
+ op0 = simplify_gen_subreg (mergemode, op0, d.vmode, 0);
+ op1 = simplify_gen_subreg (mergemode, op1, d.vmode, 0);
+ }
- s390_expand_merge (d.target, op0, op1, merge_hi_p);
+ s390_expand_merge (target, op0, op1, merge_hi_p);
return true;
}
+/* Try to expand the vector permute operation described by D using the vector
+ pack instruction vpk. Return true if vector pack could be used. */
+static bool
+expand_perm_with_pack (const struct expand_vec_perm_d &d)
+{
+ static const unsigned char qi_hi[16]
+ = {1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31};
+ static const unsigned char qi_si[16]
+ = {2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31};
+ static const unsigned char qi_di[16]
+ = {4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31};
+
+ static const unsigned char hi_si[8]
+ = {1, 3, 5, 7, 9, 11, 13, 15};
+ static const unsigned char hi_di[8]
+ = {2, 3, 6, 7, 10, 11, 14, 15};
+
+ static const unsigned char si_di[4]
+ = {1, 3, 5, 7};
+
+ machine_mode packmode, resmode;
+ enum insn_code code = CODE_FOR_nothing;
+
+ if (d.nelt == 16 && memcmp (d.perm, qi_hi, 16) == 0)
+ {
+ packmode = E_V8HImode;
+ resmode = E_V16QImode;
+ code = CODE_FOR_vec_pack_trunc_v8hi;
+ }
+ else if ((d.nelt == 16 && memcmp (d.perm, qi_si, 16) == 0)
+ || (d.nelt == 8 && memcmp (d.perm, hi_si, 8) == 0))
+ {
+ packmode = E_V4SImode;
+ resmode = E_V8HImode;
+ code = CODE_FOR_vec_pack_trunc_v4si;
+ }
+ else if ((d.nelt == 16 && memcmp (d.perm, qi_di, 16) == 0)
+ || (d.nelt == 8 && memcmp (d.perm, hi_di, 8) == 0)
+ || (d.nelt == 4 && memcmp (d.perm, si_di, 4) == 0))
+ {
+ packmode = E_V2DImode;
+ resmode = E_V4SImode;
+ code = CODE_FOR_vec_pack_trunc_v2di;
+ }
+
+ if (code == CODE_FOR_nothing)
+ return false;
+
+ if (d.testing_p)
+ return true;
+ rtx target = simplify_gen_subreg (resmode, d.target, d.vmode, 0);
+ rtx op0 = simplify_gen_subreg (packmode,
+ force_reg (GET_MODE (d.op0), d.op0),
+ d.vmode, 0);
+ rtx op1 = simplify_gen_subreg (packmode,
+ force_reg (GET_MODE (d.op1), d.op1),
+ d.vmode, 0);
+ rtx pat = GEN_FCN (code) (target, op0, op1);
+ emit_insn (pat);
+ return true;
+}
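
A hedged example of a permutation the new routine catches (GNU vector
extensions): selecting the odd SImode elements of two vectors matches si_di
above and becomes a single vec_pack_trunc_v2di, i.e. vpkg:

    typedef unsigned int v4si __attribute__ ((vector_size (16)));

    v4si
    pack_odd (v4si a, v4si b)
    {
      return __builtin_shufflevector (a, b, 1, 3, 5, 7);
    }
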
+
/* Try to expand the vector permute operation described by D using the
vector permute doubleword immediate instruction vpdi. Return true
if vpdi could be used.
@@ -18228,6 +18660,9 @@ vectorize_vec_perm_const_1 (const struct expand_vec_perm_d &d)
if (expand_perm_with_merge (d))
return true;
+ if (expand_perm_with_pack (d))
+ return true;
+
if (expand_perm_with_vpdi (d))
return true;
@@ -18330,6 +18765,27 @@ s390_c_mode_for_floating_type (enum tree_index ti)
return default_mode_for_floating_type (ti);
}
+/* Return true if _BitInt(N) is supported and fill its details into *INFO. */
+
+bool
+s390_bitint_type_info (int n, struct bitint_info *info)
+{
+ if (!TARGET_64BIT)
+ return false;
+ if (n <= 8)
+ info->limb_mode = QImode;
+ else if (n <= 16)
+ info->limb_mode = HImode;
+ else if (n <= 32)
+ info->limb_mode = SImode;
+ else
+ info->limb_mode = DImode;
+ info->abi_limb_mode = info->limb_mode;
+ info->big_endian = true;
+ info->extended = true;
+ return true;
+}
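
A C23 usage sketch; per the hook above, limbs are DImode for N > 32, the limb
order is big-endian, and values are extended to the full limb:

    unsigned _BitInt(96)
    add96 (unsigned _BitInt(96) a, unsigned _BitInt(96) b)
    {
      return a + b;   /* lowered to two 64-bit limb operations */
    }
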
+
/* Initialize GCC target structure. */
#undef TARGET_ASM_ALIGNED_HI_OP
@@ -18651,6 +19107,9 @@ s390_c_mode_for_floating_type (enum tree_index ti)
#undef TARGET_DOCUMENTATION_NAME
#define TARGET_DOCUMENTATION_NAME "S/390"
+#undef TARGET_C_BITINT_TYPE_INFO
+#define TARGET_C_BITINT_TYPE_INFO s390_bitint_type_info
+
struct gcc_target targetm = TARGET_INITIALIZER;
#include "gt-s390.h"
diff --git a/gcc/config/s390/s390.md b/gcc/config/s390/s390.md
index 874974a..858387c 100644
--- a/gcc/config/s390/s390.md
+++ b/gcc/config/s390/s390.md
@@ -121,6 +121,7 @@
; Test Data Class (TDC)
UNSPEC_TDC_INSN
+ UNSPEC_SIGNBIT
; Byte-wise Population Count
UNSPEC_POPCNT
@@ -139,9 +140,6 @@
UNSPEC_LCBB
; Vector
- UNSPEC_VEC_SMULT_HI
- UNSPEC_VEC_UMULT_HI
- UNSPEC_VEC_SMULT_LO
UNSPEC_VEC_SMULT_EVEN
UNSPEC_VEC_UMULT_EVEN
UNSPEC_VEC_SMULT_ODD
@@ -241,9 +239,6 @@
UNSPEC_VEC_MSUM
- UNSPEC_VEC_VFMIN
- UNSPEC_VEC_VFMAX
-
UNSPEC_VEC_VBLEND
UNSPEC_VEC_VEVAL
UNSPEC_VEC_VGEM
@@ -256,6 +251,9 @@
UNSPEC_NNPA_VCFN_V8HI
UNSPEC_NNPA_VCNF_V8HI
+
+ UNSPEC_FMAX
+ UNSPEC_FMIN
])
;;
@@ -311,6 +309,9 @@
UNSPECV_SPLIT_STACK_CALL
UNSPECV_OSC_BREAK
+
+ ; Stack Protector
+ UNSPECV_SP_GET_TP
])
;;
@@ -368,6 +369,9 @@
(VR23_REGNUM 45)
(VR24_REGNUM 46)
(VR31_REGNUM 53)
+ ; Access registers
+ (AR0_REGNUM 36)
+ (AR1_REGNUM 37)
])
; Rounding modes for binary floating point numbers
@@ -510,7 +514,7 @@
S390_TDC_INFINITY
S390_TDC_NORMAL_BFP])
-(define_int_attr tdc_insn [(S390_TDC_SIGNBIT_SET "signbit")
+(define_int_attr tdc_insn [(S390_TDC_SIGNBIT_SET "signbit_tdc")
(S390_TDC_FINITE "isfinite")
(S390_TDC_INFINITY "isinf")
(S390_TDC_NORMAL_BFP "isnormal")
@@ -993,6 +997,10 @@
(define_mode_attr asm_fcmp [(CCVEQ "e") (CCVFH "h") (CCVFHE "he")])
(define_mode_attr insn_cmp [(CCVEQ "eq") (CCVIH "h") (CCVIHU "hl") (CCVFH "h") (CCVFHE "he")])
+(define_mode_iterator CC_SUZ [CCS CCU CCZ])
+(define_mode_attr l [(CCS "") (CCU "l") (CCZ "")])
+(define_mode_attr cc_tolower [(CCS "ccs") (CCU "ccu") (CCZ "ccz")])
+
; Analogue to TOINTVEC / tointvec
(define_mode_attr TOINT [(TF "TI") (DF "DI") (SF "SI")])
(define_mode_attr toint [(TF "ti") (DF "di") (SF "si")])
@@ -1519,6 +1527,27 @@
operands[0] = SET_DEST (PATTERN (curr_insn));
})
+; Restrict the spaceship optab to z13 or later, where
+; LOAD HALFWORD IMMEDIATE ON CONDITION is available.
+
+(define_mode_iterator SPACESHIP_INT [(TI "TARGET_VX") DI SI HI QI])
+(define_expand "spaceship<mode>4"
+ [(match_operand:SI 0 "register_operand")
+ (match_operand:SPACESHIP_INT 1 "register_operand")
+ (match_operand:SPACESHIP_INT 2 "register_operand")
+ (match_operand:SI 3 "const_int_operand")]
+ "TARGET_Z13 && TARGET_64BIT"
+ "s390_expand_int_spaceship (operands[0], operands[1], operands[2], operands[3]); DONE;")
+
+(define_mode_iterator SPACESHIP_BFP [TF DF SF])
+(define_expand "spaceship<mode>4"
+ [(match_operand:SI 0 "register_operand")
+ (match_operand:SPACESHIP_BFP 1 "register_operand")
+ (match_operand:SPACESHIP_BFP 2 "register_operand")
+ (match_operand:SI 3 "const_int_operand")]
+ "TARGET_Z13 && TARGET_64BIT && TARGET_HARD_FLOAT"
+ "s390_expand_fp_spaceship (operands[0], operands[1], operands[2], operands[3]); DONE;")
+
; (TF|DF|SF|TD|DD|SD) instructions
@@ -3597,7 +3626,7 @@
(match_operand:BLK 1 "memory_operand" ""))
(use (match_operand 2 "const_int_operand" ""))
(use (match_operand 3 "immediate_operand" ""))
- (clobber (scratch))]
+ (clobber (match_scratch 4))]
"reload_completed"
[(parallel
[(set (match_dup 0) (match_dup 1))
@@ -3609,7 +3638,7 @@
(match_operand:BLK 1 "memory_operand" ""))
(use (match_operand 2 "register_operand" ""))
(use (match_operand 3 "memory_operand" ""))
- (clobber (scratch))]
+ (clobber (match_scratch 4))]
"reload_completed"
[(parallel
[(unspec [(match_dup 2) (match_dup 3)
@@ -3623,14 +3652,14 @@
(match_operand:BLK 1 "memory_operand" ""))
(use (match_operand 2 "register_operand" ""))
(use (const:BLK (unspec:BLK [(const_int 0)] UNSPEC_INSN)))
- (clobber (scratch))]
+ (clobber (match_scratch 3))]
"TARGET_Z10 && reload_completed"
[(parallel
[(unspec [(match_dup 2) (const_int 0)
- (label_ref (match_dup 3))] UNSPEC_EXECUTE)
+ (label_ref (match_dup 4))] UNSPEC_EXECUTE)
(set (match_dup 0) (match_dup 1))
(use (const_int 1))])]
- "operands[3] = gen_label_rtx ();")
+ "operands[4] = gen_label_rtx ();")
(define_split
[(set (match_operand:BLK 0 "memory_operand" "")
@@ -3775,6 +3804,86 @@
(unspec:SI [(reg:CCZ CC_REGNUM)] UNSPEC_CC_TO_INT))]
"TARGET_HARD_DFP")
+(define_mode_iterator SIGNBIT_SINGLE [(SF "TARGET_HARD_FLOAT")
+ (SD "TARGET_HARD_DFP")])
+(define_expand "signbit<mode>2"
+ [(match_operand:SI 0 "register_operand")
+ (match_operand:SIGNBIT_SINGLE 1 "nonimmediate_operand")]
+ ""
+{
+ if (TARGET_VX && TARGET_64BIT)
+ {
+ emit_insn (gen_rtx_SET (operands[0], simplify_gen_subreg (SImode, operands[1], <MODE>mode, 0)));
+ emit_insn (gen_rtx_SET (operands[0], gen_rtx_LSHIFTRT (SImode, operands[0], GEN_INT (31))));
+ }
+ else if (TARGET_Z10 && TARGET_64BIT)
+ emit_insn (gen_signbit<mode>2_z10 (operands[0], operands[1]));
+ else
+ emit_insn (gen_signbit_tdc<mode>2 (operands[0], force_reg (<MODE>mode, operands[1])));
+ DONE;
+})
+
+(define_insn "signbit<mode>2_z10"
+ [(set (match_operand:SI 0 "register_operand" "=d")
+ (unspec:SI [(match_operand:SIGNBIT_SINGLE 1 "nonimmediate_operand" "fRT")]
+ UNSPEC_SIGNBIT))]
+ "TARGET_Z10 && TARGET_64BIT"
+ "#")
+
+(define_split
+ [(set (match_operand:SI 0 "register_operand")
+ (unspec:SI [(match_operand:SIGNBIT_SINGLE 1 "register_operand")]
+ UNSPEC_SIGNBIT))]
+ "TARGET_Z10 && TARGET_64BIT && reload_completed"
+ [(set (match_dup 0) (match_dup 1))
+ (set (match_dup 0) (lshiftrt:DI (match_dup 0) (const_int 63)))]
+{
+ operands[0] = gen_rtx_REG (DImode, REGNO (operands[0]));
+ operands[1] = gen_rtx_REG (DImode, REGNO (operands[1]));
+})
+
+(define_split
+ [(set (match_operand:SI 0 "register_operand")
+ (unspec:SI [(match_operand:SIGNBIT_SINGLE 1 "memory_operand")]
+ UNSPEC_SIGNBIT))]
+ "TARGET_Z10 && TARGET_64BIT && reload_completed"
+ [(set (match_dup 0) (match_dup 1))
+ (set (match_dup 0) (lshiftrt:SI (match_dup 0) (const_int 31)))]
+{
+ operands[1] = change_address (operands[1], SImode, 0);
+})
+
+(define_mode_iterator SIGNBIT_DBL_TETRA [(DF "TARGET_HARD_FLOAT")
+ (TF "TARGET_HARD_FLOAT")
+ (DD "TARGET_HARD_DFP")
+ (TD "TARGET_HARD_DFP")])
+(define_expand "signbit<mode>2"
+ [(match_operand:SI 0 "register_operand")
+ (match_operand:SIGNBIT_DBL_TETRA 1 "nonimmediate_operand")]
+ ""
+{
+ if (TARGET_Z10 && TARGET_64BIT)
+ {
+ rtx reg_di = gen_reg_rtx (DImode);
+ if (<MODE>mode == TFmode || <MODE>mode == TDmode)
+ {
+ rtx reg_ti = gen_reg_rtx (TImode);
+ emit_insn (gen_rtx_SET (reg_ti, simplify_gen_subreg (TImode, operands[1], <MODE>mode, 0)));
+ emit_insn (gen_rtx_SET (reg_di, simplify_gen_subreg (DImode, reg_ti, TImode, 0)));
+ }
+ else
+ emit_insn (gen_rtx_SET (reg_di, simplify_gen_subreg (DImode, operands[1], <MODE>mode, 0)));
+ emit_insn (gen_rtx_SET (reg_di, gen_rtx_LSHIFTRT (DImode, reg_di, GEN_INT (63))));
+ rtx subreg = gen_rtx_SUBREG (SImode, reg_di, 4);
+ SUBREG_PROMOTED_VAR_P (subreg) = 1;
+ SUBREG_PROMOTED_SET (subreg, SRP_SIGNED_AND_UNSIGNED);
+ emit_insn (gen_rtx_SET (operands[0], subreg));
+ }
+ else
+ emit_insn (gen_signbit_tdc<mode>2 (operands[0], force_reg (<MODE>mode, operands[1])));
+ DONE;
+})
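
A hedged usage sketch: with the expanders above, signbit on a 64-bit z10 or
newer target becomes a GPR copy of the sign-carrying word plus a logical
shift right, instead of a TDC test-data-class sequence:

    #include <math.h>

    int
    sign_of (double x)
    {
      return signbit (x) ? 1 : 0;   /* lgdr + srlg style on z10+ */
    }
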
+
; This extracts CC into a GPR properly shifted. The actual IPM
; instruction will be issued by reload. The constraint of operand 1
; forces reload to use a GPR. So reload will issue a movcc insn for
@@ -3852,7 +3961,7 @@
(const_int 0))
(use (match_operand 1 "const_int_operand" ""))
(use (match_operand 2 "immediate_operand" ""))
- (clobber (scratch))
+ (clobber (match_scratch 3))
(clobber (reg:CC CC_REGNUM))]
"reload_completed"
[(parallel
@@ -3866,7 +3975,7 @@
(const_int 0))
(use (match_operand 1 "register_operand" ""))
(use (match_operand 2 "memory_operand" ""))
- (clobber (scratch))
+ (clobber (match_scratch 3))
(clobber (reg:CC CC_REGNUM))]
"reload_completed"
[(parallel
@@ -3882,7 +3991,7 @@
(const_int 0))
(use (match_operand 1 "register_operand" ""))
(use (const:BLK (unspec:BLK [(const_int 0)] UNSPEC_INSN)))
- (clobber (scratch))
+ (clobber (match_scratch 2))
(clobber (reg:CC CC_REGNUM))]
"TARGET_Z10 && reload_completed"
[(parallel
@@ -4047,7 +4156,7 @@
(match_operand:BLK 1 "memory_operand" "")))
(use (match_operand 2 "const_int_operand" ""))
(use (match_operand 3 "immediate_operand" ""))
- (clobber (scratch))]
+ (clobber (match_scratch 4))]
"reload_completed"
[(parallel
[(set (reg:CCU CC_REGNUM) (compare:CCU (match_dup 0) (match_dup 1)))
@@ -4060,7 +4169,7 @@
(match_operand:BLK 1 "memory_operand" "")))
(use (match_operand 2 "register_operand" ""))
(use (match_operand 3 "memory_operand" ""))
- (clobber (scratch))]
+ (clobber (match_scratch 4))]
"reload_completed"
[(parallel
[(unspec [(match_dup 2) (match_dup 3)
@@ -4075,7 +4184,7 @@
(match_operand:BLK 1 "memory_operand" "")))
(use (match_operand 2 "register_operand" ""))
(use (const:BLK (unspec:BLK [(const_int 0)] UNSPEC_INSN)))
- (clobber (scratch))]
+ (clobber (match_scratch 3))]
"TARGET_Z10 && reload_completed"
[(parallel
[(unspec [(match_dup 2) (const_int 0)
@@ -5139,18 +5248,19 @@
})
(define_insn "*zero_extendsidi2"
- [(set (match_operand:DI 0 "register_operand" "=d,d,d")
- (zero_extend:DI (match_operand:SI 1 "nonimmediate_operand" "d,T,b")))]
+ [(set (match_operand:DI 0 "register_operand" "=d,d,d,d")
+ (zero_extend:DI (match_operand:SI 1 "nonimmediate_operand" "d,T,b,v")))]
"TARGET_ZARCH"
"@
llgfr\t%0,%1
llgf\t%0,%1
- llgfrl\t%0,%1"
- [(set_attr "op_type" "RRE,RXY,RIL")
- (set_attr "type" "*,*,larl")
- (set_attr "cpu_facility" "*,*,z10")
- (set_attr "z10prop" "z10_fwd_E1,z10_fwd_A3,z10_fwd_A3")
- (set_attr "relative_long" "*,*,yes")])
+ llgfrl\t%0,%1
+ vlgvf\t%0,%v1,0"
+ [(set_attr "op_type" "RRE,RXY,RIL,VRS")
+ (set_attr "type" "*,*,larl,*")
+ (set_attr "cpu_facility" "*,*,z10,vx")
+ (set_attr "z10prop" "z10_fwd_E1,z10_fwd_A3,z10_fwd_A3,*")
+ (set_attr "relative_long" "*,*,yes,*")])
;
; LLGT-type instructions (zero-extend from 31 bit to 64 bit).
@@ -5253,29 +5363,32 @@
; llhrl, llghrl
(define_insn "*zero_extendhi<mode>2_z10"
- [(set (match_operand:GPR 0 "register_operand" "=d,d,d")
- (zero_extend:GPR (match_operand:HI 1 "nonimmediate_operand" "d,T,b")))]
+ [(set (match_operand:GPR 0 "register_operand" "=d,d,d,d")
+ (zero_extend:GPR (match_operand:HI 1 "nonimmediate_operand" "d,T,b,v")))]
"TARGET_Z10"
"@
ll<g>hr\t%0,%1
ll<g>h\t%0,%1
- ll<g>hrl\t%0,%1"
- [(set_attr "op_type" "RXY,RRE,RIL")
- (set_attr "type" "*,*,larl")
- (set_attr "cpu_facility" "*,*,z10")
- (set_attr "z10prop" "z10_super_E1,z10_fwd_A3,z10_fwd_A3")
- (set_attr "relative_long" "*,*,yes")])
+ ll<g>hrl\t%0,%1
+ vlgvh\t%0,%v1,0"
+ [(set_attr "op_type" "RXY,RRE,RIL,VRS")
+ (set_attr "type" "*,*,larl,*")
+ (set_attr "cpu_facility" "*,*,z10,vx")
+ (set_attr "z10prop" "z10_super_E1,z10_fwd_A3,z10_fwd_A3,*")
+ (set_attr "relative_long" "*,*,yes,*")])
; llhr, llcr, llghr, llgcr, llh, llc, llgh, llgc
(define_insn "*zero_extend<HQI:mode><GPR:mode>2_extimm"
- [(set (match_operand:GPR 0 "register_operand" "=d,d")
- (zero_extend:GPR (match_operand:HQI 1 "nonimmediate_operand" "d,T")))]
+ [(set (match_operand:GPR 0 "register_operand" "=d,d,d")
+ (zero_extend:GPR (match_operand:HQI 1 "nonimmediate_operand" "d,T,v")))]
"TARGET_EXTIMM"
"@
ll<g><hc>r\t%0,%1
- ll<g><hc>\t%0,%1"
- [(set_attr "op_type" "RRE,RXY")
- (set_attr "z10prop" "z10_super_E1,z10_fwd_A3")])
+ ll<g><hc>\t%0,%1
+ vlgv<HQI:bhfgq>\t%0,%v1,0"
+ [(set_attr "op_type" "RRE,RXY,VRS")
+ (set_attr "cpu_facility" "*,*,vx")
+ (set_attr "z10prop" "z10_super_E1,z10_fwd_A3,*")])
; llgh, llgc
(define_insn "*zero_extend<HQI:mode><GPR:mode>2"
@@ -11923,15 +12036,43 @@
; Stack Protector Patterns
;
+; Insns stack_protect_get_tp{si,di} are similar to *get_tp_{31,64} but still
+; distinct in the sense that they force recomputation of the thread pointer
+; instead of potentially reloading it from the stack.
+
+(define_insn_and_split "stack_protect_get_tpsi"
+ [(set (match_operand:SI 0 "register_operand" "=d")
+ (unspec_volatile:SI [(const_int 0)] UNSPECV_SP_GET_TP))]
+ ""
+ "#"
+ "&& reload_completed"
+ [(set (match_dup 0) (reg:SI AR0_REGNUM))])
+
+(define_insn_and_split "stack_protect_get_tpdi"
+ [(set (match_operand:DI 0 "register_operand" "=d")
+ (unspec_volatile:DI [(const_int 0)] UNSPECV_SP_GET_TP))]
+ ""
+ "#"
+ "&& reload_completed"
+ [(set (match_dup 1) (reg:SI AR0_REGNUM))
+ (set (match_dup 0) (ashift:DI (match_dup 0) (const_int 32)))
+ (set (strict_low_part (match_dup 1)) (reg:SI AR1_REGNUM))]
+ "operands[1] = gen_rtx_REG (SImode, REGNO (operands[0]));")
+
(define_expand "stack_protect_set"
[(set (match_operand 0 "memory_operand" "")
(match_operand 1 "memory_operand" ""))]
""
{
#ifdef TARGET_THREAD_SSP_OFFSET
+ rtx tp = gen_reg_rtx (Pmode);
+ if (TARGET_64BIT)
+ emit_insn (gen_stack_protect_get_tpdi (tp));
+ else
+ emit_insn (gen_stack_protect_get_tpsi (tp));
operands[1]
- = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, s390_get_thread_pointer (),
- GEN_INT (TARGET_THREAD_SSP_OFFSET)));
+ = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tp,
+ GEN_INT (TARGET_THREAD_SSP_OFFSET)));
#endif
if (TARGET_64BIT)
emit_insn (gen_stack_protect_setdi (operands[0], operands[1]));
@@ -11957,9 +12098,14 @@
{
rtx cc_reg, test;
#ifdef TARGET_THREAD_SSP_OFFSET
+ rtx tp = gen_reg_rtx (Pmode);
+ if (TARGET_64BIT)
+ emit_insn (gen_stack_protect_get_tpdi (tp));
+ else
+ emit_insn (gen_stack_protect_get_tpsi (tp));
operands[1]
- = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, s390_get_thread_pointer (),
- GEN_INT (TARGET_THREAD_SSP_OFFSET)));
+ = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tp,
+ GEN_INT (TARGET_THREAD_SSP_OFFSET)));
#endif
if (TARGET_64BIT)
emit_insn (gen_stack_protect_testdi (operands[0], operands[1]));
diff --git a/gcc/config/s390/vector.md b/gcc/config/s390/vector.md
index e29255f..745634e 100644
--- a/gcc/config/s390/vector.md
+++ b/gcc/config/s390/vector.md
@@ -75,6 +75,8 @@
V1DF V2DF
(V1TF "TARGET_VXE") (TF "TARGET_VXE")])
+(define_mode_iterator VF [V2SF V4SF V2DF])
+
; All modes present in V_HW1 and VFT.
(define_mode_iterator V_HW1_FT [V16QI V8HI V4SI V2DI V1TI V1DF
V2DF (V1SF "TARGET_VXE") (V2SF "TARGET_VXE")
@@ -87,6 +89,13 @@
(define_mode_iterator VF_HW [(V4SF "TARGET_VXE") V2DF (V1TF "TARGET_VXE")
(TF "TARGET_VXE")])
+; FP scalar and vector modes
+(define_mode_iterator VFT_BFP [SF DF
+ (V1SF "TARGET_VXE") (V2SF "TARGET_VXE") (V4SF "TARGET_VXE")
+ V1DF V2DF
+ (V1TF "TARGET_VXE") (TF "TARGET_VXE")])
+
+
(define_mode_iterator V_8 [V1QI])
(define_mode_iterator V_16 [V2QI V1HI])
(define_mode_iterator V_32 [V4QI V2HI V1SI V1SF])
@@ -140,13 +149,13 @@
; The instruction suffix for integer instructions and instructions
; which do not care about whether it is floating point or integer.
-(define_mode_attr bhfgq[(V1QI "b") (V2QI "b") (V4QI "b") (V8QI "b") (V16QI "b")
- (V1HI "h") (V2HI "h") (V4HI "h") (V8HI "h")
- (V1SI "f") (V2SI "f") (V4SI "f")
- (V1DI "g") (V2DI "g")
+(define_mode_attr bhfgq[(V1QI "b") (V2QI "b") (V4QI "b") (V8QI "b") (V16QI "b") (QI "b")
+ (V1HI "h") (V2HI "h") (V4HI "h") (V8HI "h") (HI "h")
+ (V1SI "f") (V2SI "f") (V4SI "f") (SI "f")
+ (V1DI "g") (V2DI "g") (DI "g")
(V1TI "q") (TI "q")
- (V1SF "f") (V2SF "f") (V4SF "f")
- (V1DF "g") (V2DF "g")
+ (V1SF "f") (V2SF "f") (V4SF "f") (SF "f")
+ (V1DF "g") (V2DF "g") (DF "g")
(V1TF "q") (TF "q")])
; This is for vmalhw. It gets an 'w' attached to avoid confusion with
@@ -506,26 +515,89 @@
UNSPEC_VEC_SET))]
"TARGET_VX")
+; Iterator for vec_set that does not use special float/vector overlay tricks.
+(define_mode_iterator VEC_SET_NONFLOAT
+ [V1QI V2QI V4QI V8QI V16QI V1HI V2HI V4HI V8HI V1SI V2SI V4SI V1DI V2DI V2SF V4SF])
+; Iterator for single element float vectors
+(define_mode_iterator VEC_SET_SINGLEFLOAT [V1SF V1DF (V1TF "TARGET_VXE")])
+
; FIXME: Support also vector mode operands for 1
; FIXME: A target memory operand seems to be useful otherwise we end
; up with vl vlvgg vst. Shouldn't the middle-end be able to handle
; that itself?
; vlvgb, vlvgh, vlvgf, vlvgg, vleb, vleh, vlef, vleg, vleib, vleih, vleif, vleig
(define_insn "*vec_set<mode>"
- [(set (match_operand:V 0 "register_operand" "=v,v,v")
- (unspec:V [(match_operand:<non_vec> 1 "general_operand" "d,R,K")
- (match_operand:SI 2 "nonmemory_operand" "an,I,I")
- (match_operand:V 3 "register_operand" "0,0,0")]
- UNSPEC_VEC_SET))]
+ [(set (match_operand:VEC_SET_NONFLOAT 0 "register_operand" "=v,v,v")
+ (unspec:VEC_SET_NONFLOAT
+ [(match_operand:<non_vec> 1 "general_operand" "d,R,K")
+ (match_operand:SI 2 "nonmemory_operand" "an,I,I")
+ (match_operand:VEC_SET_NONFLOAT 3 "register_operand" "0,0,0")]
+ UNSPEC_VEC_SET))]
"TARGET_VX
&& (!CONST_INT_P (operands[2])
- || UINTVAL (operands[2]) < GET_MODE_NUNITS (<V:MODE>mode))"
+ || UINTVAL (operands[2]) < GET_MODE_NUNITS (<VEC_SET_NONFLOAT:MODE>mode))"
"@
vlvg<bhfgq>\t%v0,%1,%Y2
vle<bhfgq>\t%v0,%1,%2
vlei<bhfgq>\t%v0,%1,%2"
[(set_attr "op_type" "VRS,VRX,VRI")])
+(define_insn "*vec_set<mode>"
+ [(set (match_operand:VEC_SET_SINGLEFLOAT 0 "register_operand" "=v,v")
+ (unspec:VEC_SET_SINGLEFLOAT
+ [(match_operand:<non_vec> 1 "general_operand" "v,R")
+ (match_operand:SI 2 "nonmemory_operand" "an,I")
+ (match_operand:VEC_SET_SINGLEFLOAT 3 "register_operand" "0,0")]
+ UNSPEC_VEC_SET))]
+ "TARGET_VX"
+ "@
+ vlr\t%v0,%v1
+ vle<bhfgq>\t%v0,%1,0"
+ [(set_attr "op_type" "VRR,VRX")])
+
+(define_insn "*vec_setv2df"
+ [(set (match_operand:V2DF 0 "register_operand" "=v,v,v,v")
+ (unspec:V2DF [(match_operand:DF 1 "general_operand" "d,R,K,v")
+ (match_operand:SI 2 "nonmemory_operand" "an,I,I,n")
+ (match_operand:V2DF 3 "register_operand" "0,0,0,0")]
+ UNSPEC_VEC_SET))]
+ "TARGET_VX
+ && (!CONST_INT_P (operands[2])
+ || UINTVAL (operands[2]) < GET_MODE_NUNITS (V2DFmode))"
+ "@
+ vlvgg\t%v0,%1,%Y2
+ vleg\t%v0,%1,%2
+ vleig\t%v0,%1,%2
+ #"
+ [(set_attr "op_type" "VRS,VRX,VRI,*")])
+
+(define_split
+ [(set (match_operand:V2DF 0 "register_operand" "")
+ (unspec:V2DF [(match_operand:DF 1 "register_operand" "")
+ (match_operand:SI 2 "const_int_operand" "")
+ (match_operand:V2DF 3 "register_operand" "")]
+ UNSPEC_VEC_SET))]
+ "TARGET_VX
+ && (UINTVAL (operands[2]) < GET_MODE_NUNITS (V2DFmode))
+ && reload_completed
+ && VECTOR_REGNO_P (REGNO (operands[1]))"
+ [(set (match_dup 0)
+ (vec_select:V2DF
+ (vec_concat:V4DF
+ (match_dup 1)
+ (match_dup 3))
+ (parallel [(const_int 0) (match_dup 4)])))]
+{
+ operands[1] = gen_rtx_REG (V2DFmode, REGNO (operands[1]));
+ if (UINTVAL (operands[2]) == 0)
+ operands[4] = GEN_INT (3);
+ else
+ {
+ std::swap (operands[1], operands[3]);
+ operands[4] = GEN_INT (2);
+ }
+})
+
; vlvgb, vlvgh, vlvgf, vlvgg
(define_insn "*vec_set<mode>_plus"
[(set (match_operand:V 0 "register_operand" "=v")
@@ -538,6 +610,14 @@
"vlvg<bhfgq>\t%v0,%1,%Y4(%2)"
[(set_attr "op_type" "VRS")])
+(define_expand "cstoreti4"
+ [(set (match_operand:SI 0 "register_operand")
+ (match_operator:SI 1 "ordered_comparison_operator"
+ [(match_operand:TI 2 "register_operand")
+ (match_operand:TI 3 "register_operand")]))]
+ "TARGET_VX"
+ "s390_expand_cstoreti4 (operands[0], operands[1], operands[2], operands[3]); DONE;")
+
;; FIXME: Support also vector mode operands for 0
;; This is used via RTL standard name as well as for expanding the builtin
@@ -554,18 +634,66 @@
(define_insn "*vec_extract<mode>"
[(set (match_operand:<non_vec> 0 "nonimmediate_operand" "=d,R")
(vec_select:<non_vec>
- (match_operand:V 1 "nonmemory_operand" "v,v")
+ (match_operand:VI 1 "nonmemory_operand" "v,v")
(parallel
[(match_operand:SI 2 "nonmemory_operand" "an,I")])))]
"TARGET_VX"
{
if (CONST_INT_P (operands[2]))
- operands[2] = GEN_INT (UINTVAL (operands[2]) & (GET_MODE_NUNITS (<V:MODE>mode) - 1));
+ operands[2] = GEN_INT (UINTVAL (operands[2]) & (GET_MODE_NUNITS (<VI:MODE>mode) - 1));
if (which_alternative == 0)
return "vlgv<bhfgq>\t%0,%v1,%Y2";
return "vste<bhfgq>\t%v1,%0,%2";
}
- [(set_attr "op_type" "VRS,VRX")])
+ [(set_attr "op_type" "VRS,VRX")
+ (set_attr "mnemonic" "vlgv<bhfgq>,vste<bhfgq>")])
+
+(define_insn "*vec_extract<mode>"
+ [(set (match_operand:<non_vec> 0 "nonimmediate_operand" "=d,R,v")
+ (vec_select:<non_vec>
+ (match_operand:VF 1 "nonmemory_operand" "v,v,v")
+ (parallel
+ [(match_operand:SI 2 "nonmemory_operand" "an,I,n")])))]
+ "TARGET_VX"
+ {
+ if (CONST_INT_P (operands[2]))
+ operands[2] = GEN_INT (UINTVAL (operands[2]) & (GET_MODE_NUNITS (<VF:MODE>mode) - 1));
+ if (which_alternative == 0)
+ return "vlgv<bhfgq>\t%0,%v1,%Y2";
+ else if (which_alternative == 1)
+ return "vste<bhfgq>\t%v1,%0,%2";
+ else
+ return "#";
+ }
+ [(set_attr "op_type" "VRS,VRX,*")
+ (set_attr "mnemonic" "vlgv<bhfgq>,vste<bhfgq>,*")])
+
+(define_split
+ [(set (match_operand:<non_vec> 0 "register_operand" "")
+ (vec_select:<non_vec>
+ (match_operand:VF 1 "register_operand" "")
+ (parallel
+ [(match_operand:SI 2 "const_int_operand" "")])))]
+ "TARGET_VX && reload_completed && VECTOR_REGNO_P (REGNO (operands[0]))"
+ [(set (match_dup 0)
+ (vec_duplicate:VF
+ (vec_select:<non_vec>
+ (match_dup 1)
+ (parallel [(match_dup 2)]))))]
+{
+ unsigned HOST_WIDE_INT idx = UINTVAL (operands[2]) & (GET_MODE_NUNITS (<VF:MODE>mode) - 1);
+ if (idx == 0)
+ {
+ rtx dest = gen_rtx_REG (<VF:MODE>mode, REGNO (operands[0]));
+ emit_insn (gen_mov<VF:mode> (dest, operands[1]));
+ DONE;
+ }
+ else
+ {
+ operands[0] = gen_rtx_REG (<VF:MODE>mode, REGNO (operands[0]));
+ operands[2] = GEN_INT (idx);
+ }
+})
; vlgvb, vlgvh, vlgvf, vlgvg
(define_insn "*vec_extract<mode>_plus"
@@ -587,6 +715,42 @@
DONE;
})
+; The instructions vlgvb, vlgvh, and vlgvf zero all remaining bits of a GPR,
+; i.e., they perform an implicit zero extension.
+
+(define_mode_iterator VLGV_DI [V1QI V2QI V4QI V8QI V16QI
+ V1HI V2HI V4HI V8HI
+ V1SI V2SI V4SI])
+(define_insn "*vec_extract<mode>_zero_extend"
+ [(set (match_operand:DI 0 "register_operand" "=d")
+ (zero_extend:DI (vec_select:<non_vec>
+ (match_operand:VLGV_DI 1 "register_operand" "v")
+ (parallel [(match_operand:SI 2 "nonmemory_operand" "an")]))))]
+ "TARGET_VX"
+{
+ if (CONST_INT_P (operands[2]))
+ operands[2] = GEN_INT (UINTVAL (operands[2]) & (GET_MODE_NUNITS (<MODE>mode) - 1));
+ return "vlgv<bhfgq>\t%0,%v1,%Y2";
+}
+ [(set_attr "op_type" "VRS")
+ (set_attr "mnemonic" "vlgv<bhfgq>")])
+
+(define_mode_iterator VLGV_SI [V1QI V2QI V4QI V8QI V16QI
+ V1HI V2HI V4HI V8HI])
+(define_insn "*vec_extract<mode>_zero_extend"
+ [(set (match_operand:SI 0 "register_operand" "=d")
+ (zero_extend:SI (vec_select:<non_vec>
+ (match_operand:VLGV_SI 1 "register_operand" "v")
+ (parallel [(match_operand:SI 2 "nonmemory_operand" "an")]))))]
+ "TARGET_VX"
+{
+ if (CONST_INT_P (operands[2]))
+ operands[2] = GEN_INT (UINTVAL (operands[2]) & (GET_MODE_NUNITS (<MODE>mode) - 1));
+ return "vlgv<bhfgq>\t%0,%v1,%Y2";
+}
+ [(set_attr "op_type" "VRS")
+ (set_attr "mnemonic" "vlgv<bhfgq>")])
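A hedged example (GNU vector extensions): extracting a narrow element into a
wider scalar no longer needs a separate zero-extension, because vlgv[bhf]
already clears the remaining GPR bits:

    typedef unsigned char v16qi __attribute__ ((vector_size (16)));

    unsigned long
    extract5 (v16qi v)
    {
      return v[5];   /* single vlgvb; no follow-up llgcr */
    }
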
+
(define_insn "*vec_vllezlf<mode>"
[(set (match_operand:V_HW_4 0 "register_operand" "=v")
(vec_concat:V_HW_4
@@ -603,10 +767,10 @@
; Replicate from vector element
; vrepb, vreph, vrepf, vrepg
(define_insn "*vec_splat<mode>"
- [(set (match_operand:V_128_NOSINGLE 0 "register_operand" "=v")
- (vec_duplicate:V_128_NOSINGLE
+ [(set (match_operand:V 0 "register_operand" "=v")
+ (vec_duplicate:V
(vec_select:<non_vec>
- (match_operand:V_128_NOSINGLE 1 "register_operand" "v")
+ (match_operand:V 1 "register_operand" "v")
(parallel
[(match_operand:QI 2 "const_mask_operand" "C")]))))]
"TARGET_VX && UINTVAL (operands[2]) < GET_MODE_NUNITS (<MODE>mode)"
@@ -945,7 +1109,7 @@
else
{
reg_pair += 2; // get rid of prefix %f
- snprintf (buf, sizeof (buf), "ldr\t%%f0,%%f1;vpdi\t%%%%v%s,%%v1,%%%%v%s,5", reg_pair, reg_pair);
+ snprintf (buf, sizeof (buf), "vlr\t%%v0,%%v1;vpdi\t%%%%v%s,%%v1,%%%%v%s,5", reg_pair, reg_pair);
output_asm_insn (buf, operands);
return "";
}
@@ -2209,6 +2373,28 @@
operands[5] = gen_reg_rtx (V2DImode);
})
+(define_insn "*vec_cmpv2di_lane0_<cc_tolower>"
+ [(set (reg:CC_SUZ CC_REGNUM)
+ (compare:CC_SUZ
+ (vec_select:DI
+ (match_operand:V2DI 0 "register_operand" "v")
+ (parallel [(const_int 0)]))
+ (vec_select:DI
+ (match_operand:V2DI 1 "register_operand" "v")
+ (parallel [(const_int 0)]))))]
+ "TARGET_VX"
+ "vec<l>g\t%v0,%v1"
+ [(set_attr "op_type" "VRR")])
+
+(define_insn "*vec_cmpti_<cc_tolower>"
+ [(set (reg:CC_SUZ CC_REGNUM)
+ (compare:CC_SUZ
+ (match_operand:TI 0 "register_operand" "v")
+ (match_operand:TI 1 "register_operand" "v")))]
+ "TARGET_VXE3"
+ "vec<l>q\t%v0,%v1"
+ [(set_attr "op_type" "VRR")])
+
;;
;; Floating point compares
@@ -3422,14 +3608,341 @@
"veval\t%v0,%v1,%v2,%v3,%b4"
[(set_attr "op_type" "VRI")])
-; reduc_smin
-; reduc_smax
-; reduc_umin
-; reduc_umax
-
; vec_pack_sfix_trunc: convert + pack ?
; vec_pack_ufix_trunc
; vec_unpacks_float_hi
; vec_unpacks_float_lo
; vec_unpacku_float_hi
; vec_unpacku_float_lo
+
+(define_expand "avg<mode>3_ceil"
+ [(set (match_operand:VIT_HW_VXE3_T 0 "register_operand")
+ (unspec:VIT_HW_VXE3_T [(match_operand:VIT_HW_VXE3_T 1 "register_operand")
+ (match_operand:VIT_HW_VXE3_T 2 "register_operand")]
+ UNSPEC_VEC_AVG))]
+ "TARGET_VX")
+
+(define_expand "uavg<mode>3_ceil"
+ [(set (match_operand:VIT_HW_VXE3_T 0 "register_operand")
+ (unspec:VIT_HW_VXE3_T [(match_operand:VIT_HW_VXE3_T 1 "register_operand")
+ (match_operand:VIT_HW_VXE3_T 2 "register_operand")]
+ UNSPEC_VEC_AVGU))]
+ "TARGET_VX")
+
+(define_expand "smul<mode>3_highpart"
+ [(set (match_operand:VIT_HW_VXE3_DT 0 "register_operand")
+ (smul_highpart:VIT_HW_VXE3_DT (match_operand:VIT_HW_VXE3_DT 1 "register_operand")
+ (match_operand:VIT_HW_VXE3_DT 2 "register_operand")))]
+ "TARGET_VX")
+
+(define_expand "umul<mode>3_highpart"
+ [(set (match_operand:VIT_HW_VXE3_DT 0 "register_operand")
+ (umul_highpart:VIT_HW_VXE3_DT (match_operand:VIT_HW_VXE3_DT 1 "register_operand")
+ (match_operand:VIT_HW_VXE3_DT 2 "register_operand")))]
+ "TARGET_VX")
+
+; fmax
+(define_expand "fmax<mode>3"
+ [(set (match_operand:VFT_BFP 0 "register_operand")
+ (unspec:VFT_BFP [(match_operand:VFT_BFP 1 "register_operand")
+ (match_operand:VFT_BFP 2 "register_operand")
+ (const_int 4)]
+ UNSPEC_FMAX))]
+ "TARGET_VXE")
+
+; fmin
+(define_expand "fmin<mode>3"
+ [(set (match_operand:VFT_BFP 0 "register_operand")
+ (unspec:VFT_BFP [(match_operand:VFT_BFP 1 "register_operand")
+ (match_operand:VFT_BFP 2 "register_operand")
+ (const_int 4)]
+ UNSPEC_FMIN))]
+ "TARGET_VXE")
+
+; reduc_plus
+(define_expand "reduc_plus_scal_<mode>"
+ [(set (match_dup 4)
+ (unspec:V4SI [(match_operand:VI_HW_QH 1 "register_operand")
+ (match_dup 2)]
+ UNSPEC_VEC_VSUM))
+ (set (match_dup 5)
+ (unspec:V2DI [(match_dup 4) (match_dup 3)] UNSPEC_VEC_VSUMQ))
+ (set (match_operand:<non_vec> 0 "register_operand")
+ (vec_select:<non_vec> (match_dup 6)
+ (parallel [(match_dup 7)])))]
+ "TARGET_VX"
+{
+ operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));
+ operands[3] = simplify_gen_subreg (V4SImode, operands[2], <MODE>mode, 0);
+ operands[4] = gen_reg_rtx (V4SImode);
+ operands[5] = gen_reg_rtx (V2DImode);
+ operands[6] = simplify_gen_subreg (<MODE>mode, operands[5], V2DImode, 0);
+ operands[7] = GEN_INT (16 / GET_MODE_SIZE (<non_vec>mode) - 1);
+})
+
+(define_expand "reduc_plus_scal_<mode>"
+ [(set (match_dup 3)
+ (unspec:V2DI [(match_operand:VI_HW_SD 1 "register_operand")
+ (match_dup 2)]
+ UNSPEC_VEC_VSUMQ))
+ (set (match_operand:<non_vec> 0 "register_operand")
+ (vec_select:<non_vec> (match_dup 4)
+ (parallel [(match_dup 5)])))]
+ "TARGET_VX"
+{
+ operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));
+ operands[3] = gen_reg_rtx (V2DImode);
+ operands[4] = simplify_gen_subreg (<MODE>mode, operands[3], V2DImode, 0);
+ operands[5] = GEN_INT (16 / GET_MODE_SIZE (<non_vec>mode) - 1);
+})
+
+(define_expand "reduc_plus_scal_v2df"
+ [(set (match_dup 2)
+ (unspec:V2DF [(match_operand:V2DF 1 "register_operand")
+ (match_dup 1)
+ (const_int 8)]
+ UNSPEC_VEC_SLDBYTE))
+ (set (match_dup 3) (plus:V2DF (match_dup 1) (match_dup 2)))
+ (set (match_operand:DF 0 "register_operand")
+ (vec_select:DF (match_dup 3) (parallel [(const_int 0)])))]
+ "TARGET_VX"
+{
+ operands[2] = gen_reg_rtx (V2DFmode);
+ operands[3] = gen_reg_rtx (V2DFmode);
+})
+
+(define_expand "reduc_plus_scal_v4sf"
+ [(set (match_dup 2)
+ (unspec:V4SF [(match_operand:V4SF 1 "register_operand")
+ (match_dup 1)
+ (const_int 4)]
+ UNSPEC_VEC_SLDBYTE))
+ (set (match_dup 3) (plus:V4SF (match_dup 1) (match_dup 2)))
+ (set (match_dup 4)
+ (unspec:V4SF [(match_dup 3) (match_dup 3) (const_int 8)]
+ UNSPEC_VEC_SLDBYTE))
+ (set (match_dup 5) (plus:V4SF (match_dup 3) (match_dup 4)))
+ (set (match_operand:SF 0 "register_operand")
+ (vec_select:SF (match_dup 5) (parallel [(const_int 0)])))]
+ "TARGET_VXE"
+{
+ operands[2] = gen_reg_rtx (V4SFmode);
+ operands[3] = gen_reg_rtx (V4SFmode);
+ operands[4] = gen_reg_rtx (V4SFmode);
+ operands[5] = gen_reg_rtx (V4SFmode);
+})
+
+; reduc_fmin, reduc_fmax, reduc_smin, reduc_smax
+
+(define_int_iterator REDUC_FMINMAX [UNSPEC_FMAX UNSPEC_FMIN])
+(define_int_attr reduc_fminmax_name [(UNSPEC_FMAX "fmax") (UNSPEC_FMIN "fmin")])
+(define_code_iterator REDUC_MINMAX [smin smax])
+(define_code_attr reduc_minmax_name [(smin "smin") (smax "smax")])
+
+(define_expand "reduc_<reduc_fminmax_name>_scal_v2df"
+ [(set (match_dup 2)
+ (unspec:V2DF [(match_operand:V2DF 1 "register_operand")
+ (match_dup 1)
+ (const_int 8)]
+ UNSPEC_VEC_SLDBYTE))
+ (set (match_dup 3)
+ (unspec:V2DF [(match_dup 1) (match_dup 2) (const_int 4)] REDUC_FMINMAX))
+ (set (match_operand:DF 0 "register_operand" "")
+ (vec_select:DF (match_dup 3) (parallel [(const_int 0)])))]
+ "TARGET_VX"
+{
+ operands[2] = gen_reg_rtx (V2DFmode);
+ operands[3] = gen_reg_rtx (V2DFmode);
+})
+
+(define_expand "reduc_<reduc_fminmax_name>_scal_v4sf"
+ [(set (match_dup 2)
+ (unspec:V4SF [(match_operand:V4SF 1 "register_operand")
+ (match_dup 1)
+ (const_int 4)]
+ UNSPEC_VEC_SLDBYTE))
+ (set (match_dup 3)
+ (unspec:V4SF [(match_dup 1) (match_dup 2) (const_int 4)] REDUC_FMINMAX))
+ (set (match_dup 4)
+ (unspec:V4SF [(match_dup 3)
+ (match_dup 3)
+ (const_int 8)]
+ UNSPEC_VEC_SLDBYTE))
+ (set (match_dup 5)
+ (unspec:V4SF [(match_dup 3) (match_dup 4) (const_int 4)] REDUC_FMINMAX))
+ (set (match_operand:SF 0 "register_operand")
+ (vec_select:SF (match_dup 5) (parallel [(const_int 0)])))]
+ "TARGET_VXE"
+{
+ operands[2] = gen_reg_rtx (V4SFmode);
+ operands[3] = gen_reg_rtx (V4SFmode);
+ operands[4] = gen_reg_rtx (V4SFmode);
+ operands[5] = gen_reg_rtx (V4SFmode);
+})
+
+(define_expand "reduc_<reduc_minmax_name>_scal_v2df"
+ [(set (match_dup 2)
+ (unspec:V2DF [(match_operand:V2DF 1 "register_operand")
+ (match_dup 1)
+ (const_int 8)]
+ UNSPEC_VEC_SLDBYTE))
+ (set (match_dup 3)
+ (REDUC_MINMAX:V2DF (match_dup 1) (match_dup 2)))
+ (set (match_operand:DF 0 "register_operand" "")
+ (vec_select:DF (match_dup 3) (parallel [(const_int 0)])))]
+ "TARGET_VX"
+{
+ operands[2] = gen_reg_rtx (V2DFmode);
+ operands[3] = gen_reg_rtx (V2DFmode);
+})
+
+(define_expand "reduc_<reduc_minmax_name>_scal_v4sf"
+ [(set (match_dup 2)
+ (unspec:V4SF [(match_operand:V4SF 1 "register_operand")
+ (match_dup 1)
+ (const_int 4)]
+ UNSPEC_VEC_SLDBYTE))
+ (set (match_dup 3)
+ (REDUC_MINMAX:V4SF (match_dup 1) (match_dup 2)))
+ (set (match_dup 4)
+ (unspec:V4SF [(match_dup 3)
+ (match_dup 3)
+ (const_int 8)]
+ UNSPEC_VEC_SLDBYTE))
+ (set (match_dup 5)
+ (REDUC_MINMAX:V4SF (match_dup 3) (match_dup 4)))
+ (set (match_operand:SF 0 "register_operand" "")
+ (vec_select:SF (match_dup 5) (parallel [(const_int 0)])))]
+ "TARGET_VXE"
+{
+ operands[2] = gen_reg_rtx (V4SFmode);
+ operands[3] = gen_reg_rtx (V4SFmode);
+ operands[4] = gen_reg_rtx (V4SFmode);
+ operands[5] = gen_reg_rtx (V4SFmode);
+})
+
+; reduc_and, reduc_ior, reduc_xor
+; reduc_smin, reduc_smax, reduc_umin, reduc_umax
+
+(define_code_iterator REDUCBIN [and xor ior smin smax umin umax])
+(define_code_attr reduc_bin_insn [(and "and") (xor "xor") (ior "ior")
+ (smin "smin") (smax "smax")
+ (umin "umin") (umax "umax")])
+
+(define_expand "reduc_<reduc_bin_insn>_scal_v2di"
+ [(set (match_dup 2)
+ (unspec:V2DI [(match_operand:V2DI 1 "register_operand")
+ (match_dup 1)
+ (const_int 8)]
+ UNSPEC_VEC_SLDBYTE))
+ (set (match_dup 3)
+ (REDUCBIN:V2DI (match_dup 1) (match_dup 2)))
+ (set (match_operand:DI 0 "register_operand" "")
+ (vec_select:DI (match_dup 3) (parallel [(const_int 0)])))]
+ "TARGET_VX"
+{
+ operands[2] = gen_reg_rtx (V2DImode);
+ operands[3] = gen_reg_rtx (V2DImode);
+})
+
+(define_expand "reduc_<reduc_bin_insn>_scal_v4si"
+ [(set (match_dup 2)
+ (unspec:V4SI [(match_operand:V4SI 1 "register_operand")
+ (match_dup 1)
+ (const_int 4)]
+ UNSPEC_VEC_SLDBYTE))
+ (set (match_dup 3)
+ (REDUCBIN:V4SI (match_dup 1) (match_dup 2)))
+ (set (match_dup 4)
+ (unspec:V4SI [(match_dup 3)
+ (match_dup 3)
+ (const_int 8)]
+ UNSPEC_VEC_SLDBYTE))
+ (set (match_dup 5)
+ (REDUCBIN:V4SI (match_dup 3) (match_dup 4)))
+ (set (match_operand:SI 0 "register_operand" "")
+ (vec_select:SI (match_dup 5) (parallel [(const_int 0)])))]
+ "TARGET_VX"
+{
+ operands[2] = gen_reg_rtx (V4SImode);
+ operands[3] = gen_reg_rtx (V4SImode);
+ operands[4] = gen_reg_rtx (V4SImode);
+ operands[5] = gen_reg_rtx (V4SImode);
+})
+
+(define_expand "reduc_<reduc_bin_insn>_scal_v8hi"
+ [(set (match_dup 2)
+ (unspec:V8HI [(match_operand:V8HI 1 "register_operand")
+ (match_dup 1)
+ (const_int 2)]
+ UNSPEC_VEC_SLDBYTE))
+ (set (match_dup 3)
+ (REDUCBIN:V8HI (match_dup 1) (match_dup 2)))
+ (set (match_dup 4)
+ (unspec:V8HI [(match_dup 3)
+ (match_dup 3)
+ (const_int 4)]
+ UNSPEC_VEC_SLDBYTE))
+ (set (match_dup 5)
+ (REDUCBIN:V8HI (match_dup 3) (match_dup 4)))
+ (set (match_dup 6)
+ (unspec:V8HI [(match_dup 5)
+ (match_dup 5)
+ (const_int 8)]
+ UNSPEC_VEC_SLDBYTE))
+ (set (match_dup 7)
+ (REDUCBIN:V8HI (match_dup 5) (match_dup 6)))
+ (set (match_operand:HI 0 "register_operand" "")
+ (vec_select:HI (match_dup 7) (parallel [(const_int 0)])))]
+ "TARGET_VX"
+{
+ operands[2] = gen_reg_rtx (V8HImode);
+ operands[3] = gen_reg_rtx (V8HImode);
+ operands[4] = gen_reg_rtx (V8HImode);
+ operands[5] = gen_reg_rtx (V8HImode);
+ operands[6] = gen_reg_rtx (V8HImode);
+ operands[7] = gen_reg_rtx (V8HImode);
+})
+
+(define_expand "reduc_<reduc_bin_insn>_scal_v16qi"
+ [(set (match_dup 2)
+ (unspec:V16QI [(match_operand:V16QI 1 "register_operand")
+ (match_dup 1)
+ (const_int 1)]
+ UNSPEC_VEC_SLDBYTE))
+ (set (match_dup 3)
+ (REDUCBIN:V16QI (match_dup 1) (match_dup 2)))
+ (set (match_dup 4)
+ (unspec:V16QI [(match_dup 3)
+ (match_dup 3)
+ (const_int 2)]
+ UNSPEC_VEC_SLDBYTE))
+ (set (match_dup 5)
+ (REDUCBIN:V16QI (match_dup 3) (match_dup 4)))
+ (set (match_dup 6)
+ (unspec:V16QI [(match_dup 5)
+ (match_dup 5)
+ (const_int 4)]
+ UNSPEC_VEC_SLDBYTE))
+ (set (match_dup 7)
+ (REDUCBIN:V16QI (match_dup 5) (match_dup 6)))
+ (set (match_dup 8)
+ (unspec:V16QI [(match_dup 7)
+ (match_dup 7)
+ (const_int 8)]
+ UNSPEC_VEC_SLDBYTE))
+ (set (match_dup 9)
+ (REDUCBIN:V16QI (match_dup 7) (match_dup 8)))
+ (set (match_operand:QI 0 "register_operand" "")
+ (vec_select:QI (match_dup 9) (parallel [(const_int 0)])))]
+ "TARGET_VX"
+{
+ operands[2] = gen_reg_rtx (V16QImode);
+ operands[3] = gen_reg_rtx (V16QImode);
+ operands[4] = gen_reg_rtx (V16QImode);
+ operands[5] = gen_reg_rtx (V16QImode);
+ operands[6] = gen_reg_rtx (V16QImode);
+ operands[7] = gen_reg_rtx (V16QImode);
+ operands[8] = gen_reg_rtx (V16QImode);
+ operands[9] = gen_reg_rtx (V16QImode);
+})
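[Editorial sketch] All of the reduc_*_scal_* expanders above follow the same log2 scheme: combine the vector with a rotated copy of itself (the UNSPEC_VEC_SLDBYTE steps, labelled here by their byte shift amounts), halving the number of live lanes each time, then extract lane 0. A scalar C++ model of the V4SI umax case:

#include <algorithm>
#include <cstdint>

uint32_t reduc_umax_v4si (const uint32_t v[4])
{
  uint32_t t[4], r[4];
  // Byte shift 4: combine with the vector rotated by one 4-byte lane.
  for (int i = 0; i < 4; i++)
    t[i] = std::max (v[i], v[(i + 1) % 4]);
  // Byte shift 8: combine with the result rotated by two lanes.
  for (int i = 0; i < 4; i++)
    r[i] = std::max (t[i], t[(i + 2) % 4]);
  return r[0];  // lane 0 now holds the max of all four elements.
}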
diff --git a/gcc/config/s390/vx-builtins.md b/gcc/config/s390/vx-builtins.md
index a7bb7ff..9b89b13 100644
--- a/gcc/config/s390/vx-builtins.md
+++ b/gcc/config/s390/vx-builtins.md
@@ -982,20 +982,18 @@
; vmhb, vmhh, vmhf, vmhg, vmhq
(define_insn "vec_smulh<mode>"
- [(set (match_operand:VIT_HW_VXE3_DT 0 "register_operand" "=v")
- (unspec:VIT_HW_VXE3_DT [(match_operand:VIT_HW_VXE3_DT 1 "register_operand" "v")
- (match_operand:VIT_HW_VXE3_DT 2 "register_operand" "v")]
- UNSPEC_VEC_SMULT_HI))]
+ [(set (match_operand:VIT_HW_VXE3_DT 0 "register_operand" "=v")
+ (smul_highpart:VIT_HW_VXE3_DT (match_operand:VIT_HW_VXE3_DT 1 "register_operand" "v")
+ (match_operand:VIT_HW_VXE3_DT 2 "register_operand" "v")))]
"TARGET_VX"
"vmh<bhfgq>\t%v0,%v1,%v2"
[(set_attr "op_type" "VRR")])
; vmlhb, vmlhh, vmlhf, vmlhg, vmlhq
(define_insn "vec_umulh<mode>"
- [(set (match_operand:VIT_HW_VXE3_DT 0 "register_operand" "=v")
- (unspec:VIT_HW_VXE3_DT [(match_operand:VIT_HW_VXE3_DT 1 "register_operand" "v")
- (match_operand:VIT_HW_VXE3_DT 2 "register_operand" "v")]
- UNSPEC_VEC_UMULT_HI))]
+ [(set (match_operand:VIT_HW_VXE3_DT 0 "register_operand" "=v")
+ (umul_highpart:VIT_HW_VXE3_DT (match_operand:VIT_HW_VXE3_DT 1 "register_operand" "v")
+ (match_operand:VIT_HW_VXE3_DT 2 "register_operand" "v")))]
"TARGET_VX"
"vmlh<bhfgq>\t%v0,%v1,%v2"
[(set_attr "op_type" "VRR")])
@@ -2136,23 +2134,22 @@
"<vw>fche<sdx>bs\t%v2,%v0,%v1"
[(set_attr "op_type" "VRR")])
-
(define_insn "vfmin<mode>"
- [(set (match_operand:VF_HW 0 "register_operand" "=v")
- (unspec:VF_HW [(match_operand:VF_HW 1 "register_operand" "v")
- (match_operand:VF_HW 2 "register_operand" "v")
- (match_operand:QI 3 "const_mask_operand" "C")]
- UNSPEC_VEC_VFMIN))]
+ [(set (match_operand:VFT_BFP 0 "register_operand" "=v")
+ (unspec:VFT_BFP [(match_operand:VFT_BFP 1 "register_operand" "v")
+ (match_operand:VFT_BFP 2 "register_operand" "v")
+ (match_operand:QI 3 "const_mask_operand" "C")]
+ UNSPEC_FMIN))]
"TARGET_VXE"
"<vw>fmin<sdx>b\t%v0,%v1,%v2,%b3"
[(set_attr "op_type" "VRR")])
(define_insn "vfmax<mode>"
- [(set (match_operand:VF_HW 0 "register_operand" "=v")
- (unspec:VF_HW [(match_operand:VF_HW 1 "register_operand" "v")
- (match_operand:VF_HW 2 "register_operand" "v")
- (match_operand:QI 3 "const_mask_operand" "C")]
- UNSPEC_VEC_VFMAX))]
+ [(set (match_operand:VFT_BFP 0 "register_operand" "=v")
+ (unspec:VFT_BFP [(match_operand:VFT_BFP 1 "register_operand" "v")
+ (match_operand:VFT_BFP 2 "register_operand" "v")
+ (match_operand:QI 3 "const_mask_operand" "C")]
+ UNSPEC_FMAX))]
"TARGET_VXE"
"<vw>fmax<sdx>b\t%v0,%v1,%v2,%b3"
[(set_attr "op_type" "VRR")])
diff --git a/gcc/config/sh/predicates.md b/gcc/config/sh/predicates.md
index 7349c97..e67ec8a 100644
--- a/gcc/config/sh/predicates.md
+++ b/gcc/config/sh/predicates.md
@@ -630,9 +630,7 @@
;; Same as treg_set_expr but disallow constants 0 and 1 which can be loaded
;; into the T bit.
(define_predicate "treg_set_expr_not_const01"
- (and (match_test "op != const0_rtx")
- (match_test "op != const1_rtx")
- (match_operand 0 "treg_set_expr")))
+ (match_test "sh_recog_treg_set_expr_not_01 (op, mode)"))
;; A predicate describing the T bit register in any form.
(define_predicate "t_reg_operand"
diff --git a/gcc/config/sh/sh-modes.def b/gcc/config/sh/sh-modes.def
index 80650b4..e31ae69 100644
--- a/gcc/config/sh/sh-modes.def
+++ b/gcc/config/sh/sh-modes.def
@@ -17,6 +17,12 @@ You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
+/* SH has the same reversed quiet bit as MIPS. */
+RESET_FLOAT_FORMAT (SF, mips_single_format);
+RESET_FLOAT_FORMAT (DF, mips_double_format);
+/* TFmode: IEEE quad floating point (software). */
+FLOAT_MODE (TF, 16, mips_quad_format);
+
/* Vector modes. */
VECTOR_MODE (INT, QI, 2); /* V2QI */
VECTOR_MODES (INT, 4); /* V4QI V2HI */
diff --git a/gcc/config/sh/sh-protos.h b/gcc/config/sh/sh-protos.h
index c8cc19f..e78b669 100644
--- a/gcc/config/sh/sh-protos.h
+++ b/gcc/config/sh/sh-protos.h
@@ -261,6 +261,7 @@ extern rtx_insn* sh_peephole_emit_move_insn (rtx dst, rtx src);
extern bool sh_in_recog_treg_set_expr (void);
extern bool sh_recog_treg_set_expr (rtx op, machine_mode mode);
+extern bool sh_recog_treg_set_expr_not_01 (rtx op, machine_mode mode);
/* Result value of sh_split_treg_set_expr. Contains the first insn emitted
and the optional trailing nott insn. */
diff --git a/gcc/config/sh/sh.cc b/gcc/config/sh/sh.cc
index 1bc34e0..09e4ff7 100644
--- a/gcc/config/sh/sh.cc
+++ b/gcc/config/sh/sh.cc
@@ -12348,6 +12348,23 @@ sh_recog_treg_set_expr (rtx op, machine_mode mode)
return result >= 0;
}
+/* Return TRUE if OP is an expression for which there is a pattern to
+ set the T bit, unless the expression can be trivially loaded into
+ the T bit; FALSE otherwise. */
+bool
+sh_recog_treg_set_expr_not_01 (rtx op, machine_mode mode)
+{
+ if (op == const0_rtx || op == const1_rtx)
+ return false;
+
+ /* A right shift of 31 will return 0 or 1. */
+ if ((GET_CODE (op) == LSHIFTRT || GET_CODE (op) == ASHIFTRT)
+ && INTVAL (XEXP (op, 1)) == 31)
+ return false;
+
+ return sh_recog_treg_set_expr (op, mode);
+}
+
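[Editorial sketch] For illustration, the two shift forms the new check filters out, in scalar terms (signed right shift is arithmetic on GCC, though only implementation-defined before C++20):

#include <cstdint>

uint32_t msb_as_0_or_1 (uint32_t x)  { return x >> 31; }  // LSHIFTRT: 0 or 1
int32_t  msb_as_0_or_m1 (int32_t x)  { return x >> 31; }  // ASHIFTRT: 0 or -1
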
/* Returns true when recog of a 'treg_set_expr' is currently in progress.
This can be used as a condition for insn/split patterns to allow certain
T bit setting patters only to be matched as sub expressions of other
diff --git a/gcc/config/sh/sh_treg_combine.cc b/gcc/config/sh/sh_treg_combine.cc
index 3dbd6c3..696fe32 100644
--- a/gcc/config/sh/sh_treg_combine.cc
+++ b/gcc/config/sh/sh_treg_combine.cc
@@ -945,10 +945,7 @@ sh_treg_combine::make_not_reg_insn (rtx dst_reg, rtx src_reg) const
else
gcc_unreachable ();
- rtx i = get_insns ();
- end_sequence ();
-
- return i;
+ return end_sequence ();
}
rtx_insn *
diff --git a/gcc/config/sparc/sparc.cc b/gcc/config/sparc/sparc.cc
index 2196a0c..ffd1fb9 100644
--- a/gcc/config/sparc/sparc.cc
+++ b/gcc/config/sparc/sparc.cc
@@ -4762,8 +4762,7 @@ sparc_legitimize_tls_address (rtx addr)
addr, const1_rtx));
use_reg (&CALL_INSN_FUNCTION_USAGE (insn), o0);
RTL_CONST_CALL_P (insn) = 1;
- insn = get_insns ();
- end_sequence ();
+ insn = end_sequence ();
emit_libcall_block (insn, ret, o0, addr);
break;
@@ -4782,8 +4781,7 @@ sparc_legitimize_tls_address (rtx addr)
const1_rtx));
use_reg (&CALL_INSN_FUNCTION_USAGE (insn), o0);
RTL_CONST_CALL_P (insn) = 1;
- insn = get_insns ();
- end_sequence ();
+ insn = end_sequence ();
/* Attach a unique REG_EQUAL, to allow the RTL optimizers to
share the LD_BASE result with other LD model accesses. */
emit_libcall_block (insn, temp3, o0,
@@ -12530,8 +12528,7 @@ sparc_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
if (!TARGET_VXWORKS_RTP)
pic_offset_table_rtx = got_register_rtx;
scratch = sparc_legitimize_pic_address (funexp, scratch);
- seq = get_insns ();
- end_sequence ();
+ seq = end_sequence ();
emit_and_preserve (seq, spill_reg, pic_offset_table_rtx);
}
else if (TARGET_ARCH32)
@@ -12557,8 +12554,7 @@ sparc_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
spill_reg = gen_rtx_REG (DImode, 15); /* %o7 */
start_sequence ();
sparc_emit_set_symbolic_const64 (scratch, funexp, spill_reg);
- seq = get_insns ();
- end_sequence ();
+ seq = end_sequence ();
emit_and_preserve (seq, spill_reg, 0);
break;
@@ -13242,8 +13238,7 @@ sparc_init_pic_reg (void)
load_got_register ();
if (!TARGET_VXWORKS_RTP)
emit_move_insn (pic_offset_table_rtx, got_register_rtx);
- seq = get_insns ();
- end_sequence ();
+ seq = end_sequence ();
entry_edge = single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun));
insert_insn_on_edge (seq, entry_edge);
diff --git a/gcc/config/sparc/sparc.md b/gcc/config/sparc/sparc.md
index 4d46cfd..c6e06b4 100644
--- a/gcc/config/sparc/sparc.md
+++ b/gcc/config/sparc/sparc.md
@@ -3014,17 +3014,18 @@
rtx shift_16 = GEN_INT (16);
int op1_subbyte = 0;
- if (GET_CODE (operand1) == SUBREG)
+ if (GET_CODE (operands[1]) == SUBREG)
{
- op1_subbyte = SUBREG_BYTE (operand1);
+ op1_subbyte = SUBREG_BYTE (operands[1]);
op1_subbyte /= GET_MODE_SIZE (SImode);
op1_subbyte *= GET_MODE_SIZE (SImode);
- operand1 = XEXP (operand1, 0);
+ operands[1] = XEXP (operands[1], 0);
}
- emit_insn (gen_ashlsi3 (temp, gen_rtx_SUBREG (SImode, operand1, op1_subbyte),
+ emit_insn (gen_ashlsi3 (temp, gen_rtx_SUBREG (SImode, operands[1],
+ op1_subbyte),
shift_16));
- emit_insn (gen_lshrsi3 (operand0, temp, shift_16));
+ emit_insn (gen_lshrsi3 (operands[0], temp, shift_16));
DONE;
})
@@ -3097,17 +3098,18 @@
rtx shift_48 = GEN_INT (48);
int op1_subbyte = 0;
- if (GET_CODE (operand1) == SUBREG)
+ if (GET_CODE (operands[1]) == SUBREG)
{
- op1_subbyte = SUBREG_BYTE (operand1);
+ op1_subbyte = SUBREG_BYTE (operands[1]);
op1_subbyte /= GET_MODE_SIZE (DImode);
op1_subbyte *= GET_MODE_SIZE (DImode);
- operand1 = XEXP (operand1, 0);
+ operands[1] = XEXP (operands[1], 0);
}
- emit_insn (gen_ashldi3 (temp, gen_rtx_SUBREG (DImode, operand1, op1_subbyte),
+ emit_insn (gen_ashldi3 (temp, gen_rtx_SUBREG (DImode, operands[1],
+ op1_subbyte),
shift_48));
- emit_insn (gen_lshrdi3 (operand0, temp, shift_48));
+ emit_insn (gen_lshrdi3 (operands[0], temp, shift_48));
DONE;
})
@@ -3283,17 +3285,18 @@
rtx shift_16 = GEN_INT (16);
int op1_subbyte = 0;
- if (GET_CODE (operand1) == SUBREG)
+ if (GET_CODE (operands[1]) == SUBREG)
{
- op1_subbyte = SUBREG_BYTE (operand1);
+ op1_subbyte = SUBREG_BYTE (operands[1]);
op1_subbyte /= GET_MODE_SIZE (SImode);
op1_subbyte *= GET_MODE_SIZE (SImode);
- operand1 = XEXP (operand1, 0);
+ operands[1] = XEXP (operands[1], 0);
}
- emit_insn (gen_ashlsi3 (temp, gen_rtx_SUBREG (SImode, operand1, op1_subbyte),
+ emit_insn (gen_ashlsi3 (temp, gen_rtx_SUBREG (SImode, operands[1],
+ op1_subbyte),
shift_16));
- emit_insn (gen_ashrsi3 (operand0, temp, shift_16));
+ emit_insn (gen_ashrsi3 (operands[0], temp, shift_16));
DONE;
})
@@ -3315,25 +3318,26 @@
int op1_subbyte = 0;
int op0_subbyte = 0;
- if (GET_CODE (operand1) == SUBREG)
+ if (GET_CODE (operands[1]) == SUBREG)
{
- op1_subbyte = SUBREG_BYTE (operand1);
+ op1_subbyte = SUBREG_BYTE (operands[1]);
op1_subbyte /= GET_MODE_SIZE (SImode);
op1_subbyte *= GET_MODE_SIZE (SImode);
- operand1 = XEXP (operand1, 0);
+ operands[1] = XEXP (operands[1], 0);
}
- if (GET_CODE (operand0) == SUBREG)
+ if (GET_CODE (operands[0]) == SUBREG)
{
- op0_subbyte = SUBREG_BYTE (operand0);
+ op0_subbyte = SUBREG_BYTE (operands[0]);
op0_subbyte /= GET_MODE_SIZE (SImode);
op0_subbyte *= GET_MODE_SIZE (SImode);
- operand0 = XEXP (operand0, 0);
+ operands[0] = XEXP (operands[0], 0);
}
- emit_insn (gen_ashlsi3 (temp, gen_rtx_SUBREG (SImode, operand1, op1_subbyte),
+ emit_insn (gen_ashlsi3 (temp, gen_rtx_SUBREG (SImode, operands[1],
+ op1_subbyte),
shift_24));
- if (GET_MODE (operand0) != SImode)
- operand0 = gen_rtx_SUBREG (SImode, operand0, op0_subbyte);
- emit_insn (gen_ashrsi3 (operand0, temp, shift_24));
+ if (GET_MODE (operands[0]) != SImode)
+ operands[0] = gen_rtx_SUBREG (SImode, operands[0], op0_subbyte);
+ emit_insn (gen_ashrsi3 (operands[0], temp, shift_24));
DONE;
})
@@ -3354,17 +3358,18 @@
rtx shift_24 = GEN_INT (24);
int op1_subbyte = 0;
- if (GET_CODE (operand1) == SUBREG)
+ if (GET_CODE (operands[1]) == SUBREG)
{
- op1_subbyte = SUBREG_BYTE (operand1);
+ op1_subbyte = SUBREG_BYTE (operands[1]);
op1_subbyte /= GET_MODE_SIZE (SImode);
op1_subbyte *= GET_MODE_SIZE (SImode);
- operand1 = XEXP (operand1, 0);
+ operands[1] = XEXP (operands[1], 0);
}
- emit_insn (gen_ashlsi3 (temp, gen_rtx_SUBREG (SImode, operand1, op1_subbyte),
+ emit_insn (gen_ashlsi3 (temp, gen_rtx_SUBREG (SImode, operands[1],
+ op1_subbyte),
shift_24));
- emit_insn (gen_ashrsi3 (operand0, temp, shift_24));
+ emit_insn (gen_ashrsi3 (operands[0], temp, shift_24));
DONE;
})
@@ -3385,17 +3390,18 @@
rtx shift_56 = GEN_INT (56);
int op1_subbyte = 0;
- if (GET_CODE (operand1) == SUBREG)
+ if (GET_CODE (operands[1]) == SUBREG)
{
- op1_subbyte = SUBREG_BYTE (operand1);
+ op1_subbyte = SUBREG_BYTE (operands[1]);
op1_subbyte /= GET_MODE_SIZE (DImode);
op1_subbyte *= GET_MODE_SIZE (DImode);
- operand1 = XEXP (operand1, 0);
+ operands[1] = XEXP (operands[1], 0);
}
- emit_insn (gen_ashldi3 (temp, gen_rtx_SUBREG (DImode, operand1, op1_subbyte),
+ emit_insn (gen_ashldi3 (temp, gen_rtx_SUBREG (DImode, operands[1],
+ op1_subbyte),
shift_56));
- emit_insn (gen_ashrdi3 (operand0, temp, shift_56));
+ emit_insn (gen_ashrdi3 (operands[0], temp, shift_56));
DONE;
})
@@ -3416,17 +3422,18 @@
rtx shift_48 = GEN_INT (48);
int op1_subbyte = 0;
- if (GET_CODE (operand1) == SUBREG)
+ if (GET_CODE (operands[1]) == SUBREG)
{
- op1_subbyte = SUBREG_BYTE (operand1);
+ op1_subbyte = SUBREG_BYTE (operands[1]);
op1_subbyte /= GET_MODE_SIZE (DImode);
op1_subbyte *= GET_MODE_SIZE (DImode);
- operand1 = XEXP (operand1, 0);
+ operands[1] = XEXP (operands[1], 0);
}
- emit_insn (gen_ashldi3 (temp, gen_rtx_SUBREG (DImode, operand1, op1_subbyte),
+ emit_insn (gen_ashldi3 (temp, gen_rtx_SUBREG (DImode, operands[1],
+ op1_subbyte),
shift_48));
- emit_insn (gen_ashrdi3 (operand0, temp, shift_48));
+ emit_insn (gen_ashrdi3 (operands[0], temp, shift_48));
DONE;
})
diff --git a/gcc/config/stormy16/stormy16.cc b/gcc/config/stormy16/stormy16.cc
index ba2c8cd..5b92743 100644
--- a/gcc/config/stormy16/stormy16.cc
+++ b/gcc/config/stormy16/stormy16.cc
@@ -405,8 +405,7 @@ xstormy16_split_cbranch (machine_mode mode, rtx label, rtx comparison,
start_sequence ();
xstormy16_expand_arith (mode, COMPARE, dest, op0, op1);
- seq = get_insns ();
- end_sequence ();
+ seq = end_sequence ();
gcc_assert (INSN_P (seq));
diff --git a/gcc/config/stormy16/stormy16.md b/gcc/config/stormy16/stormy16.md
index 70c8282..15c60ad 100644
--- a/gcc/config/stormy16/stormy16.md
+++ b/gcc/config/stormy16/stormy16.md
@@ -702,8 +702,7 @@
[(parallel [(set (match_operand:SI 0 "register_operand" "")
(neg:SI (match_operand:SI 1 "register_operand" "")))
(clobber (reg:BI CARRY_REG))])]
- ""
- { operands[2] = gen_reg_rtx (HImode); })
+ "")
(define_insn_and_split "*negsi2_internal"
[(set (match_operand:SI 0 "register_operand" "=&r")
diff --git a/gcc/config/vxworks-dummy.h b/gcc/config/vxworks-dummy.h
index 494799d..516728c 100644
--- a/gcc/config/vxworks-dummy.h
+++ b/gcc/config/vxworks-dummy.h
@@ -40,9 +40,21 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
#define TARGET_VXWORKS_RTP false
#endif
+/* True if offsets between different segments may vary, so we must avoid
+ cross-segment GOT- and PC-relative address computations. */
+#ifndef TARGET_VXWORKS_VAROFF
+#define TARGET_VXWORKS_VAROFF false
+#endif
+
/* The symbol that points to an RTP's table of GOTs. */
#define VXWORKS_GOTT_BASE (gcc_unreachable (), "")
/* The symbol that holds the index of the current module's GOT in
VXWORKS_GOTT_BASE. */
#define VXWORKS_GOTT_INDEX (gcc_unreachable (), "")
+
+/* True if PIC relies on the GOTT_* symbols above. As of VxWorks7, they are no
+ longer used. */
+#ifndef TARGET_VXWORKS_GOTTPIC
+#define TARGET_VXWORKS_GOTTPIC false
+#endif
diff --git a/gcc/config/vxworks.h b/gcc/config/vxworks.h
index 204a8e0..d2b6025 100644
--- a/gcc/config/vxworks.h
+++ b/gcc/config/vxworks.h
@@ -159,6 +159,18 @@ extern void vxworks_driver_init (unsigned int *, struct cl_decoded_option **);
Earlier versions did not, not even for RTPS. */
#define VXWORKS_HAVE_TLS TARGET_VXWORKS7
+/* RTP segments may be loaded at varying offsets, so cross-segment offsets
+ cannot be assumed to be constant. This rules out some PC- and
+ GOT-relative addressing. */
+#undef TARGET_VXWORKS_VAROFF
+#define TARGET_VXWORKS_VAROFF (!TARGET_VXWORKS7 && TARGET_VXWORKS_RTP)
+
+/* The GOTT_BASE and GOTT_INDEX symbols are used only by some ports, and only
+ up to VxWorks6. So far only i386 tests this macro; other ports seem to
+ keep on using GOTT-based PIC from VxWorks7 on, but they don't test it. */
+#undef TARGET_VXWORKS_GOTTPIC
+#define TARGET_VXWORKS_GOTTPIC (!TARGET_VXWORKS7)
+
/* On Vx6 and previous, the libraries to pick up depends on the architecture,
so cannot be defined for all archs at once. On Vx7, a VSB is always needed
and its structure is fixed and does not depend on the arch. We can thus
diff --git a/gcc/config/xtensa/constraints.md b/gcc/config/xtensa/constraints.md
index 77c9571..727ec1e 100644
--- a/gcc/config/xtensa/constraints.md
+++ b/gcc/config/xtensa/constraints.md
@@ -130,7 +130,7 @@
(and (match_code "mem")
(match_test "smalloffset_mem_p (op)")))
-(define_memory_constraint "T"
+(define_special_memory_constraint "T"
"Memory in a literal pool (addressable with an L32R instruction)."
(and (match_code "mem")
(match_test "!TARGET_CONST16 && constantpool_mem_p (op)")))
diff --git a/gcc/config/xtensa/predicates.md b/gcc/config/xtensa/predicates.md
index aad4146..20160a4 100644
--- a/gcc/config/xtensa/predicates.md
+++ b/gcc/config/xtensa/predicates.md
@@ -183,25 +183,15 @@
(and (match_code "const_int")
(match_test "xtensa_mem_offset (INTVAL (op), SFmode)")))
-(define_predicate "reload_operand"
- (match_code "mem")
-{
- const_rtx addr = XEXP (op, 0);
- if (REG_P (addr))
- return REGNO (addr) == A1_REG;
- if (GET_CODE (addr) == PLUS)
- return REG_P (XEXP (addr, 0))
- && REGNO (XEXP (addr, 0)) == A1_REG
- && CONST_INT_P (XEXP (addr, 1));
- return false;
-})
-
(define_predicate "branch_operator"
(match_code "eq,ne,lt,ge"))
(define_predicate "ubranch_operator"
(match_code "ltu,geu"))
+(define_predicate "alt_ubranch_operator"
+ (match_code "gtu,leu"))
+
(define_predicate "boolean_operator"
(match_code "eq,ne"))
diff --git a/gcc/config/xtensa/xtensa-protos.h b/gcc/config/xtensa/xtensa-protos.h
index 1f5dcf5..98e75c6 100644
--- a/gcc/config/xtensa/xtensa-protos.h
+++ b/gcc/config/xtensa/xtensa-protos.h
@@ -60,6 +60,7 @@ extern bool xtensa_tls_referenced_p (rtx);
extern enum rtx_code xtensa_shlrd_which_direction (rtx, rtx);
extern bool xtensa_split1_finished_p (void);
extern void xtensa_split_DI_reg_imm (rtx *);
+extern char *xtensa_bswapsi2_output (rtx_insn *, const char *);
#ifdef TREE_CODE
extern void init_cumulative_args (CUMULATIVE_ARGS *, int);
diff --git a/gcc/config/xtensa/xtensa.cc b/gcc/config/xtensa/xtensa.cc
index 53db06e..f3b89de 100644
--- a/gcc/config/xtensa/xtensa.cc
+++ b/gcc/config/xtensa/xtensa.cc
@@ -48,7 +48,6 @@ along with GCC; see the file COPYING3. If not see
#include "alias.h"
#include "explow.h"
#include "expr.h"
-#include "reload.h"
#include "langhooks.h"
#include "gimplify.h"
#include "builtins.h"
@@ -160,6 +159,10 @@ static void xtensa_asm_trampoline_template (FILE *);
static void xtensa_trampoline_init (rtx, tree, rtx);
static bool xtensa_output_addr_const_extra (FILE *, rtx);
static bool xtensa_cannot_force_const_mem (machine_mode, rtx);
+static machine_mode xtensa_promote_function_mode (const_tree,
+ machine_mode,
+ int *, const_tree,
+ int);
static reg_class_t xtensa_preferred_reload_class (rtx, reg_class_t);
static reg_class_t xtensa_preferred_output_reload_class (rtx, reg_class_t);
@@ -197,6 +200,9 @@ static void xtensa_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
tree function);
static rtx xtensa_delegitimize_address (rtx);
+static reg_class_t xtensa_ira_change_pseudo_allocno_class (int, reg_class_t,
+ reg_class_t);
+static HARD_REG_SET xtensa_zero_call_used_regs (HARD_REG_SET);
@@ -233,9 +239,7 @@ static rtx xtensa_delegitimize_address (rtx);
#define TARGET_EXPAND_BUILTIN_VA_START xtensa_va_start
#undef TARGET_PROMOTE_FUNCTION_MODE
-#define TARGET_PROMOTE_FUNCTION_MODE default_promote_function_mode_always_promote
-#undef TARGET_PROMOTE_PROTOTYPES
-#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
+#define TARGET_PROMOTE_FUNCTION_MODE xtensa_promote_function_mode
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY xtensa_return_in_memory
@@ -366,6 +370,12 @@ static rtx xtensa_delegitimize_address (rtx);
#undef TARGET_DIFFERENT_ADDR_DISPLACEMENT_P
#define TARGET_DIFFERENT_ADDR_DISPLACEMENT_P hook_bool_void_true
+#undef TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS
+#define TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS xtensa_ira_change_pseudo_allocno_class
+
+#undef TARGET_ZERO_CALL_USED_REGS
+#define TARGET_ZERO_CALL_USED_REGS xtensa_zero_call_used_regs
+
struct gcc_target targetm = TARGET_INITIALIZER;
@@ -413,12 +423,13 @@ xtensa_uimm8x4 (HOST_WIDE_INT v)
}
-static bool
-xtensa_b4const (HOST_WIDE_INT v)
+bool
+xtensa_b4const_or_zero (HOST_WIDE_INT v)
{
switch (v)
{
case -1:
+ case 0:
case 1:
case 2:
case 3:
@@ -441,15 +452,6 @@ xtensa_b4const (HOST_WIDE_INT v)
bool
-xtensa_b4const_or_zero (HOST_WIDE_INT v)
-{
- if (v == 0)
- return true;
- return xtensa_b4const (v);
-}
-
-
-bool
xtensa_b4constu (HOST_WIDE_INT v)
{
switch (v)
@@ -599,8 +601,8 @@ constantpool_address_p (const_rtx addr)
/* Make sure the address is word aligned. */
offset = XEXP (addr, 1);
- if ((!CONST_INT_P (offset))
- || ((INTVAL (offset) & 3) != 0))
+ if (! CONST_INT_P (offset)
+ || (INTVAL (offset) & 3) != 0)
return false;
sym = XEXP (addr, 0);
@@ -609,6 +611,7 @@ constantpool_address_p (const_rtx addr)
if (SYMBOL_REF_P (sym)
&& CONSTANT_POOL_ADDRESS_P (sym))
return true;
+
return false;
}
@@ -1482,8 +1485,7 @@ xtensa_copy_incoming_a7 (rtx opnd)
if (mode == DFmode || mode == DImode)
emit_insn (gen_movsi_internal (gen_rtx_SUBREG (SImode, tmp, 0),
gen_rtx_REG (SImode, A7_REG - 1)));
- entry_insns = get_insns ();
- end_sequence ();
+ entry_insns = end_sequence ();
if (cfun->machine->vararg_a7)
{
@@ -1644,8 +1646,7 @@ xtensa_expand_block_set_libcall (rtx dst_mem,
GEN_INT (value), SImode,
GEN_INT (bytes), SImode);
- seq = get_insns ();
- end_sequence ();
+ seq = end_sequence ();
return seq;
}
@@ -1706,8 +1707,7 @@ xtensa_expand_block_set_unrolled_loop (rtx dst_mem,
}
while (bytes > 0);
- seq = get_insns ();
- end_sequence ();
+ seq = end_sequence ();
return seq;
}
@@ -1788,8 +1788,7 @@ xtensa_expand_block_set_small_loop (rtx dst_mem,
emit_insn (gen_addsi3 (dst, dst, GEN_INT (align)));
emit_cmp_and_jump_insns (dst, end, NE, const0_rtx, SImode, true, label);
- seq = get_insns ();
- end_sequence ();
+ seq = end_sequence ();
return seq;
}
@@ -2467,8 +2466,7 @@ xtensa_call_tls_desc (rtx sym, rtx *retp)
emit_move_insn (a_io, arg);
call_insn = emit_call_insn (gen_tls_call (a_io, fn, sym, const1_rtx));
use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), a_io);
- insns = get_insns ();
- end_sequence ();
+ insns = end_sequence ();
*retp = a_io;
return insns;
@@ -2647,6 +2645,94 @@ xtensa_split_DI_reg_imm (rtx *operands)
}
+/* Return the asm output string for the bswapsi2_internal insn pattern.
+ It does this by scanning backwards through the BB from the specified
+ insn; if another bswapsi2_internal is found, the instruction that sets
+ SAR to 8 is omitted. If none is found, or if a CALL, JUMP, ASM, or
+ other insn that clobbers SAR is found first, an instruction setting
+ SAR to 8 is prepended as usual. */
+
+static int
+xtensa_bswapsi2_output_1 (rtx_insn *insn)
+{
+ int icode;
+ rtx pat;
+ const char *iname;
+
+ /* CALL insns do not preserve SAR.
+ JUMP insns appear only at the end of a BB, so they do not need to
+ be considered when scanning backwards. */
+ if (CALL_P (insn))
+ return -1;
+
+ switch (icode = INSN_CODE (insn))
+ {
+ /* Rotate insns clobber SAR. */
+ case CODE_FOR_rotlsi3:
+ case CODE_FOR_rotrsi3:
+ return -1;
+ /* Simple shift insns clobber SAR if the shift amount is not an immediate. */
+ case CODE_FOR_ashlsi3_internal:
+ case CODE_FOR_ashrsi3:
+ case CODE_FOR_lshrsi3:
+ if (! CONST_INT_P (XEXP (SET_SRC (PATTERN (insn)), 1)))
+ return -1;
+ break;
+ /* This insn always sets SAR to 8. */
+ case CODE_FOR_bswapsi2_internal:
+ return 1;
+ default:
+ break;
+ }
+
+ /* "*shift_per_byte" and "*shlrd_*" complex shift insns clobber SAR. */
+ if (icode >= CODE_FOR_nothing
+ && (! strcmp (iname = insn_data[icode].name, "*shift_per_byte")
+ || ! strncmp (iname, "*shlrd_", 7)))
+ return -1;
+
+ /* Asm statements may also clobber SAR, so treat them as if they do. */
+ if (NONJUMP_INSN_P (insn))
+ switch (GET_CODE (pat = PATTERN (insn)))
+ {
+ case SET:
+ return GET_CODE (SET_SRC (pat)) == ASM_OPERANDS ? -1 : 0;
+ case PARALLEL:
+ return (GET_CODE (pat = XVECEXP (pat, 0, 0)) == SET
+ && GET_CODE (SET_SRC (pat)) == ASM_OPERANDS)
+ || GET_CODE (pat) == ASM_OPERANDS
+ || GET_CODE (pat) == ASM_INPUT ? -1 : 0;
+ case ASM_OPERANDS:
+ return -1;
+ default:
+ break;
+ }
+
+ /* All other insns neither use nor clobber SAR. */
+ return 0;
+}
+
+char *
+xtensa_bswapsi2_output (rtx_insn *insn, const char *output)
+{
+ static char result[128];
+ int i;
+
+ strcpy (result, "ssai\t8\n\t");
+ while ((insn = prev_nonnote_nondebug_insn_bb (insn)))
+ if ((i = xtensa_bswapsi2_output_1 (insn)) < 0)
+ break;
+ else if (i > 0)
+ {
+ result[0] = '\0';
+ break;
+ }
+ strcat (result, output);
+
+ return result;
+}
+
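[Editorial sketch] The asm template passed in from bswapsi2_internal performs the swap with funnel shifts once SAR is 8. A scalar model of the first alternative's four instructions, where src8() stands in for Xtensa's Shift Right Combined with SAR == 8:

#include <cstdint>

static uint32_t src8 (uint32_t hi, uint32_t lo)
{
  return (hi << 24) | (lo >> 8);  // (hi:lo) >> SAR, SAR == 8
}

uint32_t bswap_model (uint32_t x)
{
  uint32_t t = x >> 16;   // srli t, x, 16
  t = src8 (t, x);        // src  t, t, x
  t = src8 (t, t);        // src  t, t, t  (rotate right by 8)
  return src8 (x, t);     // src  out, x, t
}
// bswap_model (0x11223344) == 0x44332211
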
+
/* Try to split an integer value into what are suitable for two consecutive
immediate addition instructions, ADDI or ADDMI. */
@@ -3048,6 +3134,8 @@ xtensa_modes_tieable_p (machine_mode mode1, machine_mode mode2)
'K' CONST_INT, print number of bits in mask for EXTUI
'R' CONST_INT, print (X & 0x1f)
'L' CONST_INT, print ((32 - X) & 0x1f)
+ 'U', CONST_DOUBLE:SF, print (REAL_EXP (rval) - 1)
+ 'V', CONST_DOUBLE:SF, print (1 - REAL_EXP (rval))
'D' REG, print second register of double-word register operand
'N' MEM, print address of next word following a memory operand
'v' MEM, if memory reference is volatile, output a MEMW before it
@@ -3144,6 +3232,20 @@ print_operand (FILE *file, rtx x, int letter)
output_operand_lossage ("invalid %%R value");
break;
+ case 'U':
+ if (CONST_DOUBLE_P (x) && GET_MODE (x) == SFmode)
+ fprintf (file, "%d", REAL_EXP (CONST_DOUBLE_REAL_VALUE (x)) - 1);
+ else
+ output_operand_lossage ("invalid %%U value");
+ break;
+
+ case 'V':
+ if (CONST_DOUBLE_P (x) && GET_MODE (x) == SFmode)
+ fprintf (file, "%d", 1 - REAL_EXP (CONST_DOUBLE_REAL_VALUE (x)));
+ else
+ output_operand_lossage ("invalid %%V value");
+ break;
+
case 'x':
if (CONST_INT_P (x))
printx (file, INTVAL (x));
@@ -4430,17 +4532,25 @@ static int
xtensa_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
reg_class_t from, reg_class_t to)
{
- if (from == to && from != BR_REGS && to != BR_REGS)
+ /* If both are equal (except for BR_REGS) or belong to AR_REGS,
+ the cost is 2 (the default value). */
+ if ((from == to && from != BR_REGS && to != BR_REGS)
+ || (reg_class_subset_p (from, AR_REGS)
+ && reg_class_subset_p (to, AR_REGS)))
return 2;
- else if (reg_class_subset_p (from, AR_REGS)
- && reg_class_subset_p (to, AR_REGS))
+
+ /* The cost between AR_REGS and FP_REGS is 2 (the default value). */
+ if ((reg_class_subset_p (from, AR_REGS) && to == FP_REGS)
+ || (from == FP_REGS && reg_class_subset_p (to, AR_REGS)))
return 2;
- else if (reg_class_subset_p (from, AR_REGS) && to == ACC_REG)
- return 3;
- else if (from == ACC_REG && reg_class_subset_p (to, AR_REGS))
+
+ if ((reg_class_subset_p (from, AR_REGS) && to == ACC_REG)
+ || (from == ACC_REG && reg_class_subset_p (to, AR_REGS)))
return 3;
- else
- return 10;
+
+ /* Otherwise, force a spill to the stack by returning a cost greater
+ than 2x the default MEMORY_MOVE_COST. */
+ return 10;
}
/* Compute a (partial) cost for rtx X. Return true if the complete
@@ -4483,7 +4593,8 @@ xtensa_rtx_costs (rtx x, machine_mode mode, int outer_code,
}
break;
case COMPARE:
- if ((INTVAL (x) == 0) || xtensa_b4const (INTVAL (x)))
+ if (xtensa_b4const_or_zero (INTVAL (x))
+ || xtensa_b4constu (INTVAL (x)))
{
*total = 0;
return true;
@@ -4672,29 +4783,56 @@ xtensa_rtx_costs (rtx x, machine_mode mode, int outer_code,
}
}
+/* Return TRUE if the specified insn corresponds to one or more L32R machine
+ instructions. */
+
static bool
xtensa_is_insn_L32R_p (const rtx_insn *insn)
{
- rtx x = PATTERN (insn);
+ rtx pat, dest, src;
+ machine_mode mode;
- if (GET_CODE (x) != SET)
+ /* RTX insns that are not "(set (reg) ...)" cannot become L32R instructions:
+ - it is permitted to apply PATTERN() to the insn without validation.
+ See insn_cost() in gcc/rtlanal.cc.
+ - register_operand() is used instead of REG_P() so as to also catch
+ things that don't look like REGs yet but will eventually become so. */
+ if (GET_CODE (pat = PATTERN (insn)) != SET
+ || ! register_operand (dest = SET_DEST (pat), VOIDmode))
return false;
- x = XEXP (x, 1);
- if (MEM_P (x))
- {
- x = XEXP (x, 0);
- return (SYMBOL_REF_P (x) || CONST_INT_P (x))
- && CONSTANT_POOL_ADDRESS_P (x);
- }
-
- /* relaxed MOVI instructions, that will be converted to L32R by the
- assembler. */
- if (CONST_INT_P (x)
- && ! xtensa_simm12b (INTVAL (x)))
+ /* If the source is a reference to a literal pool entry, then the insn
+ obviously corresponds to an L32R instruction. */
+ if (constantpool_mem_p (src = SET_SRC (pat)))
return true;
- return false;
+ /* Similarly, an insn whose source is not a constant obviously does not
+ correspond to L32R. */
+ if (! CONSTANT_P (src))
+ return false;
+
+ /* If the source is a CONST_INT whose value fits into signed 12 bits, then
+ the insn corresponds to a MOVI instruction (rather than an L32R one),
+ regardless of the configuration of TARGET_CONST16 or
+ TARGET_AUTOLITPOOLS. Note that the destination register can be non-
+ SImode. */
+ if (((mode = GET_MODE (dest)) == SImode
+ || mode == HImode || mode == SFmode)
+ && CONST_INT_P (src) && xtensa_simm12b (INTVAL (src)))
+ return false;
+
+ /* If TARGET_CONST16 is configured, constants of the remaining forms
+ correspond to pairs of CONST16 instructions, not L32R. */
+ if (TARGET_CONST16)
+ return false;
+
+ /* The remaining forms of constant are:
+ - CONST_INTs with large values
+ - floating-point constants
+ - symbolic constants
+ all of which are handled by a relaxed MOVI instruction, which the
+ assembler later converts to an L32R instruction. */
+ return true;
}
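[Editorial sketch] Restated as a standalone decision function, with booleans standing in for the rtx predicates used above:

bool is_l32r (bool pool_mem, bool is_constant,
              bool fits_simm12, bool target_const16)
{
  if (pool_mem)        return true;   // literal-pool load: L32R
  if (!is_constant)    return false;  // not a constant source
  if (fits_simm12)     return false;  // plain MOVI
  if (target_const16)  return false;  // CONST16 pair
  return true;                        // relaxed MOVI, relaxed to L32R
}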
/* Compute a relative costs of RTL insns. This is necessary in order to
@@ -4703,7 +4841,7 @@ xtensa_is_insn_L32R_p (const rtx_insn *insn)
static int
xtensa_insn_cost (rtx_insn *insn, bool speed)
{
- if (!(recog_memoized (insn) < 0))
+ if (! (recog_memoized (insn) < 0))
{
int len = get_attr_length (insn);
@@ -4716,7 +4854,7 @@ xtensa_insn_cost (rtx_insn *insn, bool speed)
/* "L32R" may be particular slow (implementation-dependent). */
if (xtensa_is_insn_L32R_p (insn))
- return COSTS_N_INSNS (1 + xtensa_extra_l32r_costs);
+ return COSTS_N_INSNS ((1 + xtensa_extra_l32r_costs) * n);
/* Cost based on the pipeline model. */
switch (get_attr_type (insn))
@@ -4761,7 +4899,7 @@ xtensa_insn_cost (rtx_insn *insn, bool speed)
{
/* "L32R" itself plus constant in litpool. */
if (xtensa_is_insn_L32R_p (insn))
- len = 3 + 4;
+ len += (len / 3) * 4;
/* Consider fractional instruction length (for example, ".n"
short instructions or "L32R" litpool constants. */
@@ -4774,6 +4912,19 @@ xtensa_insn_cost (rtx_insn *insn, bool speed)
return pattern_cost (PATTERN (insn), speed);
}
+/* Worker function for TARGET_PROMOTE_FUNCTION_MODE. */
+
+static machine_mode
+xtensa_promote_function_mode (const_tree type, machine_mode mode,
+ int *punsignedp, const_tree, int)
+{
+ if (GET_MODE_CLASS (mode) == MODE_INT
+ && GET_MODE_SIZE (mode) < GET_MODE_SIZE (SImode))
+ return SImode;
+
+ return promote_mode (type, mode, punsignedp);
+}
+
/* Worker function for TARGET_RETURN_IN_MEMORY. */
static bool
@@ -5428,4 +5579,72 @@ xtensa_delegitimize_address (rtx op)
return op;
}
+/* Implement TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS, in order to tell
+ the register allocator to avoid using the ALL_REGS rclass. */
+
+static reg_class_t
+xtensa_ira_change_pseudo_allocno_class (int regno, reg_class_t allocno_class,
+ reg_class_t best_class)
+{
+ if (allocno_class != ALL_REGS)
+ return allocno_class;
+
+ if (best_class != ALL_REGS)
+ return best_class;
+
+ return FLOAT_MODE_P (PSEUDO_REGNO_MODE (regno)) ? FP_REGS : AR_REGS;
+}
+
+/* Implement TARGET_ZERO_CALL_USED_REGS. */
+
+static HARD_REG_SET
+xtensa_zero_call_used_regs (HARD_REG_SET selected_regs)
+{
+ unsigned int regno;
+ int zeroed_regno = -1;
+ hard_reg_set_iterator hrsi;
+ rtvec argvec, convec;
+
+ EXECUTE_IF_SET_IN_HARD_REG_SET (selected_regs, 1, regno, hrsi)
+ {
+ if (GP_REG_P (regno))
+ {
+ emit_move_insn (gen_rtx_REG (SImode, regno), const0_rtx);
+ if (zeroed_regno < 0)
+ zeroed_regno = regno;
+ continue;
+ }
+ if (TARGET_BOOLEANS && BR_REG_P (regno))
+ {
+ gcc_assert (zeroed_regno >= 0);
+ argvec = rtvec_alloc (1);
+ RTVEC_ELT (argvec, 0) = gen_rtx_REG (SImode, zeroed_regno);
+ convec = rtvec_alloc (1);
+ RTVEC_ELT (convec, 0) = gen_rtx_ASM_INPUT (SImode, "r");
+ emit_insn (gen_rtx_ASM_OPERANDS (VOIDmode, "wsr\t%0, BR",
+ "", 0, argvec, convec,
+ rtvec_alloc (0),
+ UNKNOWN_LOCATION));
+ continue;
+ }
+ if (TARGET_HARD_FLOAT && FP_REG_P (regno))
+ {
+ gcc_assert (zeroed_regno >= 0);
+ emit_move_insn (gen_rtx_REG (SFmode, regno),
+ gen_rtx_REG (SFmode, zeroed_regno));
+ continue;
+ }
+ if (TARGET_MAC16 && ACC_REG_P (regno))
+ {
+ gcc_assert (zeroed_regno >= 0);
+ emit_move_insn (gen_rtx_REG (SImode, regno),
+ gen_rtx_REG (SImode, zeroed_regno));
+ continue;
+ }
+ CLEAR_HARD_REG_BIT (selected_regs, regno);
+ }
+
+ return selected_regs;
+}
+
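[Editorial note] The hook is reached through the standard -fzero-call-used-regs option or the matching function attribute (existing GCC interfaces, not added by this patch); for instance:

// On return, the call-clobbered registers selected by the argument are
// zeroed; per the hook above, xtensa clears BR/FP/ACC registers by
// copying from an already-zeroed GPR.
void __attribute__ ((zero_call_used_regs ("used")))
handler (void)
{
  /* ... */
}
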
#include "gt-xtensa.h"
diff --git a/gcc/config/xtensa/xtensa.h b/gcc/config/xtensa/xtensa.h
index 9009db0..a8a0565 100644
--- a/gcc/config/xtensa/xtensa.h
+++ b/gcc/config/xtensa/xtensa.h
@@ -36,6 +36,7 @@ along with GCC; see the file COPYING3. If not see
#define TARGET_MINMAX XCHAL_HAVE_MINMAX
#define TARGET_SEXT XCHAL_HAVE_SEXT
#define TARGET_CLAMPS XCHAL_HAVE_CLAMPS
+#define TARGET_DEPBITS XCHAL_HAVE_DEPBITS
#define TARGET_BOOLEANS XCHAL_HAVE_BOOLEANS
#define TARGET_HARD_FLOAT XCHAL_HAVE_FP
#define TARGET_HARD_FLOAT_DIV XCHAL_HAVE_FP_DIV
diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md
index 88f011c..52ffb16 100644
--- a/gcc/config/xtensa/xtensa.md
+++ b/gcc/config/xtensa/xtensa.md
@@ -41,6 +41,8 @@
UNSPEC_LSETUP_START
UNSPEC_LSETUP_END
UNSPEC_FRAME_BLOCKAGE
+ UNSPEC_CEIL
+ UNSPEC_FLOOR
])
(define_c_enum "unspecv" [
@@ -86,6 +88,7 @@
;; This mode iterator allows the HI and QI patterns to be defined from
;; the same template.
(define_mode_iterator HQI [HI QI])
+(define_mode_attr mode_bits [(HI "16") (QI "8")])
;; This mode iterator allows the SI and HI patterns to be defined from
;; the same template.
@@ -103,6 +106,11 @@
(define_code_attr m_float [(float "float") (unsigned_float "ufloat")])
(define_code_attr s_float [(float "") (unsigned_float "uns")])
+;; This iterator and attribute allow FP-to-integer rounding of two types
+;; to be generated from one template.
+(define_int_iterator ANY_ROUND [UNSPEC_CEIL UNSPEC_FLOOR])
+(define_int_attr m_round [(UNSPEC_CEIL "ceil") (UNSPEC_FLOOR "floor")])
+
;; Attributes.
@@ -169,19 +177,18 @@
;; Addition.
(define_insn "addsi3"
- [(set (match_operand:SI 0 "register_operand" "=D,D,a,a,a")
- (plus:SI (match_operand:SI 1 "register_operand" "%d,d,r,r,r")
- (match_operand:SI 2 "add_operand" "d,O,r,J,N")))]
- ""
- "@
- add.n\t%0, %1, %2
- addi.n\t%0, %1, %d2
- add\t%0, %1, %2
- addi\t%0, %1, %d2
- addmi\t%0, %1, %x2"
- [(set_attr "type" "arith,arith,arith,arith,arith")
- (set_attr "mode" "SI")
- (set_attr "length" "2,2,3,3,3")])
+ [(set (match_operand:SI 0 "register_operand")
+ (plus:SI (match_operand:SI 1 "register_operand")
+ (match_operand:SI 2 "add_operand")))]
+ ""
+ {@ [cons: =0, %1, 2; attrs: type, length]
+ [D, d, d; arith, 2] add.n\t%0, %1, %2
+ [D, d, O; arith, 2] addi.n\t%0, %1, %d2
+ [a, r, r; arith, 3] add\t%0, %1, %2
+ [a, r, J; arith, 3] addi\t%0, %1, %d2
+ [a, r, N; arith, 3] addmi\t%0, %1, %x2
+ }
+ [(set_attr "mode" "SI")])
(define_insn "*addsubx"
[(set (match_operand:SI 0 "register_operand" "=a")
@@ -385,18 +392,15 @@
(set_attr "length" "3")])
(define_insn "<u>mulhisi3"
- [(set (match_operand:SI 0 "register_operand" "=C,A")
- (mult:SI (any_extend:SI
- (match_operand:HI 1 "register_operand" "%r,r"))
- (any_extend:SI
- (match_operand:HI 2 "register_operand" "r,r"))))]
+ [(set (match_operand:SI 0 "register_operand")
+ (mult:SI (any_extend:SI (match_operand:HI 1 "register_operand"))
+ (any_extend:SI (match_operand:HI 2 "register_operand"))))]
"TARGET_MUL16 || TARGET_MAC16"
- "@
- mul16<su>\t%0, %1, %2
- <u>mul.aa.ll\t%1, %2"
- [(set_attr "type" "mul16,mac16")
- (set_attr "mode" "SI")
- (set_attr "length" "3,3")])
+ {@ [cons: =0, %1, 2; attrs: type, length]
+ [C, r, r; mul16, 3] mul16<su>\t%0, %1, %2
+ [A, r, r; mac16, 3] <u>mul.aa.ll\t%1, %2
+ }
+ [(set_attr "mode" "SI")])
(define_insn "muladdhisi"
[(set (match_operand:SI 0 "register_operand" "=A")
@@ -645,36 +649,15 @@
})
(define_insn "bswapsi2_internal"
- [(set (match_operand:SI 0 "register_operand" "=a,&a")
- (bswap:SI (match_operand:SI 1 "register_operand" "0,r")))
- (clobber (match_scratch:SI 2 "=&a,X"))]
+ [(set (match_operand:SI 0 "register_operand")
+ (bswap:SI (match_operand:SI 1 "register_operand")))
+ (clobber (match_scratch:SI 2))]
"!optimize_debug && optimize > 1 && !optimize_size"
-{
- rtx_insn *prev_insn = prev_nonnote_nondebug_insn (insn);
- const char *init = "ssai\t8\;";
- static char result[128];
- if (prev_insn && NONJUMP_INSN_P (prev_insn))
- {
- rtx x = PATTERN (prev_insn);
- if (GET_CODE (x) == PARALLEL && XVECLEN (x, 0) == 2
- && GET_CODE (XVECEXP (x, 0, 0)) == SET
- && GET_CODE (XVECEXP (x, 0, 1)) == CLOBBER)
- {
- x = XEXP (XVECEXP (x, 0, 0), 1);
- if (GET_CODE (x) == BSWAP && GET_MODE (x) == SImode)
- init = "";
- }
- }
- sprintf (result,
- (which_alternative == 0)
- ? "%s" "srli\t%%2, %%1, 16\;src\t%%2, %%2, %%1\;src\t%%2, %%2, %%2\;src\t%%0, %%1, %%2"
- : "%s" "srli\t%%0, %%1, 16\;src\t%%0, %%0, %%1\;src\t%%0, %%0, %%0\;src\t%%0, %%1, %%0",
- init);
- return result;
-}
- [(set_attr "type" "arith,arith")
- (set_attr "mode" "SI")
- (set_attr "length" "15,15")])
+ {@ [cons: =0, 1, =2; attrs: type, length]
+ [ a, 0, &a; arith, 15] << xtensa_bswapsi2_output (insn, "srli\t%2, %1, 16\;src\t%2, %2, %1\;src\t%2, %2, %2\;src\t%0, %1, %2");
+ [&a, r, X; arith, 15] << xtensa_bswapsi2_output (insn, "srli\t%0, %1, 16\;src\t%0, %0, %1\;src\t%0, %0, %0\;src\t%0, %1, %0");
+ }
+ [(set_attr "mode" "SI")])
(define_expand "bswapdi2"
[(set (match_operand:DI 0 "register_operand" "")
@@ -735,16 +718,15 @@
;; Logical instructions.
(define_insn "andsi3"
- [(set (match_operand:SI 0 "register_operand" "=a,a")
- (and:SI (match_operand:SI 1 "register_operand" "%r,r")
- (match_operand:SI 2 "mask_operand" "P,r")))]
+ [(set (match_operand:SI 0 "register_operand")
+ (and:SI (match_operand:SI 1 "register_operand")
+ (match_operand:SI 2 "mask_operand")))]
""
- "@
- extui\t%0, %1, 0, %K2
- and\t%0, %1, %2"
- [(set_attr "type" "arith,arith")
- (set_attr "mode" "SI")
- (set_attr "length" "3,3")])
+ {@ [cons: =0, %1, 2; attrs: type, length]
+ [a, r, P; arith, 3] extui\t%0, %1, 0, %K2
+ [a, r, r; arith, 3] and\t%0, %1, %2
+ }
+ [(set_attr "mode" "SI")])
(define_insn_and_split "*andsi3_bitcmpl"
[(set (match_operand:SI 0 "register_operand" "=a")
@@ -937,27 +919,15 @@
;; Zero-extend instructions.
-(define_insn "zero_extendhisi2"
- [(set (match_operand:SI 0 "register_operand" "=a,a")
- (zero_extend:SI (match_operand:HI 1 "nonimmed_operand" "r,U")))]
+(define_insn "zero_extend<mode>si2"
+ [(set (match_operand:SI 0 "register_operand")
+ (zero_extend:SI (match_operand:HQI 1 "nonimmed_operand")))]
""
- "@
- extui\t%0, %1, 0, 16
- %v1l16ui\t%0, %1"
- [(set_attr "type" "arith,load")
- (set_attr "mode" "SI")
- (set_attr "length" "3,3")])
-
-(define_insn "zero_extendqisi2"
- [(set (match_operand:SI 0 "register_operand" "=a,a")
- (zero_extend:SI (match_operand:QI 1 "nonimmed_operand" "r,U")))]
- ""
- "@
- extui\t%0, %1, 0, 8
- %v1l8ui\t%0, %1"
- [(set_attr "type" "arith,load")
- (set_attr "mode" "SI")
- (set_attr "length" "3,3")])
+ {@ [cons: =0, 1; attrs: type, length]
+ [a, r; arith, 3] extui\t%0, %1, 0, <mode_bits>
+ [a, U; load , 3] %v1l<mode_bits>ui\t%0, %1
+ }
+ [(set_attr "mode" "SI")])
;; Sign-extend instructions.
@@ -975,15 +945,14 @@
})
(define_insn "extendhisi2_internal"
- [(set (match_operand:SI 0 "register_operand" "=B,a")
- (sign_extend:SI (match_operand:HI 1 "sext_operand" "r,U")))]
+ [(set (match_operand:SI 0 "register_operand")
+ (sign_extend:SI (match_operand:HI 1 "sext_operand")))]
""
- "@
- sext\t%0, %1, 15
- %v1l16si\t%0, %1"
- [(set_attr "type" "arith,load")
- (set_attr "mode" "SI")
- (set_attr "length" "3,3")])
+ {@ [cons: =0, 1; attrs: type, length]
+ [B, r; arith, 3] sext\t%0, %1, 15
+ [a, U; load , 3] %v1l16si\t%0, %1
+ }
+ [(set_attr "mode" "SI")])
(define_expand "extendqisi2"
[(set (match_operand:SI 0 "register_operand" "")
@@ -1007,7 +976,7 @@
(set_attr "length" "3")])
-;; Field extract instructions.
+;; Field extract and insert instructions.
(define_expand "extvsi"
[(set (match_operand:SI 0 "register_operand" "")
@@ -1141,6 +1110,25 @@
(set_attr "mode" "SI")
(set_attr "length" "6")])
+(define_insn "insvsi"
+ [(set (zero_extract:SI (match_operand:SI 0 "register_operand" "+a")
+ (match_operand:SI 1 "extui_fldsz_operand" "i")
+ (match_operand:SI 2 "const_int_operand" "i"))
+ (match_operand:SI 3 "register_operand" "r"))]
+ "TARGET_DEPBITS"
+{
+ int shift;
+ if (BITS_BIG_ENDIAN)
+ shift = (32 - (INTVAL (operands[1]) + INTVAL (operands[2]))) & 0x1f;
+ else
+ shift = INTVAL (operands[2]) & 0x1f;
+ operands[2] = GEN_INT (shift);
+ return "depbits\t%0, %3, %2, %1";
+}
+ [(set_attr "type" "arith")
+ (set_attr "mode" "SI")
+ (set_attr "length" "3")])
+
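[Editorial sketch] A worked model of the shift computation in the insvsi pattern above: the DEPBITS immediate is the LSB position of the field in the machine's bit numbering, so a 4-bit field at position 8 yields 8 with little-endian bit numbering and (32 - (4 + 8)) & 0x1f == 20 when BITS_BIG_ENDIAN:

int depbits_shift (int width, int pos, bool bits_big_endian)
{
  return bits_big_endian ? (32 - (width + pos)) & 0x1f
                         : pos & 0x1f;
}
// depbits_shift (4, 8, false) == 8; depbits_shift (4, 8, true) == 20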
;; Conversions.
@@ -1168,12 +1156,7 @@
(any_fix:SI (mult:SF (match_operand:SF 1 "register_operand" "f")
(match_operand:SF 2 "fix_scaling_operand" "F"))))]
"TARGET_HARD_FLOAT"
-{
- static char result[64];
- sprintf (result, "<m_fix>.s\t%%0, %%1, %d",
- REAL_EXP (CONST_DOUBLE_REAL_VALUE (operands[2])) - 1);
- return result;
-}
+ "<m_fix>.s\t%0, %1, %U2"
[(set_attr "type" "fconv")
(set_attr "mode" "SF")
(set_attr "length" "3")])
@@ -1192,12 +1175,36 @@
(mult:SF (any_float:SF (match_operand:SI 1 "register_operand" "a"))
(match_operand:SF 2 "float_scaling_operand" "F")))]
"TARGET_HARD_FLOAT"
-{
- static char result[64];
- sprintf (result, "<m_float>.s\t%%0, %%1, %d",
- 1 - REAL_EXP (CONST_DOUBLE_REAL_VALUE (operands[2])));
- return result;
-}
+ "<m_float>.s\t%0, %1, %V2"
+ [(set_attr "type" "fconv")
+ (set_attr "mode" "SF")
+ (set_attr "length" "3")])
+
+(define_insn "l<m_round>sfsi2"
+ [(set (match_operand:SI 0 "register_operand" "=a")
+ (unspec:SI [(match_operand:SF 1 "register_operand" "f")] ANY_ROUND))]
+ "TARGET_HARD_FLOAT"
+ "<m_round>.s\t%0, %1, 0"
+ [(set_attr "type" "fconv")
+ (set_attr "mode" "SF")
+ (set_attr "length" "3")])
+
+(define_insn "*l<m_round>sfsi2_2x"
+ [(set (match_operand:SI 0 "register_operand" "=a")
+ (unspec:SI [(plus:SF (match_operand:SF 1 "register_operand" "f")
+ (match_dup 1))] ANY_ROUND))]
+ "TARGET_HARD_FLOAT"
+ "<m_round>.s\t%0, %1, 1"
+ [(set_attr "type" "fconv")
+ (set_attr "mode" "SF")
+ (set_attr "length" "3")])
+
+(define_insn "*l<m_round>sfsi2_scaled"
+ [(set (match_operand:SI 0 "register_operand" "=a")
+ (unspec:SI [(mult:SF (match_operand:SF 1 "register_operand" "f")
+ (match_operand:SF 2 "fix_scaling_operand" "F"))] ANY_ROUND))]
+ "TARGET_HARD_FLOAT"
+ "<m_round>.s\t%0, %1, %U2"
[(set_attr "type" "fconv")
(set_attr "mode" "SF")
(set_attr "length" "3")])
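[Editorial sketch] Since %U2 prints REAL_EXP minus one, a power-of-two scale constant 2^n becomes the immediate n, so the _scaled variant folds the multiply into one rounding instruction. A scalar model of the ceil case:

#include <cmath>

int ceil_scaled (float x, int n)
{
  return (int) std::ceil (std::ldexp (x, n));  // ceil (x * 2^n)
}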
@@ -1252,7 +1259,10 @@
std::swap (operands[0], operands[1]);
std::swap (operands[2], operands[3]);
}
-})
+}
+ [(set_attr "type" "move,move,load,load,store")
+ (set_attr "mode" "DI")
+ (set_attr "length" "6,12,6,6,6")])
(define_split
[(set (match_operand:DI 0 "register_operand")
@@ -1279,29 +1289,28 @@
})
(define_insn "movsi_internal"
- [(set (match_operand:SI 0 "nonimmed_operand" "=D,D,D,D,R,R,a,q,a,a,W,a,a,U,*a,*A")
- (match_operand:SI 1 "move_operand" "M,D,d,R,D,d,r,r,I,Y,i,T,U,r,*A,*r"))]
+ [(set (match_operand:SI 0 "nonimmed_operand")
+ (match_operand:SI 1 "move_operand"))]
"xtensa_valid_move (SImode, operands)"
- "@
- movi.n\t%0, %x1
- mov.n\t%0, %1
- mov.n\t%0, %1
- %v1l32i.n\t%0, %1
- %v0s32i.n\t%1, %0
- %v0s32i.n\t%1, %0
- mov\t%0, %1
- movsp\t%0, %1
- movi\t%0, %x1
- movi\t%0, %1
- const16\t%0, %t1\;const16\t%0, %b1
- %v1l32r\t%0, %1
- %v1l32i\t%0, %1
- %v0s32i\t%1, %0
- rsr\t%0, ACCLO
- wsr\t%1, ACCLO"
- [(set_attr "type" "move,move,move,load,store,store,move,move,move,move,move,load,load,store,rsr,wsr")
- (set_attr "mode" "SI")
- (set_attr "length" "2,2,2,2,2,2,3,3,3,3,6,3,3,3,3,3")])
+ {@ [cons: =0, 1; attrs: type, length]
+ [ D, M; move , 2] movi.n\t%0, %x1
+ [ D, D; move , 2] mov.n\t%0, %1
+ [ D, d; move , 2] ^
+ [ D, R; load , 2] %v1l32i.n\t%0, %1
+ [ R, D; store, 2] %v0s32i.n\t%1, %0
+ [ R, d; store, 2] ^
+ [ a, r; move , 3] mov\t%0, %1
+ [ q, r; move , 3] movsp\t%0, %1
+ [ a, I; move , 3] movi\t%0, %x1
+ [ a, Y; load , 3] movi\t%0, %1
+ [ W, i; move , 6] const16\t%0, %t1\;const16\t%0, %b1
+ [ a, T; load , 3] %v1l32r\t%0, %1
+ [ a, U; load , 3] %v1l32i\t%0, %1
+ [ U, r; store, 3] %v0s32i\t%1, %0
+ [*a, *A; rsr , 3] rsr\t%0, ACCLO
+ [*A, *r; wsr , 3] wsr\t%1, ACCLO
+ }
+ [(set_attr "mode" "SI")])
(define_split
[(set (match_operand:SHI 0 "register_operand")
@@ -1351,23 +1360,22 @@
})
(define_insn "movhi_internal"
- [(set (match_operand:HI 0 "nonimmed_operand" "=D,D,a,a,a,a,a,U,*a,*A")
- (match_operand:HI 1 "move_operand" "M,d,r,I,Y,T,U,r,*A,*r"))]
+ [(set (match_operand:HI 0 "nonimmed_operand")
+ (match_operand:HI 1 "move_operand"))]
"xtensa_valid_move (HImode, operands)"
- "@
- movi.n\t%0, %x1
- mov.n\t%0, %1
- mov\t%0, %1
- movi\t%0, %x1
- movi\t%0, %1
- %v1l32r\t%0, %1
- %v1l16ui\t%0, %1
- %v0s16i\t%1, %0
- rsr\t%0, ACCLO
- wsr\t%1, ACCLO"
- [(set_attr "type" "move,move,move,move,move,load,load,store,rsr,wsr")
- (set_attr "mode" "HI")
- (set_attr "length" "2,2,3,3,3,3,3,3,3,3")])
+ {@ [cons: =0, 1; attrs: type, length]
+ [ D, M; move , 2] movi.n\t%0, %x1
+ [ D, d; move , 2] mov.n\t%0, %1
+ [ a, r; move , 3] mov\t%0, %1
+ [ a, I; move , 3] movi\t%0, %x1
+ [ a, Y; load , 3] movi\t%0, %1
+ [ a, T; load , 3] %v1l32r\t%0, %1
+ [ a, U; load , 3] %v1l16ui\t%0, %1
+ [ U, r; store, 3] %v0s16i\t%1, %0
+ [*a, *A; rsr , 3] rsr\t%0, ACCLO
+ [*A, *r; wsr , 3] wsr\t%1, ACCLO
+ }
+ [(set_attr "mode" "HI")])
;; 8-bit Integer moves
@@ -1381,21 +1389,20 @@
})
(define_insn "movqi_internal"
- [(set (match_operand:QI 0 "nonimmed_operand" "=D,D,a,a,a,U,*a,*A")
- (match_operand:QI 1 "move_operand" "M,d,r,I,U,r,*A,*r"))]
+ [(set (match_operand:QI 0 "nonimmed_operand")
+ (match_operand:QI 1 "move_operand"))]
"xtensa_valid_move (QImode, operands)"
- "@
- movi.n\t%0, %x1
- mov.n\t%0, %1
- mov\t%0, %1
- movi\t%0, %x1
- %v1l8ui\t%0, %1
- %v0s8i\t%1, %0
- rsr\t%0, ACCLO
- wsr\t%1, ACCLO"
- [(set_attr "type" "move,move,move,move,load,store,rsr,wsr")
- (set_attr "mode" "QI")
- (set_attr "length" "2,2,3,3,3,3,3,3")])
+ {@ [cons: =0, 1; attrs: type, length]
+ [ D, M; move , 2] movi.n\t%0, %x1
+ [ D, d; move , 2] mov.n\t%0, %1
+ [ a, r; move , 3] mov\t%0, %1
+ [ a, I; move , 3] movi\t%0, %x1
+ [ a, U; load , 3] %v1l8ui\t%0, %1
+ [ U, r; store, 3] %v0s8i\t%1, %0
+ [*a, *A; rsr , 3] rsr\t%0, ACCLO
+ [*A, *r; wsr , 3] wsr\t%1, ACCLO
+ }
+ [(set_attr "mode" "QI")])
;; Sub-word reloads from the constant pool.
@@ -1453,30 +1460,29 @@
})
(define_insn "movsf_internal"
- [(set (match_operand:SF 0 "nonimmed_operand" "=f,f,^U,D,a,D,R,a,f,a,a,W,a,U")
- (match_operand:SF 1 "move_operand" "f,^U,f,d,T,R,d,r,r,f,Y,iF,U,r"))]
+ [(set (match_operand:SF 0 "nonimmed_operand")
+ (match_operand:SF 1 "move_operand"))]
"((register_operand (operands[0], SFmode)
|| register_operand (operands[1], SFmode))
&& !(FP_REG_P (xt_true_regnum (operands[0]))
&& (constantpool_mem_p (operands[1]) || CONSTANT_P (operands[1]))))"
- "@
- mov.s\t%0, %1
- %v1lsi\t%0, %1
- %v0ssi\t%1, %0
- mov.n\t%0, %1
- %v1l32r\t%0, %1
- %v1l32i.n\t%0, %1
- %v0s32i.n\t%1, %0
- mov\t%0, %1
- wfr\t%0, %1
- rfr\t%0, %1
- movi\t%0, %y1
- const16\t%0, %t1\;const16\t%0, %b1
- %v1l32i\t%0, %1
- %v0s32i\t%1, %0"
- [(set_attr "type" "farith,fload,fstore,move,load,load,store,move,farith,farith,move,move,load,store")
- (set_attr "mode" "SF")
- (set_attr "length" "3,3,3,2,3,2,2,3,3,3,3,6,3,3")])
+ {@ [cons: =0, 1; attrs: type, length]
+ [f, f; farith, 3] mov.s\t%0, %1
+ [f, ^U; fload , 3] %v1lsi\t%0, %1
+ [U, f; fstore, 3] %v0ssi\t%1, %0
+ [D, d; move , 2] mov.n\t%0, %1
+ [a, T; load , 3] %v1l32r\t%0, %1
+ [D, R; load , 2] %v1l32i.n\t%0, %1
+ [R, d; store , 2] %v0s32i.n\t%1, %0
+ [a, r; move , 3] mov\t%0, %1
+ [f, r; farith, 3] wfr\t%0, %1
+ [a, f; farith, 3] rfr\t%0, %1
+ [a, Y; load , 3] movi\t%0, %y1
+ [W, iF; move , 6] const16\t%0, %t1\;const16\t%0, %b1
+ [a, U; load , 3] %v1l32i\t%0, %1
+ [U, r; store , 3] %v0s32i\t%1, %0
+ }
+ [(set_attr "mode" "SF")])
(define_insn "*lsiu"
[(set (match_operand:SF 0 "register_operand" "=f")
@@ -1598,7 +1604,10 @@
std::swap (operands[0], operands[1]);
std::swap (operands[2], operands[3]);
}
-})
+}
+ [(set_attr "type" "move,load,move,load,load,store")
+ (set_attr "mode" "DF")
+ (set_attr "length" "6,6,12,6,6,6")])
;; Block moves
@@ -1641,16 +1650,15 @@
})
(define_insn "ashlsi3_internal"
- [(set (match_operand:SI 0 "register_operand" "=a,a")
- (ashift:SI (match_operand:SI 1 "register_operand" "r,r")
- (match_operand:SI 2 "arith_operand" "J,r")))]
+ [(set (match_operand:SI 0 "register_operand")
+ (ashift:SI (match_operand:SI 1 "register_operand")
+ (match_operand:SI 2 "arith_operand")))]
""
- "@
- slli\t%0, %1, %R2
- ssl\t%2\;sll\t%0, %1"
- [(set_attr "type" "arith,arith")
- (set_attr "mode" "SI")
- (set_attr "length" "3,6")])
+ {@ [cons: =0, 1, 2; attrs: type, length]
+ [a, r, J; arith, 3] slli\t%0, %1, %R2
+ [a, r, r; arith, 6] ssl\t%2\;sll\t%0, %1
+ }
+ [(set_attr "mode" "SI")])
(define_split
[(set (match_operand:SI 0 "register_operand")
@@ -1662,35 +1670,26 @@
(match_dup 1)))])
(define_insn "ashrsi3"
- [(set (match_operand:SI 0 "register_operand" "=a,a")
- (ashiftrt:SI (match_operand:SI 1 "register_operand" "r,r")
- (match_operand:SI 2 "arith_operand" "J,r")))]
+ [(set (match_operand:SI 0 "register_operand")
+ (ashiftrt:SI (match_operand:SI 1 "register_operand")
+ (match_operand:SI 2 "arith_operand")))]
""
- "@
- srai\t%0, %1, %R2
- ssr\t%2\;sra\t%0, %1"
- [(set_attr "type" "arith,arith")
- (set_attr "mode" "SI")
- (set_attr "length" "3,6")])
+ {@ [cons: =0, 1, 2; attrs: type, length]
+ [a, r, J; arith, 3] srai\t%0, %1, %R2
+ [a, r, r; arith, 6] ssr\t%2\;sra\t%0, %1
+ }
+ [(set_attr "mode" "SI")])
(define_insn "lshrsi3"
- [(set (match_operand:SI 0 "register_operand" "=a,a")
- (lshiftrt:SI (match_operand:SI 1 "register_operand" "r,r")
- (match_operand:SI 2 "arith_operand" "J,r")))]
+ [(set (match_operand:SI 0 "register_operand")
+ (lshiftrt:SI (match_operand:SI 1 "register_operand")
+ (match_operand:SI 2 "arith_operand")))]
""
-{
- if (which_alternative == 0)
- {
- if ((INTVAL (operands[2]) & 0x1f) < 16)
- return "srli\t%0, %1, %R2";
- else
- return "extui\t%0, %1, %R2, %L2";
- }
- return "ssr\t%2\;srl\t%0, %1";
-}
- [(set_attr "type" "arith,arith")
- (set_attr "mode" "SI")
- (set_attr "length" "3,6")])
+ {@ [cons: =0, 1, 2; attrs: type, length]
+ [a, r, J; arith, 3] << (INTVAL (operands[2]) & 0x1f) < 16 ? "srli\t%0, %1, %R2" : "extui\t%0, %1, %R2, %L2";
+ [a, r, r; arith, 6] ssr\t%2\;srl\t%0, %1
+ }
+ [(set_attr "mode" "SI")])
(define_insn "*shift_per_byte"
[(set (match_operand:SI 0 "register_operand" "=a")
@@ -1893,28 +1892,26 @@
(set_attr "length" "6")])
(define_insn "rotlsi3"
- [(set (match_operand:SI 0 "register_operand" "=a,a")
- (rotate:SI (match_operand:SI 1 "register_operand" "r,r")
- (match_operand:SI 2 "arith_operand" "J,r")))]
+ [(set (match_operand:SI 0 "register_operand")
+ (rotate:SI (match_operand:SI 1 "register_operand")
+ (match_operand:SI 2 "arith_operand")))]
""
- "@
- ssai\t%L2\;src\t%0, %1, %1
- ssl\t%2\;src\t%0, %1, %1"
- [(set_attr "type" "multi,multi")
- (set_attr "mode" "SI")
- (set_attr "length" "6,6")])
+ {@ [cons: =0, 1, 2; attrs: type, length]
+ [a, r, J; multi, 6] ssai\t%L2\;src\t%0, %1, %1
+ [a, r, r; multi, 6] ssl\t%2\;src\t%0, %1, %1
+ }
+ [(set_attr "mode" "SI")])
(define_insn "rotrsi3"
- [(set (match_operand:SI 0 "register_operand" "=a,a")
- (rotatert:SI (match_operand:SI 1 "register_operand" "r,r")
- (match_operand:SI 2 "arith_operand" "J,r")))]
+ [(set (match_operand:SI 0 "register_operand")
+ (rotatert:SI (match_operand:SI 1 "register_operand")
+ (match_operand:SI 2 "arith_operand")))]
""
- "@
- ssai\t%R2\;src\t%0, %1, %1
- ssr\t%2\;src\t%0, %1, %1"
- [(set_attr "type" "multi,multi")
- (set_attr "mode" "SI")
- (set_attr "length" "6,6")])
+ {@ [cons: =0, 1, 2; attrs: type, length]
+ [a, r, J; multi, 6] ssai\t%R2\;src\t%0, %1, %1
+ [a, r, r; multi, 6] ssr\t%2\;src\t%0, %1, %1
+ }
+ [(set_attr "mode" "SI")])
;; Comparisons.
@@ -1973,26 +1970,23 @@
[(match_operand:SI 0 "register_operand" "r")
(const_int -2147483648)])
(label_ref (match_operand 1 ""))
- (pc)))]
+ (pc)))
+ (clobber (match_scratch:SI 3 "=a"))]
"TARGET_ABS"
"#"
- "&& can_create_pseudo_p ()"
+ "&& 1"
[(set (match_dup 3)
(abs:SI (match_dup 0)))
(set (pc)
(if_then_else (match_op_dup 2
- [(zero_extract:SI (match_dup 3)
- (const_int 1)
- (match_dup 4))
+ [(match_dup 3)
(const_int 0)])
(label_ref (match_dup 1))
(pc)))]
{
- operands[3] = gen_reg_rtx (SImode);
- operands[4] = GEN_INT (BITS_BIG_ENDIAN ? 0 : 31);
- operands[2] = gen_rtx_fmt_ee (reverse_condition (GET_CODE (operands[2])),
- VOIDmode, XEXP (operands[2], 0),
- const0_rtx);
+ if (GET_CODE (operands[3]) == SCRATCH)
+ operands[3] = gen_reg_rtx (SImode);
+ PUT_CODE (operands[2], GET_CODE (operands[2]) == EQ ? LT : GE);
}
[(set_attr "type" "jump")
(set_attr "mode" "none")
@@ -2139,7 +2133,7 @@
(label_ref (match_dup 1))
(pc)))]
{
- operands[3] = GEN_INT ((1 << GET_MODE_BITSIZE (GET_MODE (operands[3]))) - 1);
+ operands[3] = GEN_INT (GET_MODE_MASK (GET_MODE (operands[3])));
})
(define_insn_and_split "*masktrue_const_pow2_minus_one"
@@ -3319,35 +3313,41 @@
(const_int 8)
(const_int 9))))])
-(define_peephole2
- [(set (match_operand:SI 0 "register_operand")
- (match_operand:SI 6 "reload_operand"))
- (set (match_operand:SI 1 "register_operand")
- (match_operand:SI 7 "reload_operand"))
- (set (match_operand:SF 2 "register_operand")
- (match_operand:SF 4 "register_operand"))
- (set (match_operand:SF 3 "register_operand")
- (match_operand:SF 5 "register_operand"))]
- "REGNO (operands[0]) == REGNO (operands[4])
- && REGNO (operands[1]) == REGNO (operands[5])
- && peep2_reg_dead_p (4, operands[0])
- && peep2_reg_dead_p (4, operands[1])"
- [(set (match_dup 2)
- (match_dup 6))
- (set (match_dup 3)
- (match_dup 7))]
+(define_insn_and_split "*eqne_in_range"
+ [(set (pc)
+ (if_then_else (match_operator 4 "alt_ubranch_operator"
+ [(plus:SI (match_operand:SI 0 "register_operand" "r")
+ (match_operand:SI 1 "const_int_operand" "i"))
+ (match_operand:SI 2 "const_int_operand" "i")])
+ (label_ref (match_operand 3 ""))
+ (pc)))
+ (clobber (match_scratch:SI 5 "=&a"))]
+ "TARGET_MINMAX && TARGET_CLAMPS
+ && INTVAL (operands[1]) * 2 - INTVAL (operands[2]) == 1
+ && IN_RANGE (exact_log2 (INTVAL (operands[1])), 7, 22)"
+ "#"
+ "&& 1"
+ [(set (match_dup 5)
+ (smin:SI (smax:SI (match_dup 0)
+ (match_dup 1))
+ (match_dup 2)))
+ (set (pc)
+ (if_then_else (match_op_dup 4
+ [(match_dup 0)
+ (match_dup 5)])
+ (label_ref (match_dup 3))
+ (pc)))]
{
- HARD_REG_SET regs;
- int i;
- CLEAR_HARD_REG_SET (regs);
- for (i = 0; i <= 3; ++i)
- if (TEST_HARD_REG_BIT (regs, REGNO (operands[i])))
- FAIL;
- else
- SET_HARD_REG_BIT (regs, REGNO (operands[i]));
- operands[6] = gen_rtx_MEM (SFmode, XEXP (operands[6], 0));
- operands[7] = gen_rtx_MEM (SFmode, XEXP (operands[7], 0));
-})
+ HOST_WIDE_INT v = INTVAL (operands[1]);
+ operands[1] = GEN_INT (-v);
+ operands[2] = GEN_INT (v - 1);
+ PUT_CODE (operands[4], GET_CODE (operands[4]) == GTU ? NE : EQ);
+ if (GET_CODE (operands[5]) == SCRATCH)
+ operands[5] = gen_reg_rtx (SImode);
+}
+ [(set_attr "type" "jump")
+ (set_attr "mode" "none")
+ (set_attr "length" "6")])
(define_split
[(clobber (match_operand 0 "register_operand"))]
@@ -3434,49 +3434,3 @@ FALLTHRU:;
operands[1] = GEN_INT (imm0);
operands[2] = GEN_INT (imm1);
})
-
-(define_peephole2
- [(set (match_operand 0 "register_operand")
- (match_operand 1 "register_operand"))]
- "REG_NREGS (operands[0]) == 1 && GP_REG_P (REGNO (operands[0]))
- && REG_NREGS (operands[1]) == 1 && GP_REG_P (REGNO (operands[1]))
- && peep2_reg_dead_p (1, operands[1])"
- [(const_int 0)]
-{
- basic_block bb = BLOCK_FOR_INSN (curr_insn);
- rtx_insn *head = BB_HEAD (bb), *insn;
- rtx dest = operands[0], src = operands[1], pattern, t_dest, dest_orig;
- for (insn = PREV_INSN (curr_insn);
- insn && insn != head;
- insn = PREV_INSN (insn))
- if (CALL_P (insn))
- break;
- else if (INSN_P (insn))
- {
- if (GET_CODE (pattern = PATTERN (insn)) == SET
- && REG_P (t_dest = SET_DEST (pattern))
- && REG_NREGS (t_dest) == 1
- && REGNO (t_dest) == REGNO (src))
- {
- dest_orig = SET_DEST (pattern);
- SET_DEST (pattern) = gen_rtx_REG (GET_MODE (t_dest),
- REGNO (dest));
- extract_insn (insn);
- if (!constrain_operands (true, get_enabled_alternatives (insn)))
- {
- SET_DEST (pattern) = dest_orig;
- goto ABORT;
- }
- df_insn_rescan (insn);
- goto FALLTHRU;
- }
- if (reg_overlap_mentioned_p (dest, pattern)
- || reg_overlap_mentioned_p (src, pattern)
- || set_of (dest, insn)
- || set_of (src, insn))
- break;
- }
-ABORT:
- FAIL;
-FALLTHRU:;
-})